{ "best_metric": null, "best_model_checkpoint": null, "epoch": 4.0, "eval_steps": 500, "global_step": 4168, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "completion_length": 149.42857360839844, "epoch": 0.0009596928982725527, "grad_norm": 1.7652862071990967, "kl": 0.0, "learning_rate": 6.84931506849315e-09, "loss": 0.0, "reward": 1.1428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1 }, { "completion_length": 211.7857208251953, "epoch": 0.0019193857965451055, "grad_norm": 1.091597080230713, "kl": 0.0, "learning_rate": 1.36986301369863e-08, "loss": 0.0, "reward": 1.0, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2 }, { "completion_length": 176.1428680419922, "epoch": 0.0028790786948176585, "grad_norm": 1.2874499559402466, "kl": 0.0005459035746753216, "learning_rate": 2.054794520547945e-08, "loss": 0.0, "reward": 0.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.1428571492433548, "step": 3 }, { "completion_length": 157.7857208251953, "epoch": 0.003838771593090211, "grad_norm": 2.1096432209014893, "kl": 0.0004846069496124983, "learning_rate": 2.73972602739726e-08, "loss": 0.0, "reward": 0.8928571939468384, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3214285969734192, "step": 4 }, { "completion_length": 228.9285888671875, "epoch": 0.0047984644913627635, "grad_norm": 0.675032377243042, "kl": 0.00040305525180883706, "learning_rate": 3.424657534246575e-08, "loss": 0.0, "reward": 0.9642857313156128, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 5 }, { "completion_length": 196.2857208251953, "epoch": 0.005758157389635317, "grad_norm": 0.00021198269678279757, "kl": 0.00043050319072790444, "learning_rate": 4.10958904109589e-08, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 6 }, { "completion_length": 198.42857360839844, "epoch": 0.0067178502879078695, "grad_norm": 1.249631404876709, "kl": 0.0005320970667526126, "learning_rate": 4.794520547945205e-08, "loss": 0.0, "reward": 0.8214285969734192, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3214285969734192, "step": 7 }, { "completion_length": 190.35714721679688, "epoch": 0.007677543186180422, "grad_norm": 2.0703136920928955, "kl": 0.0004732342204079032, "learning_rate": 5.47945205479452e-08, "loss": 0.0, "reward": 0.9642857313156128, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 8 }, { "completion_length": 185.6428680419922, "epoch": 0.008637236084452975, "grad_norm": 0.0002080567937809974, "kl": 0.0003981039917562157, "learning_rate": 6.164383561643836e-08, "loss": 0.0, "reward": 0.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 9 }, { "completion_length": 193.07144165039062, "epoch": 0.009596928982725527, "grad_norm": 0.8925747275352478, "kl": 0.0004366484936326742, "learning_rate": 6.84931506849315e-08, "loss": 0.0, "reward": 0.9642857313156128, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 10 }, { "completion_length": 180.2857208251953, "epoch": 0.01055662188099808, "grad_norm": 1.8199864625930786, "kl": 0.0003857313422486186, "learning_rate": 7.534246575342466e-08, "loss": 0.0, "reward": 0.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 11 }, { "completion_length": 149.92857360839844, "epoch": 0.011516314779270634, "grad_norm": 1.0607343912124634, "kl": 0.0004078479250892997, "learning_rate": 8.21917808219178e-08, "loss": 0.0, "reward": 0.8928571939468384, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 12 }, { "completion_length": 171.21429443359375, "epoch": 0.012476007677543186, "grad_norm": 0.8774348497390747, "kl": 0.0005502912099473178, "learning_rate": 8.904109589041094e-08, "loss": 0.0, "reward": 0.8928571939468384, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 13 }, { "completion_length": 249.50001525878906, "epoch": 0.013435700575815739, "grad_norm": 1.3724201917648315, "kl": 0.0004685703606810421, "learning_rate": 9.58904109589041e-08, "loss": 0.0, "reward": 1.0, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 14 }, { "completion_length": 229.4285888671875, "epoch": 0.014395393474088292, "grad_norm": 1.6538445949554443, "kl": 0.0005058706155978143, "learning_rate": 1.0273972602739725e-07, "loss": 0.0, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 15 }, { "completion_length": 160.0, "epoch": 0.015355086372360844, "grad_norm": 1.9589234590530396, "kl": 0.0004664991865865886, "learning_rate": 1.095890410958904e-07, "loss": 0.0, "reward": 0.8214285969734192, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3214285969734192, "step": 16 }, { "completion_length": 160.35714721679688, "epoch": 0.016314779270633396, "grad_norm": 1.7844669818878174, "kl": 0.00046668402501381934, "learning_rate": 1.1643835616438355e-07, "loss": 0.0, "reward": 1.0357143878936768, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 17 }, { "completion_length": 187.21429443359375, "epoch": 0.01727447216890595, "grad_norm": 0.8248365521430969, "kl": 0.00046405484317801893, "learning_rate": 1.232876712328767e-07, "loss": 0.0, "reward": 0.8214285969734192, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3214285969734192, "step": 18 }, { "completion_length": 187.00001525878906, "epoch": 0.018234165067178502, "grad_norm": 0.7296115756034851, "kl": 0.00042631221003830433, "learning_rate": 1.3013698630136985e-07, "loss": 0.0, "reward": 0.9642857313156128, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 19 }, { "completion_length": 183.07144165039062, "epoch": 0.019193857965451054, "grad_norm": 0.49826255440711975, "kl": 0.0005027211736887693, "learning_rate": 1.36986301369863e-07, "loss": 0.0, "reward": 0.9642857313156128, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 20 }, { "completion_length": 194.2857208251953, "epoch": 0.02015355086372361, "grad_norm": 0.0002961342688649893, "kl": 0.0005313262226991355, "learning_rate": 1.4383561643835615e-07, "loss": 0.0, "reward": 0.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 21 }, { "completion_length": 235.2857208251953, "epoch": 0.02111324376199616, "grad_norm": 0.9356268644332886, "kl": 0.0005150196375325322, "learning_rate": 1.506849315068493e-07, "loss": 0.0, "reward": 0.9642857313156128, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 22 }, { "completion_length": 166.1428680419922, "epoch": 0.022072936660268713, "grad_norm": 0.00022761950094718486, "kl": 0.00045642309123650193, "learning_rate": 1.5753424657534245e-07, "loss": 0.0, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 23 }, { "completion_length": 197.21429443359375, "epoch": 0.023032629558541268, "grad_norm": 0.00023177180264610797, "kl": 0.00047837855527177453, "learning_rate": 1.643835616438356e-07, "loss": 0.0, "reward": 0.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 24 }, { "completion_length": 191.42857360839844, "epoch": 0.02399232245681382, "grad_norm": 0.00018584098143037409, "kl": 0.00039955542888492346, "learning_rate": 1.7123287671232875e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 25 }, { "completion_length": 161.35714721679688, "epoch": 0.02495201535508637, "grad_norm": 1.979371190071106, "kl": 0.00045707443496212363, "learning_rate": 1.780821917808219e-07, "loss": 0.0, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 26 }, { "completion_length": 180.42857360839844, "epoch": 0.025911708253358926, "grad_norm": 1.530049204826355, "kl": 0.0005106572061777115, "learning_rate": 1.8493150684931505e-07, "loss": 0.0, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 27 }, { "completion_length": 139.6428680419922, "epoch": 0.026871401151631478, "grad_norm": 2.4335715770721436, "kl": 0.00047851199633441865, "learning_rate": 1.917808219178082e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 28 }, { "completion_length": 245.1428680419922, "epoch": 0.02783109404990403, "grad_norm": 1.1963196992874146, "kl": 0.0005220815073698759, "learning_rate": 1.9863013698630135e-07, "loss": 0.0, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 29 }, { "completion_length": 177.35714721679688, "epoch": 0.028790786948176585, "grad_norm": 1.6823723316192627, "kl": 0.0004937481135129929, "learning_rate": 2.054794520547945e-07, "loss": 0.0, "reward": 0.9642857313156128, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 30 }, { "completion_length": 100.78572082519531, "epoch": 0.029750479846449136, "grad_norm": 3.010380983352661, "kl": 0.0006905650952830911, "learning_rate": 2.1232876712328765e-07, "loss": 0.0, "reward": 0.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3214285969734192, "step": 31 }, { "completion_length": 156.92857360839844, "epoch": 0.030710172744721688, "grad_norm": 1.5910171270370483, "kl": 0.0005026881117373705, "learning_rate": 2.191780821917808e-07, "loss": 0.0, "reward": 0.9642857313156128, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 32 }, { "completion_length": 210.07144165039062, "epoch": 0.03166986564299424, "grad_norm": 0.8777531385421753, "kl": 0.00047323800390586257, "learning_rate": 2.2602739726027396e-07, "loss": 0.0, "reward": 0.8928571939468384, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 33 }, { "completion_length": 194.42857360839844, "epoch": 0.03262955854126679, "grad_norm": 0.8636143207550049, "kl": 0.0003911593521479517, "learning_rate": 2.328767123287671e-07, "loss": 0.0, "reward": 0.8928571939468384, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 34 }, { "completion_length": 230.85714721679688, "epoch": 0.03358925143953935, "grad_norm": 1.256912112236023, "kl": 0.00041964012780226767, "learning_rate": 2.3972602739726023e-07, "loss": 0.0, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 35 }, { "completion_length": 192.42857360839844, "epoch": 0.0345489443378119, "grad_norm": 1.0570237636566162, "kl": 0.0005136210820637643, "learning_rate": 2.465753424657534e-07, "loss": 0.0, "reward": 0.9642857313156128, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 36 }, { "completion_length": 158.7857208251953, "epoch": 0.03550863723608445, "grad_norm": 1.437069058418274, "kl": 0.00048153463285416365, "learning_rate": 2.5342465753424656e-07, "loss": 0.0, "reward": 0.8928571939468384, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 37 }, { "completion_length": 226.71429443359375, "epoch": 0.036468330134357005, "grad_norm": 0.7709242105484009, "kl": 0.00043737152009271085, "learning_rate": 2.602739726027397e-07, "loss": 0.0, "reward": 0.9642857313156128, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 38 }, { "completion_length": 171.71429443359375, "epoch": 0.03742802303262956, "grad_norm": 1.3941874504089355, "kl": 0.0005309264524839818, "learning_rate": 2.671232876712329e-07, "loss": 0.0, "reward": 0.9642857313156128, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 39 }, { "completion_length": 210.35714721679688, "epoch": 0.03838771593090211, "grad_norm": 0.00023455015616491437, "kl": 0.00043281022226437926, "learning_rate": 2.73972602739726e-07, "loss": 0.0, "reward": 0.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 40 }, { "completion_length": 331.0714416503906, "epoch": 0.03934740882917467, "grad_norm": 1.4091070890426636, "kl": 0.000502074952237308, "learning_rate": 2.8082191780821916e-07, "loss": 0.0, "reward": 0.7500000596046448, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.25, "step": 41 }, { "completion_length": 174.71429443359375, "epoch": 0.04030710172744722, "grad_norm": 0.8740628957748413, "kl": 0.0005726324743591249, "learning_rate": 2.876712328767123e-07, "loss": 0.0, "reward": 0.9642857313156128, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 42 }, { "completion_length": 179.35714721679688, "epoch": 0.04126679462571977, "grad_norm": 2.0238893032073975, "kl": 0.00048663251800462604, "learning_rate": 2.945205479452055e-07, "loss": 0.0, "reward": 0.8214285969734192, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3214285969734192, "step": 43 }, { "completion_length": 203.71429443359375, "epoch": 0.04222648752399232, "grad_norm": 1.0045762062072754, "kl": 0.0005395413609221578, "learning_rate": 3.013698630136986e-07, "loss": 0.0, "reward": 0.9642857313156128, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 44 }, { "completion_length": 219.1428680419922, "epoch": 0.04318618042226487, "grad_norm": 1.2194923162460327, "kl": 0.000419835327193141, "learning_rate": 3.0821917808219176e-07, "loss": 0.0, "reward": 0.9642857313156128, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 45 }, { "completion_length": 160.21429443359375, "epoch": 0.044145873320537425, "grad_norm": 0.8499300479888916, "kl": 0.000426066922955215, "learning_rate": 3.150684931506849e-07, "loss": 0.0, "reward": 0.9642857313156128, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 46 }, { "completion_length": 224.9285888671875, "epoch": 0.045105566218809984, "grad_norm": 0.000264274247456342, "kl": 0.0004919467610307038, "learning_rate": 3.219178082191781e-07, "loss": 0.0, "reward": 0.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 47 }, { "completion_length": 236.71429443359375, "epoch": 0.046065259117082535, "grad_norm": 1.4246731996536255, "kl": 0.00047361559700220823, "learning_rate": 3.287671232876712e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 48 }, { "completion_length": 163.42857360839844, "epoch": 0.04702495201535509, "grad_norm": 1.876747965812683, "kl": 0.0005332269938662648, "learning_rate": 3.3561643835616436e-07, "loss": 0.0, "reward": 0.9285714626312256, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 49 }, { "completion_length": 238.7857208251953, "epoch": 0.04798464491362764, "grad_norm": 1.4977957010269165, "kl": 0.0004816977307200432, "learning_rate": 3.424657534246575e-07, "loss": 0.0, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 50 }, { "completion_length": 163.07144165039062, "epoch": 0.04894433781190019, "grad_norm": 1.192659854888916, "kl": 0.0005493149510584772, "learning_rate": 3.493150684931507e-07, "loss": 0.0, "reward": 0.9642857313156128, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 51 }, { "completion_length": 171.85714721679688, "epoch": 0.04990403071017274, "grad_norm": 1.200013518333435, "kl": 0.00046320370165631175, "learning_rate": 3.561643835616438e-07, "loss": 0.0, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 52 }, { "completion_length": 182.42857360839844, "epoch": 0.0508637236084453, "grad_norm": 0.00033107379567809403, "kl": 0.00041765885544009507, "learning_rate": 3.6301369863013697e-07, "loss": 0.0, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 53 }, { "completion_length": 164.2857208251953, "epoch": 0.05182341650671785, "grad_norm": 1.1692883968353271, "kl": 0.0005812789313495159, "learning_rate": 3.698630136986301e-07, "loss": 0.0, "reward": 0.8928571939468384, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 54 }, { "completion_length": 196.1428680419922, "epoch": 0.052783109404990404, "grad_norm": 1.4730701446533203, "kl": 0.0005336858448572457, "learning_rate": 3.767123287671233e-07, "loss": 0.0, "reward": 0.9642857313156128, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 55 }, { "completion_length": 195.1428680419922, "epoch": 0.053742802303262956, "grad_norm": 0.00043033124529756606, "kl": 0.0005269832909107208, "learning_rate": 3.835616438356164e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 56 }, { "completion_length": 224.21429443359375, "epoch": 0.05470249520153551, "grad_norm": 0.9621931910514832, "kl": 0.00049375812523067, "learning_rate": 3.9041095890410957e-07, "loss": 0.0, "reward": 1.0357143878936768, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 57 }, { "completion_length": 171.07144165039062, "epoch": 0.05566218809980806, "grad_norm": 1.851424217224121, "kl": 0.0005759054329246283, "learning_rate": 3.972602739726027e-07, "loss": 0.0, "reward": 1.0357143878936768, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 58 }, { "completion_length": 211.92857360839844, "epoch": 0.05662188099808062, "grad_norm": 1.220913052558899, "kl": 0.0006403859588317573, "learning_rate": 4.041095890410959e-07, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 59 }, { "completion_length": 214.7857208251953, "epoch": 0.05758157389635317, "grad_norm": 1.0436677932739258, "kl": 0.000614205957390368, "learning_rate": 4.10958904109589e-07, "loss": 0.0, "reward": 0.8928571939468384, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 60 }, { "completion_length": 240.07144165039062, "epoch": 0.05854126679462572, "grad_norm": 1.06368887424469, "kl": 0.0006605104426853359, "learning_rate": 4.1780821917808217e-07, "loss": 0.0, "reward": 0.9642857313156128, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 61 }, { "completion_length": 150.1428680419922, "epoch": 0.05950095969289827, "grad_norm": 1.8254249095916748, "kl": 0.0005796228069812059, "learning_rate": 4.246575342465753e-07, "loss": 0.0, "reward": 1.0357143878936768, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 62 }, { "completion_length": 253.57144165039062, "epoch": 0.060460652591170824, "grad_norm": 1.0903822183609009, "kl": 0.0005528093315660954, "learning_rate": 4.315068493150685e-07, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 63 }, { "completion_length": 179.35714721679688, "epoch": 0.061420345489443376, "grad_norm": 0.9492788910865784, "kl": 0.0008303487556986511, "learning_rate": 4.383561643835616e-07, "loss": 0.0, "reward": 0.8928571939468384, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 64 }, { "completion_length": 149.35714721679688, "epoch": 0.06238003838771593, "grad_norm": 1.4028934240341187, "kl": 0.0007781743188388646, "learning_rate": 4.4520547945205477e-07, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 65 }, { "completion_length": 148.7857208251953, "epoch": 0.06333973128598848, "grad_norm": 0.0005605512415058911, "kl": 0.0007003864157013595, "learning_rate": 4.520547945205479e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 66 }, { "completion_length": 180.85714721679688, "epoch": 0.06429942418426103, "grad_norm": 1.512935996055603, "kl": 0.0008337364997714758, "learning_rate": 4.589041095890411e-07, "loss": 0.0, "reward": 0.9642857313156128, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 67 }, { "completion_length": 250.6428680419922, "epoch": 0.06525911708253358, "grad_norm": 0.00042898725951090455, "kl": 0.0006646972615271807, "learning_rate": 4.657534246575342e-07, "loss": 0.0, "reward": 0.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 68 }, { "completion_length": 172.21429443359375, "epoch": 0.06621880998080615, "grad_norm": 0.0006543786730617285, "kl": 0.0009235838078893721, "learning_rate": 4.726027397260274e-07, "loss": 0.0, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 69 }, { "completion_length": 177.92857360839844, "epoch": 0.0671785028790787, "grad_norm": 1.167068362236023, "kl": 0.0006242688978090882, "learning_rate": 4.794520547945205e-07, "loss": 0.0, "reward": 0.8928571939468384, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 70 }, { "completion_length": 209.92857360839844, "epoch": 0.06813819577735125, "grad_norm": 1.7529125213623047, "kl": 0.0007369662052951753, "learning_rate": 4.863013698630137e-07, "loss": 0.0, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 71 }, { "completion_length": 164.07144165039062, "epoch": 0.0690978886756238, "grad_norm": 0.0006184062804095447, "kl": 0.0007946805562824011, "learning_rate": 4.931506849315068e-07, "loss": 0.0, "reward": 0.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 72 }, { "completion_length": 191.7857208251953, "epoch": 0.07005758157389635, "grad_norm": 0.0006236507324501872, "kl": 0.0006776523077860475, "learning_rate": 5e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 73 }, { "completion_length": 154.35714721679688, "epoch": 0.0710172744721689, "grad_norm": 1.4245244264602661, "kl": 0.0008888279553502798, "learning_rate": 4.999986861014787e-07, "loss": 0.0, "reward": 0.9642857313156128, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 74 }, { "completion_length": 137.92857360839844, "epoch": 0.07197696737044146, "grad_norm": 0.0008591034566052258, "kl": 0.0008482217090204358, "learning_rate": 4.999947444197252e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 75 }, { "completion_length": 173.21429443359375, "epoch": 0.07293666026871401, "grad_norm": 1.50313401222229, "kl": 0.0007173003978095949, "learning_rate": 4.999881749961716e-07, "loss": 0.0, "reward": 1.0357143878936768, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 76 }, { "completion_length": 152.6428680419922, "epoch": 0.07389635316698656, "grad_norm": 0.841323971748352, "kl": 0.0007954436005093157, "learning_rate": 4.999789778998701e-07, "loss": 0.0, "reward": 0.8214285969734192, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.4285714626312256, "rewards/check_similarity_func": 0.392857164144516, "step": 77 }, { "completion_length": 174.7857208251953, "epoch": 0.07485604606525911, "grad_norm": 0.0006210668943822384, "kl": 0.0007593909394927323, "learning_rate": 4.999671532274931e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 78 }, { "completion_length": 234.85714721679688, "epoch": 0.07581573896353166, "grad_norm": 0.0006548356031998992, "kl": 0.0008718297467567027, "learning_rate": 4.99952701103332e-07, "loss": 0.0, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 79 }, { "completion_length": 180.57144165039062, "epoch": 0.07677543186180422, "grad_norm": 0.0007575255585834384, "kl": 0.000927746354136616, "learning_rate": 4.99935621679296e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 80 }, { "completion_length": 165.35714721679688, "epoch": 0.07773512476007678, "grad_norm": 2.039008855819702, "kl": 0.0012851539067924023, "learning_rate": 4.999159151349098e-07, "loss": 0.0, "reward": 0.9642857313156128, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 81 }, { "completion_length": 152.57144165039062, "epoch": 0.07869481765834933, "grad_norm": 1.3241513967514038, "kl": 0.0011750230332836509, "learning_rate": 4.998935816773127e-07, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 82 }, { "completion_length": 204.57144165039062, "epoch": 0.07965451055662189, "grad_norm": 0.8959946632385254, "kl": 0.0009609319386072457, "learning_rate": 4.998686215412561e-07, "loss": 0.0, "reward": 1.0357143878936768, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 83 }, { "completion_length": 164.42857360839844, "epoch": 0.08061420345489444, "grad_norm": 1.787232756614685, "kl": 0.001210602349601686, "learning_rate": 4.998410349891004e-07, "loss": 0.0, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 84 }, { "completion_length": 194.07144165039062, "epoch": 0.08157389635316699, "grad_norm": 1.171020269393921, "kl": 0.00131676543969661, "learning_rate": 4.998108223108133e-07, "loss": 0.0, "reward": 0.8571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 85 }, { "completion_length": 155.57144165039062, "epoch": 0.08253358925143954, "grad_norm": 2.2473363876342773, "kl": 0.0015701391967013478, "learning_rate": 4.997779838239657e-07, "loss": 0.0, "reward": 1.1071429252624512, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 86 }, { "completion_length": 201.6428680419922, "epoch": 0.08349328214971209, "grad_norm": 2.2553136348724365, "kl": 0.0015474478714168072, "learning_rate": 4.997425198737292e-07, "loss": 0.0, "reward": 0.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 87 }, { "completion_length": 135.7857208251953, "epoch": 0.08445297504798464, "grad_norm": 0.001240782206878066, "kl": 0.0014980220003053546, "learning_rate": 4.997044308328722e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 88 }, { "completion_length": 148.5, "epoch": 0.0854126679462572, "grad_norm": 0.693618893623352, "kl": 0.0017393698217347264, "learning_rate": 4.996637171017555e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 89 }, { "completion_length": 199.92857360839844, "epoch": 0.08637236084452975, "grad_norm": 1.0596736669540405, "kl": 0.0011438778601586819, "learning_rate": 4.99620379108329e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 90 }, { "completion_length": 218.2857208251953, "epoch": 0.0873320537428023, "grad_norm": 1.292203426361084, "kl": 0.0011100020492449403, "learning_rate": 4.995744173081264e-07, "loss": 0.0, "reward": 0.9642857313156128, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 91 }, { "completion_length": 159.92857360839844, "epoch": 0.08829174664107485, "grad_norm": 3.5143728256225586, "kl": 0.001979230670258403, "learning_rate": 4.995258321842611e-07, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 92 }, { "completion_length": 175.2857208251953, "epoch": 0.0892514395393474, "grad_norm": 0.0010907513787969947, "kl": 0.0013792816316708922, "learning_rate": 4.994746242474199e-07, "loss": 0.0, "reward": 0.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 93 }, { "completion_length": 143.7857208251953, "epoch": 0.09021113243761997, "grad_norm": 1.1681162118911743, "kl": 0.001693969708867371, "learning_rate": 4.994207940358596e-07, "loss": 0.0, "reward": 0.8214285969734192, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3214285969734192, "step": 94 }, { "completion_length": 212.1428680419922, "epoch": 0.09117082533589252, "grad_norm": 0.9037929177284241, "kl": 0.0015703574754297733, "learning_rate": 4.993643421153995e-07, "loss": 0.0, "reward": 0.8928571939468384, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 95 }, { "completion_length": 169.92857360839844, "epoch": 0.09213051823416507, "grad_norm": 1.0435338020324707, "kl": 0.0018234578892588615, "learning_rate": 4.993052690794164e-07, "loss": 0.0, "reward": 0.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.4285714626312256, "rewards/check_similarity_func": 0.392857164144516, "step": 96 }, { "completion_length": 196.71429443359375, "epoch": 0.09309021113243762, "grad_norm": 0.83295077085495, "kl": 0.001929515739902854, "learning_rate": 4.99243575548838e-07, "loss": 0.0, "reward": 0.8928571939468384, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 97 }, { "completion_length": 193.2857208251953, "epoch": 0.09404990403071017, "grad_norm": 0.7400502562522888, "kl": 0.0018463353626430035, "learning_rate": 4.991792621721365e-07, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 98 }, { "completion_length": 185.6428680419922, "epoch": 0.09500959692898273, "grad_norm": 0.0012302921386435628, "kl": 0.0016211157198995352, "learning_rate": 4.991123296253223e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 99 }, { "completion_length": 164.35714721679688, "epoch": 0.09596928982725528, "grad_norm": 1.795427918434143, "kl": 0.0017312451964244246, "learning_rate": 4.990427786119356e-07, "loss": 0.0, "reward": 1.0357143878936768, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 100 }, { "completion_length": 151.07144165039062, "epoch": 0.09692898272552783, "grad_norm": 1.3232777118682861, "kl": 0.0019214052008464932, "learning_rate": 4.989706098630405e-07, "loss": 0.0, "reward": 0.8928571939468384, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 101 }, { "completion_length": 190.7857208251953, "epoch": 0.09788867562380038, "grad_norm": 0.9907654523849487, "kl": 0.0019282526336610317, "learning_rate": 4.98895824137216e-07, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 102 }, { "completion_length": 165.71429443359375, "epoch": 0.09884836852207293, "grad_norm": 1.6092760562896729, "kl": 0.0019320814171805978, "learning_rate": 4.988184222205492e-07, "loss": 0.0, "reward": 1.1428571939468384, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 103 }, { "completion_length": 206.42857360839844, "epoch": 0.09980806142034548, "grad_norm": 0.940392255783081, "kl": 0.0020117773674428463, "learning_rate": 4.98738404926626e-07, "loss": 0.0, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.2857142984867096, "step": 104 }, { "completion_length": 153.7857208251953, "epoch": 0.10076775431861804, "grad_norm": 0.0015736296772956848, "kl": 0.002285775961354375, "learning_rate": 4.986557730965234e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 105 }, { "completion_length": 205.07144165039062, "epoch": 0.1017274472168906, "grad_norm": 0.993496835231781, "kl": 0.0023006112314760685, "learning_rate": 4.985705275988e-07, "loss": 0.0, "reward": 0.9642857313156128, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 106 }, { "completion_length": 170.7857208251953, "epoch": 0.10268714011516315, "grad_norm": 0.0017618397250771523, "kl": 0.002346398076042533, "learning_rate": 4.984826693294873e-07, "loss": 0.0, "reward": 0.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 107 }, { "completion_length": 184.35714721679688, "epoch": 0.1036468330134357, "grad_norm": 0.0013113424647599459, "kl": 0.0021235402673482895, "learning_rate": 4.983921992120801e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 108 }, { "completion_length": 197.57144165039062, "epoch": 0.10460652591170826, "grad_norm": 0.0016514918534085155, "kl": 0.002535882405936718, "learning_rate": 4.98299118197527e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 109 }, { "completion_length": 253.07144165039062, "epoch": 0.10556621880998081, "grad_norm": 0.991838812828064, "kl": 0.0026058093644678593, "learning_rate": 4.982034272642197e-07, "loss": 0.0, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 110 }, { "completion_length": 194.50001525878906, "epoch": 0.10652591170825336, "grad_norm": 0.001055152271874249, "kl": 0.0017826651455834508, "learning_rate": 4.98105127417984e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 111 }, { "completion_length": 137.85714721679688, "epoch": 0.10748560460652591, "grad_norm": 2.029280185699463, "kl": 0.0037521887570619583, "learning_rate": 4.980042196920677e-07, "loss": 0.0, "reward": 1.0357143878936768, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 112 }, { "completion_length": 187.50001525878906, "epoch": 0.10844529750479846, "grad_norm": 0.0018392400816082954, "kl": 0.002890396397560835, "learning_rate": 4.979007051471312e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 113 }, { "completion_length": 186.57144165039062, "epoch": 0.10940499040307101, "grad_norm": 0.720744788646698, "kl": 0.0027650410775095224, "learning_rate": 4.977945848712352e-07, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 114 }, { "completion_length": 187.35714721679688, "epoch": 0.11036468330134357, "grad_norm": 0.0014886766439303756, "kl": 0.0023108210880309343, "learning_rate": 4.976858599798298e-07, "loss": 0.0, "reward": 0.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 115 }, { "completion_length": 264.5714416503906, "epoch": 0.11132437619961612, "grad_norm": 0.899320125579834, "kl": 0.0019858619198203087, "learning_rate": 4.975745316157431e-07, "loss": 0.0, "reward": 0.9642857313156128, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 116 }, { "completion_length": 188.21429443359375, "epoch": 0.11228406909788867, "grad_norm": 0.001953016733750701, "kl": 0.0032069848384708166, "learning_rate": 4.974606009491682e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 117 }, { "completion_length": 203.92857360839844, "epoch": 0.11324376199616124, "grad_norm": 0.002662137383595109, "kl": 0.004089969675987959, "learning_rate": 4.973440691776519e-07, "loss": 0.0, "reward": 0.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 118 }, { "completion_length": 180.50001525878906, "epoch": 0.11420345489443379, "grad_norm": 0.6971079111099243, "kl": 0.0022500501945614815, "learning_rate": 4.972249375260816e-07, "loss": 0.0, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.4285714626312256, "rewards/check_similarity_func": 0.4285714626312256, "step": 119 }, { "completion_length": 214.00001525878906, "epoch": 0.11516314779270634, "grad_norm": 0.001525025931186974, "kl": 0.0026641013100743294, "learning_rate": 4.971032072466725e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 120 }, { "completion_length": 164.21429443359375, "epoch": 0.11612284069097889, "grad_norm": 0.002335787983611226, "kl": 0.004394500516355038, "learning_rate": 4.969788796189544e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 121 }, { "completion_length": 245.57144165039062, "epoch": 0.11708253358925144, "grad_norm": 1.0781161785125732, "kl": 0.003321615280583501, "learning_rate": 4.968519559497584e-07, "loss": 0.0, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 122 }, { "completion_length": 150.1428680419922, "epoch": 0.118042226487524, "grad_norm": 1.4728587865829468, "kl": 0.004604162182658911, "learning_rate": 4.967224375732031e-07, "loss": 0.0, "reward": 0.9642857313156128, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 123 }, { "completion_length": 181.2857208251953, "epoch": 0.11900191938579655, "grad_norm": 1.3879705667495728, "kl": 0.005900422111153603, "learning_rate": 4.965903258506806e-07, "loss": 0.0, "reward": 0.8928571939468384, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 124 }, { "completion_length": 194.50001525878906, "epoch": 0.1199616122840691, "grad_norm": 0.001510600675828755, "kl": 0.0028985326644033194, "learning_rate": 4.964556221708419e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 125 }, { "completion_length": 196.07144165039062, "epoch": 0.12092130518234165, "grad_norm": 0.001902991090901196, "kl": 0.003511160146445036, "learning_rate": 4.963183279495829e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 126 }, { "completion_length": 156.07144165039062, "epoch": 0.1218809980806142, "grad_norm": 0.0018762067193165421, "kl": 0.0036501954309642315, "learning_rate": 4.961784446300289e-07, "loss": 0.0, "reward": 0.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 127 }, { "completion_length": 168.5, "epoch": 0.12284069097888675, "grad_norm": 0.0025846411008387804, "kl": 0.004706065636128187, "learning_rate": 4.960359736825198e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 128 }, { "completion_length": 151.35714721679688, "epoch": 0.1238003838771593, "grad_norm": 0.0029303100891411304, "kl": 0.0054856245405972, "learning_rate": 4.958909166045946e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 129 }, { "completion_length": 184.07144165039062, "epoch": 0.12476007677543186, "grad_norm": 0.8407133221626282, "kl": 0.0047586457803845406, "learning_rate": 4.957432749209755e-07, "loss": 0.0, "reward": 0.9642857313156128, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 130 }, { "completion_length": 111.92857360839844, "epoch": 0.1257197696737044, "grad_norm": 1.227720856666565, "kl": 0.006375845056027174, "learning_rate": 4.955930501835519e-07, "loss": 0.0, "reward": 0.8928571939468384, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 131 }, { "completion_length": 187.6428680419922, "epoch": 0.12667946257197696, "grad_norm": 0.0019379121949896216, "kl": 0.0037213603500276804, "learning_rate": 4.954402439713645e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 132 }, { "completion_length": 207.00001525878906, "epoch": 0.1276391554702495, "grad_norm": 1.9353463649749756, "kl": 0.004825687035918236, "learning_rate": 4.95284857890588e-07, "loss": 0.0, "reward": 0.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 133 }, { "completion_length": 184.6428680419922, "epoch": 0.12859884836852206, "grad_norm": 0.7461886405944824, "kl": 0.00727982958778739, "learning_rate": 4.951268935745148e-07, "loss": 0.0, "reward": 0.9642857313156128, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 134 }, { "completion_length": 207.1428680419922, "epoch": 0.1295585412667946, "grad_norm": 1.8066493272781372, "kl": 0.004730013199150562, "learning_rate": 4.949663526835375e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 135 }, { "completion_length": 168.85714721679688, "epoch": 0.13051823416506717, "grad_norm": 1.422582745552063, "kl": 0.004582114517688751, "learning_rate": 4.948032369051317e-07, "loss": 0.0, "reward": 0.9642857313156128, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 136 }, { "completion_length": 179.6428680419922, "epoch": 0.13147792706333974, "grad_norm": 0.6377055048942566, "kl": 0.005797920282930136, "learning_rate": 4.946375479538379e-07, "loss": 0.0, "reward": 0.8928571939468384, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 137 }, { "completion_length": 153.35714721679688, "epoch": 0.1324376199616123, "grad_norm": 1.7647324800491333, "kl": 0.005460761953145266, "learning_rate": 4.944692875712441e-07, "loss": 0.0, "reward": 0.9642857313156128, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 138 }, { "completion_length": 165.21429443359375, "epoch": 0.13339731285988485, "grad_norm": 0.871253252029419, "kl": 0.011384781450033188, "learning_rate": 4.942984575259665e-07, "loss": 0.0, "reward": 0.9642857313156128, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 139 }, { "completion_length": 197.92857360839844, "epoch": 0.1343570057581574, "grad_norm": 0.004494763910770416, "kl": 0.008076949045062065, "learning_rate": 4.941250596136321e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 140 }, { "completion_length": 237.6428680419922, "epoch": 0.13531669865642995, "grad_norm": 1.0263288021087646, "kl": 0.0053758202120661736, "learning_rate": 4.939490956568589e-07, "loss": 0.0, "reward": 0.9642857313156128, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 141 }, { "completion_length": 195.21429443359375, "epoch": 0.1362763915547025, "grad_norm": 0.9987514615058899, "kl": 0.009137335233390331, "learning_rate": 4.937705675052372e-07, "loss": 0.0, "reward": 0.9642857313156128, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 142 }, { "completion_length": 206.57144165039062, "epoch": 0.13723608445297505, "grad_norm": 1.6772449016571045, "kl": 0.006298387888818979, "learning_rate": 4.935894770353099e-07, "loss": 0.0, "reward": 0.9642857313156128, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 143 }, { "completion_length": 209.35714721679688, "epoch": 0.1381957773512476, "grad_norm": 0.6112613677978516, "kl": 0.0054167406633496284, "learning_rate": 4.934058261505531e-07, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 144 }, { "completion_length": 201.07144165039062, "epoch": 0.13915547024952016, "grad_norm": 1.0353496074676514, "kl": 0.006668028887361288, "learning_rate": 4.932196167813558e-07, "loss": 0.0, "reward": 0.9642857313156128, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 145 }, { "completion_length": 177.6428680419922, "epoch": 0.1401151631477927, "grad_norm": 0.6712509989738464, "kl": 0.006500499788671732, "learning_rate": 4.930308508849996e-07, "loss": 0.0, "reward": 0.8928571939468384, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 146 }, { "completion_length": 180.35714721679688, "epoch": 0.14107485604606526, "grad_norm": 0.7718272805213928, "kl": 0.009345046244561672, "learning_rate": 4.928395304456386e-07, "loss": 0.0, "reward": 0.9642857313156128, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 147 }, { "completion_length": 188.7857208251953, "epoch": 0.1420345489443378, "grad_norm": 1.8448102474212646, "kl": 0.008501261472702026, "learning_rate": 4.926456574742778e-07, "loss": 0.0, "reward": 0.9642857313156128, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 148 }, { "completion_length": 168.7857208251953, "epoch": 0.14299424184261036, "grad_norm": 0.00260656769387424, "kl": 0.007090277038514614, "learning_rate": 4.924492340087524e-07, "loss": 0.0, "reward": 0.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 149 }, { "completion_length": 193.35714721679688, "epoch": 0.14395393474088292, "grad_norm": 0.0031598450150340796, "kl": 0.008664984256029129, "learning_rate": 4.922502621137065e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 150 }, { "completion_length": 136.5, "epoch": 0.14491362763915547, "grad_norm": 0.9014323353767395, "kl": 0.010229033417999744, "learning_rate": 4.92048743880571e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 151 }, { "completion_length": 168.21429443359375, "epoch": 0.14587332053742802, "grad_norm": 0.0032933016773313284, "kl": 0.009554143995046616, "learning_rate": 4.918446814275422e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 152 }, { "completion_length": 234.9285888671875, "epoch": 0.14683301343570057, "grad_norm": 0.0033402442932128906, "kl": 0.009741727262735367, "learning_rate": 4.916380768995589e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 153 }, { "completion_length": 204.92857360839844, "epoch": 0.14779270633397312, "grad_norm": 0.0028059438336640596, "kl": 0.008780969306826591, "learning_rate": 4.9142893246828e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 154 }, { "completion_length": 223.57144165039062, "epoch": 0.14875239923224567, "grad_norm": 0.6354474425315857, "kl": 0.010058660060167313, "learning_rate": 4.912172503320621e-07, "loss": 0.0, "reward": 0.8928571939468384, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 155 }, { "completion_length": 195.57144165039062, "epoch": 0.14971209213051823, "grad_norm": 0.7040085792541504, "kl": 0.0070348032750189304, "learning_rate": 4.910030327159359e-07, "loss": 0.0, "reward": 0.9642857313156128, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 156 }, { "completion_length": 200.1428680419922, "epoch": 0.15067178502879078, "grad_norm": 0.3525971472263336, "kl": 0.009910700842738152, "learning_rate": 4.907862818715831e-07, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 157 }, { "completion_length": 240.00001525878906, "epoch": 0.15163147792706333, "grad_norm": 0.5699383020401001, "kl": 0.009681749157607555, "learning_rate": 4.905670000773126e-07, "loss": 0.0, "reward": 0.9642857313156128, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 158 }, { "completion_length": 211.35714721679688, "epoch": 0.15259117082533588, "grad_norm": 1.1533465385437012, "kl": 0.01693972945213318, "learning_rate": 4.903451896380366e-07, "loss": 0.0, "reward": 0.9642857313156128, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 159 }, { "completion_length": 187.1428680419922, "epoch": 0.15355086372360843, "grad_norm": 0.003596064867451787, "kl": 0.011682404205203056, "learning_rate": 4.901208528852464e-07, "loss": 0.0, "reward": 0.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 160 }, { "completion_length": 213.92857360839844, "epoch": 0.15451055662188098, "grad_norm": 0.0023009134456515312, "kl": 0.008704651147127151, "learning_rate": 4.898939921769879e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 161 }, { "completion_length": 130.21429443359375, "epoch": 0.15547024952015356, "grad_norm": 0.005562320351600647, "kl": 0.01667219214141369, "learning_rate": 4.896646098978365e-07, "loss": 0.0, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 162 }, { "completion_length": 149.35714721679688, "epoch": 0.15642994241842612, "grad_norm": 1.5551508665084839, "kl": 0.01384158618748188, "learning_rate": 4.894327084588727e-07, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 163 }, { "completion_length": 156.71429443359375, "epoch": 0.15738963531669867, "grad_norm": 0.6273984313011169, "kl": 0.014453531242907047, "learning_rate": 4.89198290297656e-07, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 164 }, { "completion_length": 134.57144165039062, "epoch": 0.15834932821497122, "grad_norm": 1.1885071992874146, "kl": 0.01909637451171875, "learning_rate": 4.889613578781998e-07, "loss": 0.0, "reward": 0.8928571939468384, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 165 }, { "completion_length": 143.35714721679688, "epoch": 0.15930902111324377, "grad_norm": 1.3448946475982666, "kl": 0.02029375545680523, "learning_rate": 4.887219136909455e-07, "loss": 0.0, "reward": 1.0357143878936768, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 166 }, { "completion_length": 183.7857208251953, "epoch": 0.16026871401151632, "grad_norm": 1.2920584678649902, "kl": 0.01534243207424879, "learning_rate": 4.884799602527359e-07, "loss": 0.0, "reward": 0.9642857313156128, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 167 }, { "completion_length": 163.92857360839844, "epoch": 0.16122840690978887, "grad_norm": 0.005278433673083782, "kl": 0.016900423914194107, "learning_rate": 4.882355001067891e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 168 }, { "completion_length": 152.1428680419922, "epoch": 0.16218809980806143, "grad_norm": 0.0052945055067539215, "kl": 0.01847376674413681, "learning_rate": 4.879885358226717e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 169 }, { "completion_length": 190.71429443359375, "epoch": 0.16314779270633398, "grad_norm": 0.00422052713111043, "kl": 0.014795646071434021, "learning_rate": 4.877390699962719e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 170 }, { "completion_length": 156.2857208251953, "epoch": 0.16410748560460653, "grad_norm": 0.0037692708428949118, "kl": 0.015278493985533714, "learning_rate": 4.874871052497718e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 171 }, { "completion_length": 153.7857208251953, "epoch": 0.16506717850287908, "grad_norm": 1.1480664014816284, "kl": 0.0181843563914299, "learning_rate": 4.872326442316202e-07, "loss": 0.0, "reward": 0.9642857313156128, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 172 }, { "completion_length": 169.0, "epoch": 0.16602687140115163, "grad_norm": 0.009811441414058208, "kl": 0.022911228239536285, "learning_rate": 4.86975689616505e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 173 }, { "completion_length": 142.6428680419922, "epoch": 0.16698656429942418, "grad_norm": 1.0593441724777222, "kl": 0.023030204698443413, "learning_rate": 4.867162441053243e-07, "loss": 0.0, "reward": 0.9642857313156128, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 174 }, { "completion_length": 140.21429443359375, "epoch": 0.16794625719769674, "grad_norm": 0.004788343794643879, "kl": 0.01765555888414383, "learning_rate": 4.864543104251586e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 175 }, { "completion_length": 187.21429443359375, "epoch": 0.1689059500959693, "grad_norm": 1.266419529914856, "kl": 0.014879493042826653, "learning_rate": 4.861898913292424e-07, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 176 }, { "completion_length": 178.71429443359375, "epoch": 0.16986564299424184, "grad_norm": 0.0040213484317064285, "kl": 0.017254559323191643, "learning_rate": 4.859229895969343e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 177 }, { "completion_length": 193.07144165039062, "epoch": 0.1708253358925144, "grad_norm": 0.0028829851653426886, "kl": 0.010877002961933613, "learning_rate": 4.856536080336889e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 178 }, { "completion_length": 137.21429443359375, "epoch": 0.17178502879078694, "grad_norm": 1.1005918979644775, "kl": 0.018745139241218567, "learning_rate": 4.853817494710262e-07, "loss": 0.0, "reward": 1.0357143878936768, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 179 }, { "completion_length": 110.71429443359375, "epoch": 0.1727447216890595, "grad_norm": 0.004471378400921822, "kl": 0.017219379544258118, "learning_rate": 4.851074167665029e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 180 }, { "completion_length": 204.35714721679688, "epoch": 0.17370441458733205, "grad_norm": 1.1199885606765747, "kl": 0.016226841136813164, "learning_rate": 4.848306128036817e-07, "loss": 0.0, "reward": 1.1071429252624512, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 181 }, { "completion_length": 289.4285888671875, "epoch": 0.1746641074856046, "grad_norm": 0.6023062467575073, "kl": 0.013112773187458515, "learning_rate": 4.84551340492101e-07, "loss": 0.0, "reward": 0.8928571939468384, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 182 }, { "completion_length": 154.0, "epoch": 0.17562380038387715, "grad_norm": 0.009614299051463604, "kl": 0.025441182777285576, "learning_rate": 4.842696027672447e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 183 }, { "completion_length": 179.35714721679688, "epoch": 0.1765834932821497, "grad_norm": 0.0044060805812478065, "kl": 0.017291691154241562, "learning_rate": 4.839854025905111e-07, "loss": 0.0, "reward": 0.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 184 }, { "completion_length": 171.50001525878906, "epoch": 0.17754318618042225, "grad_norm": 0.004085490480065346, "kl": 0.014271439053118229, "learning_rate": 4.836987429491815e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 185 }, { "completion_length": 120.5714340209961, "epoch": 0.1785028790786948, "grad_norm": 0.006327782291918993, "kl": 0.02461230382323265, "learning_rate": 4.834096268563896e-07, "loss": 0.0, "reward": 0.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 186 }, { "completion_length": 155.6428680419922, "epoch": 0.17946257197696738, "grad_norm": 0.005439620930701494, "kl": 0.019749080762267113, "learning_rate": 4.83118057351089e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 187 }, { "completion_length": 128.71429443359375, "epoch": 0.18042226487523993, "grad_norm": 2.216181993484497, "kl": 0.021259106695652008, "learning_rate": 4.828240374980215e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 188 }, { "completion_length": 192.6428680419922, "epoch": 0.1813819577735125, "grad_norm": 1.7119697332382202, "kl": 0.017793886363506317, "learning_rate": 4.825275703876852e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 189 }, { "completion_length": 178.92857360839844, "epoch": 0.18234165067178504, "grad_norm": 1.4379732608795166, "kl": 0.01828175224363804, "learning_rate": 4.822286591363017e-07, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 190 }, { "completion_length": 149.35714721679688, "epoch": 0.1833013435700576, "grad_norm": 0.8005691766738892, "kl": 0.02084233984351158, "learning_rate": 4.819273068857834e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 191 }, { "completion_length": 128.57144165039062, "epoch": 0.18426103646833014, "grad_norm": 1.10016930103302, "kl": 0.02503875643014908, "learning_rate": 4.816235168037004e-07, "loss": 0.0, "reward": 0.9642857313156128, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 192 }, { "completion_length": 162.5, "epoch": 0.1852207293666027, "grad_norm": 0.005094869062304497, "kl": 0.018825901672244072, "learning_rate": 4.813172920832476e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 193 }, { "completion_length": 167.7857208251953, "epoch": 0.18618042226487524, "grad_norm": 0.004214588087052107, "kl": 0.01660284213721752, "learning_rate": 4.810086359432106e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 194 }, { "completion_length": 182.1428680419922, "epoch": 0.1871401151631478, "grad_norm": 0.0038420497439801693, "kl": 0.01660393923521042, "learning_rate": 4.80697551627932e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 195 }, { "completion_length": 142.57144165039062, "epoch": 0.18809980806142035, "grad_norm": 1.2804063558578491, "kl": 0.020383134484291077, "learning_rate": 4.803840424072778e-07, "loss": 0.0, "reward": 1.1428571939468384, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 196 }, { "completion_length": 190.2857208251953, "epoch": 0.1890595009596929, "grad_norm": 0.006129685323685408, "kl": 0.02046089433133602, "learning_rate": 4.800681115766023e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 197 }, { "completion_length": 175.57144165039062, "epoch": 0.19001919385796545, "grad_norm": 0.009177101776003838, "kl": 0.028699778020381927, "learning_rate": 4.79749762456714e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 198 }, { "completion_length": 201.1428680419922, "epoch": 0.190978886756238, "grad_norm": 0.6709465980529785, "kl": 0.019823528826236725, "learning_rate": 4.794289983938402e-07, "loss": 0.0, "reward": 0.9642857313156128, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 199 }, { "completion_length": 137.21429443359375, "epoch": 0.19193857965451055, "grad_norm": 0.013041699305176735, "kl": 0.04300703853368759, "learning_rate": 4.791058227595926e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 200 }, { "completion_length": 175.6428680419922, "epoch": 0.1928982725527831, "grad_norm": 0.0060727340169250965, "kl": 0.0255341287702322, "learning_rate": 4.787802389509309e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 201 }, { "completion_length": 168.57144165039062, "epoch": 0.19385796545105566, "grad_norm": 0.0055686309933662415, "kl": 0.021002165973186493, "learning_rate": 4.784522503901279e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 202 }, { "completion_length": 151.42857360839844, "epoch": 0.1948176583493282, "grad_norm": 0.005737540312111378, "kl": 0.023919476196169853, "learning_rate": 4.78121860524733e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 203 }, { "completion_length": 204.6428680419922, "epoch": 0.19577735124760076, "grad_norm": 0.7758740782737732, "kl": 0.02548404410481453, "learning_rate": 4.777890728275362e-07, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 204 }, { "completion_length": 95.50000762939453, "epoch": 0.1967370441458733, "grad_norm": 2.1540911197662354, "kl": 0.03409199416637421, "learning_rate": 4.774538907965318e-07, "loss": 0.0, "reward": 0.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 205 }, { "completion_length": 186.92857360839844, "epoch": 0.19769673704414586, "grad_norm": 0.003479765960946679, "kl": 0.016037967056035995, "learning_rate": 4.771163179548808e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 206 }, { "completion_length": 137.57144165039062, "epoch": 0.19865642994241842, "grad_norm": 0.004365667700767517, "kl": 0.019188418984413147, "learning_rate": 4.7677635785087544e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 207 }, { "completion_length": 141.21429443359375, "epoch": 0.19961612284069097, "grad_norm": 0.00950806774199009, "kl": 0.03066542185842991, "learning_rate": 4.764340140578999e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 208 }, { "completion_length": 132.7857208251953, "epoch": 0.20057581573896352, "grad_norm": 0.005804874002933502, "kl": 0.026386281475424767, "learning_rate": 4.760892901743944e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 209 }, { "completion_length": 177.57144165039062, "epoch": 0.20153550863723607, "grad_norm": 0.7388215065002441, "kl": 0.019911017268896103, "learning_rate": 4.757421898238164e-07, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 210 }, { "completion_length": 160.57144165039062, "epoch": 0.20249520153550865, "grad_norm": 1.6219192743301392, "kl": 0.02288259007036686, "learning_rate": 4.7539271665460313e-07, "loss": 0.0, "reward": 1.1785714626312256, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 211 }, { "completion_length": 189.92857360839844, "epoch": 0.2034548944337812, "grad_norm": 0.006890783552080393, "kl": 0.02335246466100216, "learning_rate": 4.750408743401328e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 212 }, { "completion_length": 150.5, "epoch": 0.20441458733205375, "grad_norm": 0.005320583935827017, "kl": 0.024201389402151108, "learning_rate": 4.746866665786862e-07, "loss": 0.0, "reward": 0.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 213 }, { "completion_length": 216.1428680419922, "epoch": 0.2053742802303263, "grad_norm": 0.0030971611849963665, "kl": 0.012719214893877506, "learning_rate": 4.7433009709340764e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 214 }, { "completion_length": 166.5, "epoch": 0.20633397312859886, "grad_norm": 1.3721002340316772, "kl": 0.02537263184785843, "learning_rate": 4.7397116963226627e-07, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 215 }, { "completion_length": 171.21429443359375, "epoch": 0.2072936660268714, "grad_norm": 0.0044084275141358376, "kl": 0.018814370036125183, "learning_rate": 4.73609887968016e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 216 }, { "completion_length": 189.42857360839844, "epoch": 0.20825335892514396, "grad_norm": 0.003386329161003232, "kl": 0.018496019765734673, "learning_rate": 4.732462558981566e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 217 }, { "completion_length": 174.57144165039062, "epoch": 0.2092130518234165, "grad_norm": 0.005903204437345266, "kl": 0.02357655204832554, "learning_rate": 4.7288027724489293e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 218 }, { "completion_length": 144.85714721679688, "epoch": 0.21017274472168906, "grad_norm": 0.0063539124093949795, "kl": 0.026042813435196877, "learning_rate": 4.7251195585509563e-07, "loss": 0.0, "reward": 0.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 219 }, { "completion_length": 225.57144165039062, "epoch": 0.21113243761996162, "grad_norm": 0.5840630531311035, "kl": 0.020570658147335052, "learning_rate": 4.7214129560026004e-07, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 220 }, { "completion_length": 163.57144165039062, "epoch": 0.21209213051823417, "grad_norm": 0.8624732494354248, "kl": 0.030816497281193733, "learning_rate": 4.71768300376466e-07, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 221 }, { "completion_length": 158.6428680419922, "epoch": 0.21305182341650672, "grad_norm": 0.010082244873046875, "kl": 0.03390180319547653, "learning_rate": 4.7139297410433633e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 222 }, { "completion_length": 219.1428680419922, "epoch": 0.21401151631477927, "grad_norm": 0.003929515834897757, "kl": 0.015034579671919346, "learning_rate": 4.710153207289962e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 223 }, { "completion_length": 169.71429443359375, "epoch": 0.21497120921305182, "grad_norm": 1.2498453855514526, "kl": 0.015637336298823357, "learning_rate": 4.7063534422003124e-07, "loss": 0.0, "reward": 1.1428571939468384, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 224 }, { "completion_length": 175.42857360839844, "epoch": 0.21593090211132437, "grad_norm": 0.6344961524009705, "kl": 0.018538078293204308, "learning_rate": 4.702530485714461e-07, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 225 }, { "completion_length": 140.2857208251953, "epoch": 0.21689059500959693, "grad_norm": 1.6001454591751099, "kl": 0.03309370204806328, "learning_rate": 4.698684378016222e-07, "loss": 0.0, "reward": 0.9642857313156128, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 226 }, { "completion_length": 187.7857208251953, "epoch": 0.21785028790786948, "grad_norm": 0.003861071541905403, "kl": 0.016553178429603577, "learning_rate": 4.6948151595327585e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 227 }, { "completion_length": 179.50001525878906, "epoch": 0.21880998080614203, "grad_norm": 1.289595603942871, "kl": 0.02763347513973713, "learning_rate": 4.690922870934153e-07, "loss": 0.0, "reward": 0.9642857313156128, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 228 }, { "completion_length": 181.7857208251953, "epoch": 0.21976967370441458, "grad_norm": 0.005159349180757999, "kl": 0.021715404465794563, "learning_rate": 4.687007553132983e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 229 }, { "completion_length": 151.1428680419922, "epoch": 0.22072936660268713, "grad_norm": 1.0054240226745605, "kl": 0.125197172164917, "learning_rate": 4.6830692472838915e-07, "loss": 0.0001, "reward": 1.1428571939468384, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 230 }, { "completion_length": 160.92857360839844, "epoch": 0.22168905950095968, "grad_norm": 0.0057836780324578285, "kl": 0.024565737694501877, "learning_rate": 4.6791079947831516e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 231 }, { "completion_length": 145.42857360839844, "epoch": 0.22264875239923224, "grad_norm": 0.006736478768289089, "kl": 0.027241801843047142, "learning_rate": 4.675123837268235e-07, "loss": 0.0, "reward": 0.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 232 }, { "completion_length": 159.85714721679688, "epoch": 0.2236084452975048, "grad_norm": 0.7012543082237244, "kl": 0.024706266820430756, "learning_rate": 4.67111681661737e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 233 }, { "completion_length": 191.92857360839844, "epoch": 0.22456813819577734, "grad_norm": 0.0067091891542077065, "kl": 0.027213767170906067, "learning_rate": 4.667086974949105e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 234 }, { "completion_length": 168.57144165039062, "epoch": 0.2255278310940499, "grad_norm": 0.5987422466278076, "kl": 0.021960074082016945, "learning_rate": 4.663034354621863e-07, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 235 }, { "completion_length": 143.21429443359375, "epoch": 0.22648752399232247, "grad_norm": 0.00807868130505085, "kl": 0.03393500670790672, "learning_rate": 4.658958998233501e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 236 }, { "completion_length": 172.71429443359375, "epoch": 0.22744721689059502, "grad_norm": 0.0045842514373362064, "kl": 0.023712413385510445, "learning_rate": 4.654860948620855e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 237 }, { "completion_length": 184.42857360839844, "epoch": 0.22840690978886757, "grad_norm": 0.005864701699465513, "kl": 0.02510451152920723, "learning_rate": 4.650740248859296e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 238 }, { "completion_length": 176.42857360839844, "epoch": 0.22936660268714013, "grad_norm": 1.0205570459365845, "kl": 0.023840270936489105, "learning_rate": 4.646596942262276e-07, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 239 }, { "completion_length": 145.2857208251953, "epoch": 0.23032629558541268, "grad_norm": 0.005438005551695824, "kl": 0.02799619734287262, "learning_rate": 4.642431072380868e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 240 }, { "completion_length": 234.71429443359375, "epoch": 0.23128598848368523, "grad_norm": 0.7234974503517151, "kl": 0.015454381704330444, "learning_rate": 4.6382426830033163e-07, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 241 }, { "completion_length": 136.5, "epoch": 0.23224568138195778, "grad_norm": 1.3218432664871216, "kl": 0.04652537405490875, "learning_rate": 4.6340318181545686e-07, "loss": 0.0, "reward": 0.9642857313156128, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 242 }, { "completion_length": 166.21429443359375, "epoch": 0.23320537428023033, "grad_norm": 0.01633215695619583, "kl": 0.04895491525530815, "learning_rate": 4.6297985220958176e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 243 }, { "completion_length": 197.71429443359375, "epoch": 0.23416506717850288, "grad_norm": 0.6628484129905701, "kl": 0.043864089995622635, "learning_rate": 4.6255428393240354e-07, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 244 }, { "completion_length": 207.42857360839844, "epoch": 0.23512476007677544, "grad_norm": 0.4227886497974396, "kl": 0.019360190257430077, "learning_rate": 4.6212648145715037e-07, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 245 }, { "completion_length": 146.85714721679688, "epoch": 0.236084452975048, "grad_norm": 0.007894124835729599, "kl": 0.04040703922510147, "learning_rate": 4.6169644928053466e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 246 }, { "completion_length": 199.21429443359375, "epoch": 0.23704414587332054, "grad_norm": 0.0060507142916321754, "kl": 0.029074154794216156, "learning_rate": 4.612641919227055e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 247 }, { "completion_length": 149.0, "epoch": 0.2380038387715931, "grad_norm": 0.010395902208983898, "kl": 0.039095718413591385, "learning_rate": 4.608297139272012e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 248 }, { "completion_length": 203.57144165039062, "epoch": 0.23896353166986564, "grad_norm": 0.00798006635159254, "kl": 0.02777903899550438, "learning_rate": 4.6039301986090195e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 249 }, { "completion_length": 161.2857208251953, "epoch": 0.2399232245681382, "grad_norm": 0.007671724073588848, "kl": 0.03263075277209282, "learning_rate": 4.59954114313981e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 250 }, { "completion_length": 148.21429443359375, "epoch": 0.24088291746641075, "grad_norm": 0.005485722795128822, "kl": 0.029129670932888985, "learning_rate": 4.595130018998574e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 251 }, { "completion_length": 133.21429443359375, "epoch": 0.2418426103646833, "grad_norm": 1.4581940174102783, "kl": 0.034938834607601166, "learning_rate": 4.590696872551466e-07, "loss": 0.0, "reward": 0.8928571939468384, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 252 }, { "completion_length": 163.6428680419922, "epoch": 0.24280230326295585, "grad_norm": 0.014568737708032131, "kl": 0.03749200701713562, "learning_rate": 4.586241750396122e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 253 }, { "completion_length": 134.7857208251953, "epoch": 0.2437619961612284, "grad_norm": 0.006637956947088242, "kl": 0.03361290320754051, "learning_rate": 4.5817646993611704e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 254 }, { "completion_length": 185.35714721679688, "epoch": 0.24472168905950095, "grad_norm": 0.004878560546785593, "kl": 0.025865240022540092, "learning_rate": 4.577265766505737e-07, "loss": 0.0, "reward": 0.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 255 }, { "completion_length": 148.2857208251953, "epoch": 0.2456813819577735, "grad_norm": 0.6337690353393555, "kl": 0.04611246660351753, "learning_rate": 4.5727449991189506e-07, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 256 }, { "completion_length": 174.35714721679688, "epoch": 0.24664107485604606, "grad_norm": 0.011049186810851097, "kl": 0.03321116045117378, "learning_rate": 4.568202444719449e-07, "loss": 0.0, "reward": 0.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 257 }, { "completion_length": 136.92857360839844, "epoch": 0.2476007677543186, "grad_norm": 0.009243238717317581, "kl": 0.04298282787203789, "learning_rate": 4.563638151054875e-07, "loss": 0.0, "reward": 0.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 258 }, { "completion_length": 142.85714721679688, "epoch": 0.24856046065259116, "grad_norm": 0.009046165272593498, "kl": 0.03803998976945877, "learning_rate": 4.559052166101379e-07, "loss": 0.0, "reward": 0.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 259 }, { "completion_length": 137.5, "epoch": 0.2495201535508637, "grad_norm": 0.008255716413259506, "kl": 0.03568802773952484, "learning_rate": 4.5544445380631127e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 260 }, { "completion_length": 111.78572082519531, "epoch": 0.2504798464491363, "grad_norm": 0.951898455619812, "kl": 0.0375748872756958, "learning_rate": 4.54981531537172e-07, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 261 }, { "completion_length": 137.35714721679688, "epoch": 0.2514395393474088, "grad_norm": 1.001933217048645, "kl": 0.04836565628647804, "learning_rate": 4.5451645466858325e-07, "loss": 0.0, "reward": 1.1428571939468384, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 262 }, { "completion_length": 187.92857360839844, "epoch": 0.2523992322456814, "grad_norm": 0.0051013026386499405, "kl": 0.023303594440221786, "learning_rate": 4.5404922808905543e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 263 }, { "completion_length": 162.5, "epoch": 0.2533589251439539, "grad_norm": 0.7496086359024048, "kl": 0.033655036240816116, "learning_rate": 4.5357985670969505e-07, "loss": 0.0, "reward": 1.2142857313156128, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 264 }, { "completion_length": 175.42857360839844, "epoch": 0.2543186180422265, "grad_norm": 0.8951314091682434, "kl": 0.03823316842317581, "learning_rate": 4.5310834546415303e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 265 }, { "completion_length": 175.6428680419922, "epoch": 0.255278310940499, "grad_norm": 0.006172217894345522, "kl": 0.026982121169567108, "learning_rate": 4.5263469930857283e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 266 }, { "completion_length": 149.7857208251953, "epoch": 0.2562380038387716, "grad_norm": 0.6316816210746765, "kl": 0.02901306003332138, "learning_rate": 4.5215892322153824e-07, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 267 }, { "completion_length": 195.42857360839844, "epoch": 0.2571976967370441, "grad_norm": 0.009285200387239456, "kl": 0.040833741426467896, "learning_rate": 4.516810222040213e-07, "loss": 0.0, "reward": 1.1428571939468384, "reward_std": 0.0, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 268 }, { "completion_length": 205.42857360839844, "epoch": 0.2581573896353167, "grad_norm": 0.8348208665847778, "kl": 0.023592347279191017, "learning_rate": 4.5120100127932954e-07, "loss": 0.0, "reward": 0.8571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.2857142984867096, "step": 269 }, { "completion_length": 162.92857360839844, "epoch": 0.2591170825335892, "grad_norm": 0.00790230929851532, "kl": 0.036873094737529755, "learning_rate": 4.507188654930532e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 270 }, { "completion_length": 182.85714721679688, "epoch": 0.2600767754318618, "grad_norm": 0.8380404710769653, "kl": 0.02413034997880459, "learning_rate": 4.5023461991301216e-07, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 271 }, { "completion_length": 153.42857360839844, "epoch": 0.26103646833013433, "grad_norm": 0.0056807552464306355, "kl": 0.02655276283621788, "learning_rate": 4.49748269629203e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 272 }, { "completion_length": 177.07144165039062, "epoch": 0.2619961612284069, "grad_norm": 0.005335693713277578, "kl": 0.026443200185894966, "learning_rate": 4.492598197537449e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 273 }, { "completion_length": 186.21429443359375, "epoch": 0.2629558541266795, "grad_norm": 0.3925122916698456, "kl": 0.036527182906866074, "learning_rate": 4.4876927542082654e-07, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 274 }, { "completion_length": 148.57144165039062, "epoch": 0.263915547024952, "grad_norm": 0.4555286467075348, "kl": 0.04112144187092781, "learning_rate": 4.482766417866517e-07, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 275 }, { "completion_length": 166.21429443359375, "epoch": 0.2648752399232246, "grad_norm": 0.6400704383850098, "kl": 0.02960377000272274, "learning_rate": 4.477819240293852e-07, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 276 }, { "completion_length": 189.6428680419922, "epoch": 0.2658349328214971, "grad_norm": 0.6090962886810303, "kl": 0.022824209183454514, "learning_rate": 4.472851273490984e-07, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 277 }, { "completion_length": 150.71429443359375, "epoch": 0.2667946257197697, "grad_norm": 0.5049095749855042, "kl": 0.03688105568289757, "learning_rate": 4.467862569677148e-07, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 278 }, { "completion_length": 151.35714721679688, "epoch": 0.2677543186180422, "grad_norm": 1.3763481378555298, "kl": 0.03445395827293396, "learning_rate": 4.4628531812895475e-07, "loss": 0.0, "reward": 0.9642857313156128, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 279 }, { "completion_length": 162.35714721679688, "epoch": 0.2687140115163148, "grad_norm": 0.0067557357251644135, "kl": 0.027300085872411728, "learning_rate": 4.4578231609828065e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 280 }, { "completion_length": 164.1428680419922, "epoch": 0.2696737044145873, "grad_norm": 0.007649392820894718, "kl": 0.029623623937368393, "learning_rate": 4.4527725616284163e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 281 }, { "completion_length": 174.85714721679688, "epoch": 0.2706333973128599, "grad_norm": 0.00472651980817318, "kl": 0.02263381890952587, "learning_rate": 4.4477014363141755e-07, "loss": 0.0, "reward": 0.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 282 }, { "completion_length": 196.35714721679688, "epoch": 0.2715930902111324, "grad_norm": 1.427958607673645, "kl": 0.023354561999440193, "learning_rate": 4.442609838343638e-07, "loss": 0.0, "reward": 1.1785714626312256, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 283 }, { "completion_length": 137.71429443359375, "epoch": 0.272552783109405, "grad_norm": 0.005701310001313686, "kl": 0.027103224769234657, "learning_rate": 4.437497821235547e-07, "loss": 0.0, "reward": 0.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 284 }, { "completion_length": 156.35714721679688, "epoch": 0.27351247600767753, "grad_norm": 1.2402467727661133, "kl": 0.03600984066724777, "learning_rate": 4.432365438723277e-07, "loss": 0.0, "reward": 1.0357143878936768, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 285 }, { "completion_length": 176.6428680419922, "epoch": 0.2744721689059501, "grad_norm": 0.008030042983591557, "kl": 0.027459513396024704, "learning_rate": 4.427212744754265e-07, "loss": 0.0, "reward": 0.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 286 }, { "completion_length": 151.92857360839844, "epoch": 0.27543186180422263, "grad_norm": 0.00565135246142745, "kl": 0.029424842447042465, "learning_rate": 4.422039793489448e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 287 }, { "completion_length": 180.35714721679688, "epoch": 0.2763915547024952, "grad_norm": 0.5180087089538574, "kl": 0.023782411590218544, "learning_rate": 4.41684663930269e-07, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 288 }, { "completion_length": 201.21429443359375, "epoch": 0.27735124760076774, "grad_norm": 1.2534946203231812, "kl": 0.023041196167469025, "learning_rate": 4.4116333367802127e-07, "loss": 0.0, "reward": 0.9642857313156128, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 289 }, { "completion_length": 178.35714721679688, "epoch": 0.2783109404990403, "grad_norm": 0.5227674245834351, "kl": 0.027517788112163544, "learning_rate": 4.406399940720019e-07, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 290 }, { "completion_length": 176.42857360839844, "epoch": 0.27927063339731284, "grad_norm": 0.005286243744194508, "kl": 0.026009848341345787, "learning_rate": 4.401146506131319e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 291 }, { "completion_length": 173.42857360839844, "epoch": 0.2802303262955854, "grad_norm": 1.7006956338882446, "kl": 0.02112499810755253, "learning_rate": 4.3958730882339534e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 292 }, { "completion_length": 202.92857360839844, "epoch": 0.28119001919385794, "grad_norm": 0.4269435405731201, "kl": 0.026450153440237045, "learning_rate": 4.3905797424578093e-07, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 293 }, { "completion_length": 173.6428680419922, "epoch": 0.2821497120921305, "grad_norm": 0.004021416883915663, "kl": 0.02275986038148403, "learning_rate": 4.38526652444224e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 294 }, { "completion_length": 161.2857208251953, "epoch": 0.28310940499040305, "grad_norm": 1.0972654819488525, "kl": 0.0373879037797451, "learning_rate": 4.379933490035481e-07, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 295 }, { "completion_length": 168.6428680419922, "epoch": 0.2840690978886756, "grad_norm": 0.005759912542998791, "kl": 0.02646731212735176, "learning_rate": 4.37458069529406e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 296 }, { "completion_length": 145.57144165039062, "epoch": 0.28502879078694815, "grad_norm": 0.9021885991096497, "kl": 0.04331574961543083, "learning_rate": 4.369208196482209e-07, "loss": 0.0, "reward": 1.1428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 297 }, { "completion_length": 151.1428680419922, "epoch": 0.28598848368522073, "grad_norm": 1.3297094106674194, "kl": 0.025562448427081108, "learning_rate": 4.363816050071275e-07, "loss": 0.0, "reward": 1.0357143878936768, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 298 }, { "completion_length": 183.6428680419922, "epoch": 0.2869481765834933, "grad_norm": 0.004989201668649912, "kl": 0.0278471652418375, "learning_rate": 4.3584043127391224e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 299 }, { "completion_length": 171.00001525878906, "epoch": 0.28790786948176583, "grad_norm": 0.006969420239329338, "kl": 0.034058988094329834, "learning_rate": 4.3529730413695416e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 300 }, { "completion_length": 151.71429443359375, "epoch": 0.2888675623800384, "grad_norm": 0.008304917253553867, "kl": 0.037503212690353394, "learning_rate": 4.3475222930516473e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 301 }, { "completion_length": 112.78572082519531, "epoch": 0.28982725527831094, "grad_norm": 0.004472906701266766, "kl": 0.022937115281820297, "learning_rate": 4.342052125079282e-07, "loss": 0.0, "reward": 0.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 302 }, { "completion_length": 180.00001525878906, "epoch": 0.2907869481765835, "grad_norm": 0.006619698368012905, "kl": 0.025007689371705055, "learning_rate": 4.336562594950409e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 303 }, { "completion_length": 193.21429443359375, "epoch": 0.29174664107485604, "grad_norm": 0.38067126274108887, "kl": 0.03443700075149536, "learning_rate": 4.3310537603665133e-07, "loss": 0.0, "reward": 0.9642857313156128, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 304 }, { "completion_length": 179.2857208251953, "epoch": 0.2927063339731286, "grad_norm": 0.004999150987714529, "kl": 0.02596285007894039, "learning_rate": 4.3255256792319914e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 305 }, { "completion_length": 167.6428680419922, "epoch": 0.29366602687140114, "grad_norm": 0.005507142748683691, "kl": 0.032811325043439865, "learning_rate": 4.319978409653545e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 306 }, { "completion_length": 178.00001525878906, "epoch": 0.2946257197696737, "grad_norm": 0.5633144974708557, "kl": 0.02833683229982853, "learning_rate": 4.314412009939568e-07, "loss": 0.0, "reward": 0.9642857313156128, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 307 }, { "completion_length": 192.00001525878906, "epoch": 0.29558541266794625, "grad_norm": 1.5086309909820557, "kl": 0.025650884956121445, "learning_rate": 4.3088265385995353e-07, "loss": 0.0, "reward": 0.9642857313156128, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 308 }, { "completion_length": 181.85714721679688, "epoch": 0.2965451055662188, "grad_norm": 0.6449000835418701, "kl": 0.036819376051425934, "learning_rate": 4.303222054343387e-07, "loss": 0.0, "reward": 0.9642857313156128, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 309 }, { "completion_length": 186.1428680419922, "epoch": 0.29750479846449135, "grad_norm": 0.017945080995559692, "kl": 0.0517929308116436, "learning_rate": 4.297598616080912e-07, "loss": 0.0001, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 310 }, { "completion_length": 174.1428680419922, "epoch": 0.29846449136276393, "grad_norm": 0.4481299817562103, "kl": 0.051047150045633316, "learning_rate": 4.291956282921128e-07, "loss": 0.0001, "reward": 0.9642857313156128, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 311 }, { "completion_length": 128.85714721679688, "epoch": 0.29942418426103645, "grad_norm": 0.02026240900158882, "kl": 0.05592488497495651, "learning_rate": 4.28629511417166e-07, "loss": 0.0001, "reward": 0.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 312 }, { "completion_length": 202.07144165039062, "epoch": 0.30038387715930903, "grad_norm": 0.6841912865638733, "kl": 0.02694413624703884, "learning_rate": 4.280615169338119e-07, "loss": 0.0, "reward": 1.1428571939468384, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 313 }, { "completion_length": 183.50001525878906, "epoch": 0.30134357005758156, "grad_norm": 0.8363072276115417, "kl": 0.02610953524708748, "learning_rate": 4.274916508123474e-07, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 314 }, { "completion_length": 162.35714721679688, "epoch": 0.30230326295585414, "grad_norm": 0.0039306944236159325, "kl": 0.021738877519965172, "learning_rate": 4.269199190427424e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 315 }, { "completion_length": 183.42857360839844, "epoch": 0.30326295585412666, "grad_norm": 0.005643991753458977, "kl": 0.027759358286857605, "learning_rate": 4.263463276345772e-07, "loss": 0.0, "reward": 0.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 316 }, { "completion_length": 166.07144165039062, "epoch": 0.30422264875239924, "grad_norm": 0.008501977659761906, "kl": 0.04843117669224739, "learning_rate": 4.2577088261697896e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 317 }, { "completion_length": 143.85714721679688, "epoch": 0.30518234165067176, "grad_norm": 0.0056803240440785885, "kl": 0.03515378385782242, "learning_rate": 4.2519359003855864e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 318 }, { "completion_length": 135.1428680419922, "epoch": 0.30614203454894434, "grad_norm": 0.0077020940370857716, "kl": 0.04317330941557884, "learning_rate": 4.2461445596734715e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 319 }, { "completion_length": 178.42857360839844, "epoch": 0.30710172744721687, "grad_norm": 0.007963654585182667, "kl": 0.03942827880382538, "learning_rate": 4.2403348649073167e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 320 }, { "completion_length": 172.50001525878906, "epoch": 0.30806142034548945, "grad_norm": 0.022651169449090958, "kl": 0.051504459232091904, "learning_rate": 4.234506877153916e-07, "loss": 0.0001, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 321 }, { "completion_length": 181.35714721679688, "epoch": 0.30902111324376197, "grad_norm": 0.006648566573858261, "kl": 0.03015914000570774, "learning_rate": 4.2286606576723466e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 322 }, { "completion_length": 176.07144165039062, "epoch": 0.30998080614203455, "grad_norm": 0.009471630677580833, "kl": 0.03626704216003418, "learning_rate": 4.2227962679133213e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 323 }, { "completion_length": 134.0, "epoch": 0.31094049904030713, "grad_norm": 0.005930949002504349, "kl": 0.0347946397960186, "learning_rate": 4.2169137695185436e-07, "loss": 0.0, "reward": 0.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 324 }, { "completion_length": 187.50001525878906, "epoch": 0.31190019193857965, "grad_norm": 0.004862929228693247, "kl": 0.0243705902248621, "learning_rate": 4.2110132243200614e-07, "loss": 0.0, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.4285714626312256, "rewards/check_similarity_func": 0.4285714626312256, "step": 325 }, { "completion_length": 190.50001525878906, "epoch": 0.31285988483685223, "grad_norm": 0.68247389793396, "kl": 0.0297255739569664, "learning_rate": 4.2050946943396164e-07, "loss": 0.0, "reward": 1.0357143878936768, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 326 }, { "completion_length": 195.1428680419922, "epoch": 0.31381957773512476, "grad_norm": 0.005015912931412458, "kl": 0.02628052793443203, "learning_rate": 4.1991582417879903e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 327 }, { "completion_length": 210.85714721679688, "epoch": 0.31477927063339733, "grad_norm": 0.006230570375919342, "kl": 0.03398096561431885, "learning_rate": 4.193203929064353e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 328 }, { "completion_length": 127.5714340209961, "epoch": 0.31573896353166986, "grad_norm": 0.0074079385958611965, "kl": 0.038374364376068115, "learning_rate": 4.1872318187556057e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 329 }, { "completion_length": 180.7857208251953, "epoch": 0.31669865642994244, "grad_norm": 0.004160887561738491, "kl": 0.02150154858827591, "learning_rate": 4.181241973635723e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 330 }, { "completion_length": 144.7857208251953, "epoch": 0.31765834932821496, "grad_norm": 0.009824509732425213, "kl": 0.046356454491615295, "learning_rate": 4.175234456665095e-07, "loss": 0.0, "reward": 0.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 331 }, { "completion_length": 143.7857208251953, "epoch": 0.31861804222648754, "grad_norm": 0.007815330289304256, "kl": 0.03965819999575615, "learning_rate": 4.1692093309898633e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 332 }, { "completion_length": 179.57144165039062, "epoch": 0.31957773512476007, "grad_norm": 0.8374554514884949, "kl": 0.02772437408566475, "learning_rate": 4.163166659941258e-07, "loss": 0.0, "reward": 1.2857143878936768, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.2857142984867096, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 333 }, { "completion_length": 120.28572082519531, "epoch": 0.32053742802303264, "grad_norm": 0.005397236440330744, "kl": 0.04791003838181496, "learning_rate": 4.15710650703493e-07, "loss": 0.0, "reward": 0.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 334 }, { "completion_length": 115.71429443359375, "epoch": 0.32149712092130517, "grad_norm": 0.5861750841140747, "kl": 0.0371648333966732, "learning_rate": 4.1510289359702875e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 335 }, { "completion_length": 185.07144165039062, "epoch": 0.32245681381957775, "grad_norm": 0.7507971525192261, "kl": 0.02877858467400074, "learning_rate": 4.1449340106298246e-07, "loss": 0.0, "reward": 0.8928571939468384, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 336 }, { "completion_length": 158.07144165039062, "epoch": 0.32341650671785027, "grad_norm": 0.01184576191008091, "kl": 0.05202249065041542, "learning_rate": 4.1388217950784465e-07, "loss": 0.0001, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 337 }, { "completion_length": 199.57144165039062, "epoch": 0.32437619961612285, "grad_norm": 0.01030796766281128, "kl": 0.04310651496052742, "learning_rate": 4.132692353562803e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 338 }, { "completion_length": 184.2857208251953, "epoch": 0.3253358925143954, "grad_norm": 0.007778528146445751, "kl": 0.03586255759000778, "learning_rate": 4.126545750510605e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 339 }, { "completion_length": 133.5, "epoch": 0.32629558541266795, "grad_norm": 0.8875092267990112, "kl": 0.039319075644016266, "learning_rate": 4.120382050529956e-07, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 340 }, { "completion_length": 181.2857208251953, "epoch": 0.3272552783109405, "grad_norm": 0.7580294013023376, "kl": 0.0274242851883173, "learning_rate": 4.114201318408665e-07, "loss": 0.0, "reward": 1.1428571939468384, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 341 }, { "completion_length": 168.2857208251953, "epoch": 0.32821497120921306, "grad_norm": 0.45875948667526245, "kl": 0.03460704907774925, "learning_rate": 4.108003619113571e-07, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 342 }, { "completion_length": 134.57144165039062, "epoch": 0.3291746641074856, "grad_norm": 1.4051810503005981, "kl": 0.04074370115995407, "learning_rate": 4.1017890177898573e-07, "loss": 0.0, "reward": 1.1428571939468384, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 343 }, { "completion_length": 203.21429443359375, "epoch": 0.33013435700575816, "grad_norm": 0.6904208064079285, "kl": 0.02201230265200138, "learning_rate": 4.0955575797603674e-07, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 344 }, { "completion_length": 136.57144165039062, "epoch": 0.3310940499040307, "grad_norm": 0.005353220272809267, "kl": 0.03351317346096039, "learning_rate": 4.0893093705249207e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 345 }, { "completion_length": 158.7857208251953, "epoch": 0.33205374280230326, "grad_norm": 0.008134887553751469, "kl": 0.0438324436545372, "learning_rate": 4.083044455759617e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 346 }, { "completion_length": 168.5, "epoch": 0.3330134357005758, "grad_norm": 1.0763109922409058, "kl": 0.04183940961956978, "learning_rate": 4.076762901316157e-07, "loss": 0.0, "reward": 1.0357143878936768, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 347 }, { "completion_length": 200.71429443359375, "epoch": 0.33397312859884837, "grad_norm": 0.007020506542176008, "kl": 0.03355221450328827, "learning_rate": 4.0704647732211404e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 348 }, { "completion_length": 140.0, "epoch": 0.33493282149712095, "grad_norm": 0.009831862524151802, "kl": 0.04611773416399956, "learning_rate": 4.064150137675377e-07, "loss": 0.0, "reward": 1.1428571939468384, "reward_std": 0.0, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 349 }, { "completion_length": 133.42857360839844, "epoch": 0.33589251439539347, "grad_norm": 0.9561317563056946, "kl": 0.03511285409331322, "learning_rate": 4.05781906105319e-07, "loss": 0.0, "reward": 1.1785714626312256, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 350 }, { "completion_length": 149.1428680419922, "epoch": 0.33685220729366605, "grad_norm": 0.028074661269783974, "kl": 0.07820842415094376, "learning_rate": 4.051471609901716e-07, "loss": 0.0001, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 351 }, { "completion_length": 135.42857360839844, "epoch": 0.3378119001919386, "grad_norm": 0.5171915888786316, "kl": 0.04309673607349396, "learning_rate": 4.0451078509402087e-07, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 352 }, { "completion_length": 139.5, "epoch": 0.33877159309021115, "grad_norm": 0.007418831344693899, "kl": 0.03595514968037605, "learning_rate": 4.038727851059336e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 353 }, { "completion_length": 209.00001525878906, "epoch": 0.3397312859884837, "grad_norm": 0.004803547635674477, "kl": 0.023838508874177933, "learning_rate": 4.0323316773204774e-07, "loss": 0.0, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 354 }, { "completion_length": 174.1428680419922, "epoch": 0.34069097888675626, "grad_norm": 0.9459902048110962, "kl": 0.029071485623717308, "learning_rate": 4.025919396955019e-07, "loss": 0.0, "reward": 1.1428571939468384, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 355 }, { "completion_length": 148.35714721679688, "epoch": 0.3416506717850288, "grad_norm": 0.6732973456382751, "kl": 0.04738449305295944, "learning_rate": 4.0194910773636435e-07, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 356 }, { "completion_length": 139.21429443359375, "epoch": 0.34261036468330136, "grad_norm": 0.008187681436538696, "kl": 0.038412824273109436, "learning_rate": 4.0130467861156324e-07, "loss": 0.0, "reward": 0.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 357 }, { "completion_length": 178.1428680419922, "epoch": 0.3435700575815739, "grad_norm": 0.004983143415302038, "kl": 0.02529989555478096, "learning_rate": 4.006586590948141e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 358 }, { "completion_length": 187.00001525878906, "epoch": 0.34452975047984646, "grad_norm": 0.005988486111164093, "kl": 0.02940433844923973, "learning_rate": 4.000110559765496e-07, "loss": 0.0, "reward": 1.1428571939468384, "reward_std": 0.0, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 359 }, { "completion_length": 191.92857360839844, "epoch": 0.345489443378119, "grad_norm": 0.004080416169017553, "kl": 0.02454613707959652, "learning_rate": 3.993618760638481e-07, "loss": 0.0, "reward": 0.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 360 }, { "completion_length": 177.7857208251953, "epoch": 0.34644913627639157, "grad_norm": 0.0073390225879848, "kl": 0.03108847513794899, "learning_rate": 3.9871112618036175e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 361 }, { "completion_length": 168.7857208251953, "epoch": 0.3474088291746641, "grad_norm": 1.0590665340423584, "kl": 0.02954629622399807, "learning_rate": 3.9805881316624503e-07, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 362 }, { "completion_length": 158.35714721679688, "epoch": 0.34836852207293667, "grad_norm": 0.007501096464693546, "kl": 0.03991718217730522, "learning_rate": 3.974049438780828e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 363 }, { "completion_length": 210.71429443359375, "epoch": 0.3493282149712092, "grad_norm": 0.005684204399585724, "kl": 0.03143526613712311, "learning_rate": 3.967495251888181e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 364 }, { "completion_length": 193.7857208251953, "epoch": 0.3502879078694818, "grad_norm": 0.8045049905776978, "kl": 0.035105150192976, "learning_rate": 3.960925639876802e-07, "loss": 0.0, "reward": 1.1071429252624512, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 365 }, { "completion_length": 198.71429443359375, "epoch": 0.3512476007677543, "grad_norm": 0.8791664838790894, "kl": 0.03433983772993088, "learning_rate": 3.954340671801119e-07, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 366 }, { "completion_length": 162.71429443359375, "epoch": 0.3522072936660269, "grad_norm": 0.009052729234099388, "kl": 0.044701818376779556, "learning_rate": 3.94774041687697e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 367 }, { "completion_length": 135.21429443359375, "epoch": 0.3531669865642994, "grad_norm": 1.3762645721435547, "kl": 0.05255107581615448, "learning_rate": 3.9411249444808776e-07, "loss": 0.0001, "reward": 1.0357143878936768, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 368 }, { "completion_length": 173.35714721679688, "epoch": 0.354126679462572, "grad_norm": 0.5099905133247375, "kl": 0.03488340973854065, "learning_rate": 3.9344943241493155e-07, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 369 }, { "completion_length": 170.5, "epoch": 0.3550863723608445, "grad_norm": 0.0060741533525288105, "kl": 0.03138230741024017, "learning_rate": 3.927848625577983e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 370 }, { "completion_length": 156.71429443359375, "epoch": 0.3560460652591171, "grad_norm": 1.123826026916504, "kl": 0.04131479561328888, "learning_rate": 3.9211879186210675e-07, "loss": 0.0, "reward": 1.1428571939468384, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 371 }, { "completion_length": 127.42857360839844, "epoch": 0.3570057581573896, "grad_norm": 1.3625502586364746, "kl": 0.07252487540245056, "learning_rate": 3.914512273290513e-07, "loss": 0.0001, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 372 }, { "completion_length": 138.2857208251953, "epoch": 0.3579654510556622, "grad_norm": 0.9402722120285034, "kl": 0.05322389304637909, "learning_rate": 3.907821759755285e-07, "loss": 0.0001, "reward": 0.9642857313156128, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 373 }, { "completion_length": 143.0, "epoch": 0.35892514395393477, "grad_norm": 0.9137933254241943, "kl": 0.06058524549007416, "learning_rate": 3.901116448340629e-07, "loss": 0.0001, "reward": 1.0, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 374 }, { "completion_length": 210.21429443359375, "epoch": 0.3598848368522073, "grad_norm": 0.023450544103980064, "kl": 0.05203608423471451, "learning_rate": 3.8943964095273354e-07, "loss": 0.0001, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 375 }, { "completion_length": 179.85714721679688, "epoch": 0.36084452975047987, "grad_norm": 1.0397217273712158, "kl": 0.03005804494023323, "learning_rate": 3.887661713950996e-07, "loss": 0.0, "reward": 1.1428571939468384, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 376 }, { "completion_length": 205.07144165039062, "epoch": 0.3618042226487524, "grad_norm": 1.1355233192443848, "kl": 0.03484668955206871, "learning_rate": 3.880912432401264e-07, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 377 }, { "completion_length": 156.21429443359375, "epoch": 0.362763915547025, "grad_norm": 0.7497095465660095, "kl": 0.056507278233766556, "learning_rate": 3.8741486358211086e-07, "loss": 0.0001, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 378 }, { "completion_length": 126.0714340209961, "epoch": 0.3637236084452975, "grad_norm": 0.010563752613961697, "kl": 0.04374634847044945, "learning_rate": 3.867370395306068e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 379 }, { "completion_length": 186.92857360839844, "epoch": 0.3646833013435701, "grad_norm": 0.6922144293785095, "kl": 0.03417911380529404, "learning_rate": 3.8605777821035014e-07, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 380 }, { "completion_length": 161.2857208251953, "epoch": 0.3656429942418426, "grad_norm": 0.6933069229125977, "kl": 0.024699684232473373, "learning_rate": 3.853770867611847e-07, "loss": 0.0, "reward": 0.8928571939468384, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 381 }, { "completion_length": 202.7857208251953, "epoch": 0.3666026871401152, "grad_norm": 1.0760729312896729, "kl": 0.031167039647698402, "learning_rate": 3.8469497233798634e-07, "loss": 0.0, "reward": 1.2142857313156128, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 382 }, { "completion_length": 166.6428680419922, "epoch": 0.3675623800383877, "grad_norm": 1.1822607517242432, "kl": 0.04168323427438736, "learning_rate": 3.8401144211058804e-07, "loss": 0.0, "reward": 1.0357143878936768, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 383 }, { "completion_length": 153.7857208251953, "epoch": 0.3685220729366603, "grad_norm": 1.23003089427948, "kl": 0.049891844391822815, "learning_rate": 3.8332650326370475e-07, "loss": 0.0, "reward": 0.9642857313156128, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 384 }, { "completion_length": 140.42857360839844, "epoch": 0.3694817658349328, "grad_norm": 0.7117332220077515, "kl": 0.03687429428100586, "learning_rate": 3.826401629968574e-07, "loss": 0.0, "reward": 1.2142857313156128, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 385 }, { "completion_length": 214.35714721679688, "epoch": 0.3704414587332054, "grad_norm": 0.012612152844667435, "kl": 0.046860627830028534, "learning_rate": 3.8195242852429796e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 386 }, { "completion_length": 158.0, "epoch": 0.3714011516314779, "grad_norm": 0.5242277979850769, "kl": 0.034052178263664246, "learning_rate": 3.812633070749326e-07, "loss": 0.0, "reward": 0.9642857313156128, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 387 }, { "completion_length": 131.35714721679688, "epoch": 0.3723608445297505, "grad_norm": 0.013473105616867542, "kl": 0.07540043443441391, "learning_rate": 3.8057280589224665e-07, "loss": 0.0001, "reward": 1.071428656578064, "reward_std": 0.0, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 388 }, { "completion_length": 139.1428680419922, "epoch": 0.373320537428023, "grad_norm": 0.6942633986473083, "kl": 0.07847024500370026, "learning_rate": 3.7988093223422804e-07, "loss": 0.0001, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 389 }, { "completion_length": 149.42857360839844, "epoch": 0.3742802303262956, "grad_norm": 0.3675069212913513, "kl": 0.047611135989427567, "learning_rate": 3.7918769337329085e-07, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 390 }, { "completion_length": 193.00001525878906, "epoch": 0.3752399232245681, "grad_norm": 0.004533219616860151, "kl": 0.030036184936761856, "learning_rate": 3.784930965961994e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 391 }, { "completion_length": 149.1428680419922, "epoch": 0.3761996161228407, "grad_norm": 0.011758384294807911, "kl": 0.056427523493766785, "learning_rate": 3.777971492039909e-07, "loss": 0.0001, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 392 }, { "completion_length": 193.00001525878906, "epoch": 0.3771593090211132, "grad_norm": 0.633731484413147, "kl": 0.044891607016325, "learning_rate": 3.770998585118994e-07, "loss": 0.0, "reward": 1.1428571939468384, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 393 }, { "completion_length": 174.92857360839844, "epoch": 0.3781190019193858, "grad_norm": 1.4459065198898315, "kl": 0.0377456434071064, "learning_rate": 3.7640123184927867e-07, "loss": 0.0, "reward": 1.1428571939468384, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 394 }, { "completion_length": 133.71429443359375, "epoch": 0.3790786948176583, "grad_norm": 0.016182443127036095, "kl": 0.05627846717834473, "learning_rate": 3.7570127655952496e-07, "loss": 0.0001, "reward": 0.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 395 }, { "completion_length": 130.07144165039062, "epoch": 0.3800383877159309, "grad_norm": 1.0196112394332886, "kl": 0.05639592930674553, "learning_rate": 3.75e-07, "loss": 0.0001, "reward": 1.0, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 396 }, { "completion_length": 186.85714721679688, "epoch": 0.3809980806142035, "grad_norm": 0.36781319975852966, "kl": 0.03332144021987915, "learning_rate": 3.742974095419538e-07, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 397 }, { "completion_length": 149.0, "epoch": 0.381957773512476, "grad_norm": 0.00629227003082633, "kl": 0.039144326001405716, "learning_rate": 3.735935125704466e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 398 }, { "completion_length": 117.85714721679688, "epoch": 0.3829174664107486, "grad_norm": 0.6505950689315796, "kl": 0.05095186084508896, "learning_rate": 3.7288831648427217e-07, "loss": 0.0001, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 399 }, { "completion_length": 145.07144165039062, "epoch": 0.3838771593090211, "grad_norm": 1.481459617614746, "kl": 0.036330193281173706, "learning_rate": 3.721818286958792e-07, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 400 }, { "completion_length": 216.85714721679688, "epoch": 0.3848368522072937, "grad_norm": 0.865010142326355, "kl": 0.0288600642234087, "learning_rate": 3.714740566312938e-07, "loss": 0.0, "reward": 1.1428571939468384, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 401 }, { "completion_length": 196.21429443359375, "epoch": 0.3857965451055662, "grad_norm": 0.003643524134531617, "kl": 0.02306034043431282, "learning_rate": 3.7076500773004115e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 402 }, { "completion_length": 137.42857360839844, "epoch": 0.3867562380038388, "grad_norm": 0.8691968321800232, "kl": 0.03865031152963638, "learning_rate": 3.70054689445068e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 403 }, { "completion_length": 168.21429443359375, "epoch": 0.3877159309021113, "grad_norm": 0.007669699378311634, "kl": 0.044668082147836685, "learning_rate": 3.6934310924266313e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 404 }, { "completion_length": 183.07144165039062, "epoch": 0.3886756238003839, "grad_norm": 0.005772280041128397, "kl": 0.036117102950811386, "learning_rate": 3.686302746023802e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 405 }, { "completion_length": 219.2857208251953, "epoch": 0.3896353166986564, "grad_norm": 0.018508102744817734, "kl": 0.07074171304702759, "learning_rate": 3.679161930169582e-07, "loss": 0.0001, "reward": 0.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 406 }, { "completion_length": 210.50001525878906, "epoch": 0.390595009596929, "grad_norm": 1.5151705741882324, "kl": 0.050125397741794586, "learning_rate": 3.67200871992243e-07, "loss": 0.0001, "reward": 1.1428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 407 }, { "completion_length": 234.00001525878906, "epoch": 0.3915547024952015, "grad_norm": 0.5438663959503174, "kl": 0.04336097463965416, "learning_rate": 3.6648431904710856e-07, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 408 }, { "completion_length": 146.5, "epoch": 0.3925143953934741, "grad_norm": 0.008632015436887741, "kl": 0.04932752996683121, "learning_rate": 3.6576654171337763e-07, "loss": 0.0, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 409 }, { "completion_length": 178.92857360839844, "epoch": 0.3934740882917466, "grad_norm": 0.7175394892692566, "kl": 0.03371736407279968, "learning_rate": 3.650475475357429e-07, "loss": 0.0, "reward": 1.2142857313156128, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 410 }, { "completion_length": 200.92857360839844, "epoch": 0.3944337811900192, "grad_norm": 0.6608917117118835, "kl": 0.13664159178733826, "learning_rate": 3.6432734407168745e-07, "loss": 0.0001, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 411 }, { "completion_length": 129.57144165039062, "epoch": 0.39539347408829173, "grad_norm": 0.011737454682588577, "kl": 0.054980646818876266, "learning_rate": 3.636059388914054e-07, "loss": 0.0001, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 412 }, { "completion_length": 127.0714340209961, "epoch": 0.3963531669865643, "grad_norm": 0.01816866174340248, "kl": 0.0697537511587143, "learning_rate": 3.6288333957772234e-07, "loss": 0.0001, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 413 }, { "completion_length": 159.0, "epoch": 0.39731285988483683, "grad_norm": 0.725745677947998, "kl": 0.03608942776918411, "learning_rate": 3.6215955372601563e-07, "loss": 0.0, "reward": 1.0357143878936768, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 414 }, { "completion_length": 124.35714721679688, "epoch": 0.3982725527831094, "grad_norm": 0.006231572944670916, "kl": 0.046947233378887177, "learning_rate": 3.614345889441346e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 415 }, { "completion_length": 231.9285888671875, "epoch": 0.39923224568138194, "grad_norm": 0.4338398873806, "kl": 0.02352001890540123, "learning_rate": 3.6070845285232034e-07, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 416 }, { "completion_length": 154.42857360839844, "epoch": 0.4001919385796545, "grad_norm": 0.5745523571968079, "kl": 0.056902844458818436, "learning_rate": 3.5998115308312614e-07, "loss": 0.0001, "reward": 1.0, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 417 }, { "completion_length": 150.2857208251953, "epoch": 0.40115163147792704, "grad_norm": 0.6519483327865601, "kl": 0.05775311589241028, "learning_rate": 3.592526972813367e-07, "loss": 0.0001, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 418 }, { "completion_length": 204.71429443359375, "epoch": 0.4021113243761996, "grad_norm": 0.0051377941854298115, "kl": 0.02899775840342045, "learning_rate": 3.585230931038879e-07, "loss": 0.0, "reward": 1.1428571939468384, "reward_std": 0.0, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 419 }, { "completion_length": 192.42857360839844, "epoch": 0.40307101727447214, "grad_norm": 1.4334899187088013, "kl": 0.031266357749700546, "learning_rate": 3.5779234821978665e-07, "loss": 0.0, "reward": 1.1428571939468384, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 420 }, { "completion_length": 180.7857208251953, "epoch": 0.4040307101727447, "grad_norm": 0.004692782182246447, "kl": 0.031169544905424118, "learning_rate": 3.570604703100299e-07, "loss": 0.0, "reward": 0.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 421 }, { "completion_length": 169.2857208251953, "epoch": 0.4049904030710173, "grad_norm": 0.014182982966303825, "kl": 0.05144822970032692, "learning_rate": 3.56327467067524e-07, "loss": 0.0001, "reward": 0.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 422 }, { "completion_length": 185.92857360839844, "epoch": 0.4059500959692898, "grad_norm": 0.006316781044006348, "kl": 0.035711660981178284, "learning_rate": 3.5559334619700407e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 423 }, { "completion_length": 131.0, "epoch": 0.4069097888675624, "grad_norm": 0.7075651288032532, "kl": 0.04515380039811134, "learning_rate": 3.548581154149527e-07, "loss": 0.0, "reward": 0.9642857313156128, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 424 }, { "completion_length": 191.85714721679688, "epoch": 0.40786948176583493, "grad_norm": 0.7084490060806274, "kl": 0.032797060906887054, "learning_rate": 3.54121782449519e-07, "loss": 0.0, "reward": 1.1428571939468384, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 425 }, { "completion_length": 171.50001525878906, "epoch": 0.4088291746641075, "grad_norm": 0.5779047012329102, "kl": 0.04388323053717613, "learning_rate": 3.533843550404373e-07, "loss": 0.0, "reward": 1.0357143878936768, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 426 }, { "completion_length": 163.71429443359375, "epoch": 0.40978886756238003, "grad_norm": 0.020728811621665955, "kl": 0.05106998234987259, "learning_rate": 3.5264584093894584e-07, "loss": 0.0001, "reward": 0.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 427 }, { "completion_length": 174.85714721679688, "epoch": 0.4107485604606526, "grad_norm": 0.45600950717926025, "kl": 0.03970245271921158, "learning_rate": 3.5190624790770536e-07, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 428 }, { "completion_length": 155.35714721679688, "epoch": 0.41170825335892514, "grad_norm": 1.283568024635315, "kl": 0.056247808039188385, "learning_rate": 3.5116558372071726e-07, "loss": 0.0001, "reward": 1.1428571939468384, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 429 }, { "completion_length": 186.00001525878906, "epoch": 0.4126679462571977, "grad_norm": 0.005776817910373211, "kl": 0.037403400987386703, "learning_rate": 3.5042385616324236e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 430 }, { "completion_length": 201.71429443359375, "epoch": 0.41362763915547024, "grad_norm": 0.006083885673433542, "kl": 0.03560447692871094, "learning_rate": 3.496810730317185e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 431 }, { "completion_length": 117.35714721679688, "epoch": 0.4145873320537428, "grad_norm": 1.0896047353744507, "kl": 0.10883072018623352, "learning_rate": 3.4893724213367885e-07, "loss": 0.0001, "reward": 0.9642857313156128, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 432 }, { "completion_length": 147.35714721679688, "epoch": 0.41554702495201534, "grad_norm": 0.006976233329623938, "kl": 0.041094228625297546, "learning_rate": 3.481923712876701e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 433 }, { "completion_length": 144.21429443359375, "epoch": 0.4165067178502879, "grad_norm": 1.1218067407608032, "kl": 0.04460947960615158, "learning_rate": 3.474464683231698e-07, "loss": 0.0, "reward": 1.1071429252624512, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 434 }, { "completion_length": 133.57144165039062, "epoch": 0.41746641074856045, "grad_norm": 1.378671646118164, "kl": 0.054090891033411026, "learning_rate": 3.466995410805045e-07, "loss": 0.0001, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 435 }, { "completion_length": 129.1428680419922, "epoch": 0.418426103646833, "grad_norm": 0.007199520710855722, "kl": 0.03868302330374718, "learning_rate": 3.459515974107667e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 436 }, { "completion_length": 195.42857360839844, "epoch": 0.41938579654510555, "grad_norm": 0.7116246819496155, "kl": 0.03370943292975426, "learning_rate": 3.4520264517573335e-07, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 437 }, { "completion_length": 170.21429443359375, "epoch": 0.42034548944337813, "grad_norm": 0.5725364685058594, "kl": 0.02298462577164173, "learning_rate": 3.444526922477822e-07, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 438 }, { "completion_length": 165.21429443359375, "epoch": 0.42130518234165065, "grad_norm": 0.656553328037262, "kl": 0.03351488336920738, "learning_rate": 3.437017465098095e-07, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 439 }, { "completion_length": 179.07144165039062, "epoch": 0.42226487523992323, "grad_norm": 0.008913520723581314, "kl": 0.04704747721552849, "learning_rate": 3.429498158551473e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 440 }, { "completion_length": 147.85714721679688, "epoch": 0.42322456813819576, "grad_norm": 1.2962566614151, "kl": 0.053325846791267395, "learning_rate": 3.4219690818748035e-07, "loss": 0.0001, "reward": 1.2142857313156128, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 441 }, { "completion_length": 149.92857360839844, "epoch": 0.42418426103646834, "grad_norm": 0.8135597705841064, "kl": 0.04676782339811325, "learning_rate": 3.4144303142076267e-07, "loss": 0.0, "reward": 1.1428571939468384, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 442 }, { "completion_length": 177.7857208251953, "epoch": 0.42514395393474086, "grad_norm": 0.9162650108337402, "kl": 0.03878163546323776, "learning_rate": 3.406881934791347e-07, "loss": 0.0, "reward": 1.2142857313156128, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.2857142984867096, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 443 }, { "completion_length": 152.0, "epoch": 0.42610364683301344, "grad_norm": 0.01002577692270279, "kl": 0.05306725576519966, "learning_rate": 3.399324022968403e-07, "loss": 0.0001, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 444 }, { "completion_length": 218.00001525878906, "epoch": 0.42706333973128596, "grad_norm": 0.005151334684342146, "kl": 0.029757939279079437, "learning_rate": 3.391756658181427e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 445 }, { "completion_length": 167.07144165039062, "epoch": 0.42802303262955854, "grad_norm": 0.0053740269504487514, "kl": 0.032460227608680725, "learning_rate": 3.3841799199724143e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 446 }, { "completion_length": 191.2857208251953, "epoch": 0.4289827255278311, "grad_norm": 0.0041680228896439075, "kl": 0.02854585275053978, "learning_rate": 3.376593887981886e-07, "loss": 0.0, "reward": 0.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 447 }, { "completion_length": 157.35714721679688, "epoch": 0.42994241842610365, "grad_norm": 0.009646262973546982, "kl": 0.06105753034353256, "learning_rate": 3.368998641948051e-07, "loss": 0.0001, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 448 }, { "completion_length": 220.7857208251953, "epoch": 0.4309021113243762, "grad_norm": 0.45228099822998047, "kl": 0.03056521713733673, "learning_rate": 3.3613942617059723e-07, "loss": 0.0, "reward": 0.9642857313156128, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 449 }, { "completion_length": 162.92857360839844, "epoch": 0.43186180422264875, "grad_norm": 0.5566941499710083, "kl": 0.0453774593770504, "learning_rate": 3.3537808271867173e-07, "loss": 0.0, "reward": 1.2142857313156128, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 450 }, { "completion_length": 164.2857208251953, "epoch": 0.43282149712092133, "grad_norm": 0.6799894571304321, "kl": 0.03226485475897789, "learning_rate": 3.3461584184165324e-07, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 451 }, { "completion_length": 173.2857208251953, "epoch": 0.43378119001919385, "grad_norm": 0.00478921364992857, "kl": 0.035336293280124664, "learning_rate": 3.33852711551599e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 452 }, { "completion_length": 144.92857360839844, "epoch": 0.43474088291746643, "grad_norm": 0.008388218469917774, "kl": 0.052188675850629807, "learning_rate": 3.3308869986991487e-07, "loss": 0.0001, "reward": 0.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 453 }, { "completion_length": 181.42857360839844, "epoch": 0.43570057581573896, "grad_norm": 0.006992385722696781, "kl": 0.04578002169728279, "learning_rate": 3.323238148272717e-07, "loss": 0.0, "reward": 1.1428571939468384, "reward_std": 0.0, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 454 }, { "completion_length": 126.71429443359375, "epoch": 0.43666026871401153, "grad_norm": 0.4828112721443176, "kl": 0.07209442555904388, "learning_rate": 3.315580644635199e-07, "loss": 0.0001, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 455 }, { "completion_length": 134.1428680419922, "epoch": 0.43761996161228406, "grad_norm": 0.02030225098133087, "kl": 0.08374566584825516, "learning_rate": 3.3079145682760556e-07, "loss": 0.0001, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 456 }, { "completion_length": 155.42857360839844, "epoch": 0.43857965451055664, "grad_norm": 0.0070424373261630535, "kl": 0.045930761843919754, "learning_rate": 3.3002399997748596e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 457 }, { "completion_length": 155.2857208251953, "epoch": 0.43953934740882916, "grad_norm": 1.001696228981018, "kl": 0.04577656462788582, "learning_rate": 3.292557019800445e-07, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 458 }, { "completion_length": 208.1428680419922, "epoch": 0.44049904030710174, "grad_norm": 0.4604964852333069, "kl": 0.5950458645820618, "learning_rate": 3.284865709110059e-07, "loss": 0.0006, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 459 }, { "completion_length": 149.1428680419922, "epoch": 0.44145873320537427, "grad_norm": 1.685957908630371, "kl": 0.09975482523441315, "learning_rate": 3.277166148548515e-07, "loss": 0.0001, "reward": 1.1071429252624512, "reward_std": 0.3535533845424652, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 460 }, { "completion_length": 194.50001525878906, "epoch": 0.44241842610364684, "grad_norm": 0.5604601502418518, "kl": 0.03504857420921326, "learning_rate": 3.269458419047345e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 461 }, { "completion_length": 143.0, "epoch": 0.44337811900191937, "grad_norm": 0.022528214380145073, "kl": 0.07197027653455734, "learning_rate": 3.261742601623942e-07, "loss": 0.0001, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 462 }, { "completion_length": 185.6428680419922, "epoch": 0.44433781190019195, "grad_norm": 0.4564867317676544, "kl": 0.03198310732841492, "learning_rate": 3.254018777380716e-07, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 463 }, { "completion_length": 165.42857360839844, "epoch": 0.44529750479846447, "grad_norm": 0.026625432074069977, "kl": 0.08944541960954666, "learning_rate": 3.2462870275042367e-07, "loss": 0.0001, "reward": 0.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 464 }, { "completion_length": 182.1428680419922, "epoch": 0.44625719769673705, "grad_norm": 2.008193016052246, "kl": 0.03620162233710289, "learning_rate": 3.2385474332643816e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 465 }, { "completion_length": 177.7857208251953, "epoch": 0.4472168905950096, "grad_norm": 0.005562148988246918, "kl": 0.0375421904027462, "learning_rate": 3.230800076013482e-07, "loss": 0.0, "reward": 1.1428571939468384, "reward_std": 0.0, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 466 }, { "completion_length": 140.7857208251953, "epoch": 0.44817658349328215, "grad_norm": 0.008467471227049828, "kl": 0.05556822940707207, "learning_rate": 3.223045037185469e-07, "loss": 0.0001, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 467 }, { "completion_length": 157.35714721679688, "epoch": 0.4491362763915547, "grad_norm": 0.006150401197373867, "kl": 0.03930763527750969, "learning_rate": 3.215282398295014e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 468 }, { "completion_length": 135.2857208251953, "epoch": 0.45009596928982726, "grad_norm": 1.1852688789367676, "kl": 0.053686290979385376, "learning_rate": 3.2075122409366755e-07, "loss": 0.0001, "reward": 1.071428656578064, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 469 }, { "completion_length": 182.50001525878906, "epoch": 0.4510556621880998, "grad_norm": 2.2341229915618896, "kl": 0.06231049448251724, "learning_rate": 3.199734646784039e-07, "loss": 0.0001, "reward": 1.0357143878936768, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 470 }, { "completion_length": 169.71429443359375, "epoch": 0.45201535508637236, "grad_norm": 0.005770351737737656, "kl": 0.03620312735438347, "learning_rate": 3.1919496975888616e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 471 }, { "completion_length": 196.50001525878906, "epoch": 0.45297504798464494, "grad_norm": 0.4466349184513092, "kl": 0.03305955231189728, "learning_rate": 3.184157475180207e-07, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 472 }, { "completion_length": 186.50001525878906, "epoch": 0.45393474088291746, "grad_norm": 0.452243447303772, "kl": 0.0399288535118103, "learning_rate": 3.176358061463593e-07, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 473 }, { "completion_length": 184.07144165039062, "epoch": 0.45489443378119004, "grad_norm": 0.014478935860097408, "kl": 0.04929973930120468, "learning_rate": 3.1685515384201236e-07, "loss": 0.0, "reward": 0.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 474 }, { "completion_length": 174.21429443359375, "epoch": 0.45585412667946257, "grad_norm": 0.0047874655574560165, "kl": 0.026750553399324417, "learning_rate": 3.1607379881056327e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 475 }, { "completion_length": 154.35714721679688, "epoch": 0.45681381957773515, "grad_norm": 0.962614893913269, "kl": 0.059144746512174606, "learning_rate": 3.152917492649817e-07, "loss": 0.0001, "reward": 1.2857143878936768, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.2857142984867096, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 476 }, { "completion_length": 184.21429443359375, "epoch": 0.45777351247600767, "grad_norm": 0.016966894268989563, "kl": 0.06124212592840195, "learning_rate": 3.145090134255376e-07, "loss": 0.0001, "reward": 0.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 477 }, { "completion_length": 174.42857360839844, "epoch": 0.45873320537428025, "grad_norm": 0.004736247938126326, "kl": 0.029529042541980743, "learning_rate": 3.1372559951971465e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 478 }, { "completion_length": 172.57144165039062, "epoch": 0.4596928982725528, "grad_norm": 0.00641443720087409, "kl": 0.036283768713474274, "learning_rate": 3.129415157821239e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 479 }, { "completion_length": 188.92857360839844, "epoch": 0.46065259117082535, "grad_norm": 0.5845937728881836, "kl": 0.02686132863163948, "learning_rate": 3.12156770454417e-07, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 480 }, { "completion_length": 149.92857360839844, "epoch": 0.4616122840690979, "grad_norm": 0.8813838362693787, "kl": 0.046446289867162704, "learning_rate": 3.1137137178519977e-07, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 481 }, { "completion_length": 176.21429443359375, "epoch": 0.46257197696737046, "grad_norm": 0.007331082131713629, "kl": 0.04181666299700737, "learning_rate": 3.105853280299454e-07, "loss": 0.0, "reward": 1.1428571939468384, "reward_std": 0.0, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 482 }, { "completion_length": 174.00001525878906, "epoch": 0.463531669865643, "grad_norm": 0.8677700161933899, "kl": 0.03974275290966034, "learning_rate": 3.0979864745090777e-07, "loss": 0.0, "reward": 1.1428571939468384, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 483 }, { "completion_length": 145.07144165039062, "epoch": 0.46449136276391556, "grad_norm": 0.9357709884643555, "kl": 0.033406805247068405, "learning_rate": 3.0901133831703434e-07, "loss": 0.0, "reward": 0.9642857313156128, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 484 }, { "completion_length": 175.92857360839844, "epoch": 0.4654510556621881, "grad_norm": 0.5611467361450195, "kl": 0.044671423733234406, "learning_rate": 3.0822340890387973e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 485 }, { "completion_length": 143.21429443359375, "epoch": 0.46641074856046066, "grad_norm": 1.144417405128479, "kl": 0.03610507771372795, "learning_rate": 3.0743486749351816e-07, "loss": 0.0, "reward": 1.2857143878936768, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.2857142984867096, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 486 }, { "completion_length": 148.6428680419922, "epoch": 0.4673704414587332, "grad_norm": 1.0641051530838013, "kl": 0.037374626845121384, "learning_rate": 3.066457223744568e-07, "loss": 0.0, "reward": 1.0357143878936768, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 487 }, { "completion_length": 185.2857208251953, "epoch": 0.46833013435700577, "grad_norm": 0.7822225689888, "kl": 0.02137828804552555, "learning_rate": 3.058559818415485e-07, "loss": 0.0, "reward": 1.2142857313156128, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 488 }, { "completion_length": 128.2857208251953, "epoch": 0.4692898272552783, "grad_norm": 0.9636138081550598, "kl": 0.04369405284523964, "learning_rate": 3.050656541959046e-07, "loss": 0.0, "reward": 1.2142857313156128, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 489 }, { "completion_length": 148.21429443359375, "epoch": 0.47024952015355087, "grad_norm": 0.006722169928252697, "kl": 0.03539753705263138, "learning_rate": 3.042747477448078e-07, "loss": 0.0, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 490 }, { "completion_length": 184.2857208251953, "epoch": 0.4712092130518234, "grad_norm": 0.0067064594477415085, "kl": 0.03323163837194443, "learning_rate": 3.034832708016243e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 491 }, { "completion_length": 155.1428680419922, "epoch": 0.472168905950096, "grad_norm": 1.8520830869674683, "kl": 0.06381086260080338, "learning_rate": 3.0269123168571757e-07, "loss": 0.0001, "reward": 0.9642857313156128, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 492 }, { "completion_length": 179.21429443359375, "epoch": 0.4731285988483685, "grad_norm": 0.8368856310844421, "kl": 0.02671428769826889, "learning_rate": 3.0189863872235966e-07, "loss": 0.0, "reward": 1.1428571939468384, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 493 }, { "completion_length": 181.00001525878906, "epoch": 0.4740882917466411, "grad_norm": 0.0055678412318229675, "kl": 0.033810149878263474, "learning_rate": 3.011055002426443e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 494 }, { "completion_length": 149.6428680419922, "epoch": 0.4750479846449136, "grad_norm": 1.3950964212417603, "kl": 0.03802361339330673, "learning_rate": 3.0031182458339936e-07, "loss": 0.0, "reward": 1.1785714626312256, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 495 }, { "completion_length": 162.5, "epoch": 0.4760076775431862, "grad_norm": 1.275749921798706, "kl": 0.023918872699141502, "learning_rate": 2.9951762008709904e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 496 }, { "completion_length": 168.21429443359375, "epoch": 0.47696737044145876, "grad_norm": 0.00569526432082057, "kl": 0.0313393771648407, "learning_rate": 2.987228951017762e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 497 }, { "completion_length": 171.07144165039062, "epoch": 0.4779270633397313, "grad_norm": 0.0052275885827839375, "kl": 0.02883880026638508, "learning_rate": 2.979276579809346e-07, "loss": 0.0, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 498 }, { "completion_length": 144.7857208251953, "epoch": 0.47888675623800386, "grad_norm": 1.4750051498413086, "kl": 0.03224356472492218, "learning_rate": 2.9713191708346147e-07, "loss": 0.0, "reward": 1.2142857313156128, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 499 }, { "completion_length": 128.5, "epoch": 0.4798464491362764, "grad_norm": 1.5553311109542847, "kl": 0.0354074165225029, "learning_rate": 2.9633568077353904e-07, "loss": 0.0, "reward": 1.0357143878936768, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 500 }, { "completion_length": 166.0, "epoch": 0.48080614203454897, "grad_norm": 1.356405258178711, "kl": 0.026616545394062996, "learning_rate": 2.955389574205569e-07, "loss": 0.0, "reward": 1.2142857313156128, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 501 }, { "completion_length": 130.1428680419922, "epoch": 0.4817658349328215, "grad_norm": 0.6440948843955994, "kl": 0.06276445090770721, "learning_rate": 2.947417553990244e-07, "loss": 0.0001, "reward": 1.0, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 502 }, { "completion_length": 146.42857360839844, "epoch": 0.48272552783109407, "grad_norm": 1.7609686851501465, "kl": 0.04397787153720856, "learning_rate": 2.9394408308848163e-07, "loss": 0.0, "reward": 1.0357143878936768, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 503 }, { "completion_length": 128.2857208251953, "epoch": 0.4836852207293666, "grad_norm": 1.1433206796646118, "kl": 0.044805098325014114, "learning_rate": 2.931459488734126e-07, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 504 }, { "completion_length": 183.85714721679688, "epoch": 0.4846449136276392, "grad_norm": 1.2511568069458008, "kl": 0.037779826670885086, "learning_rate": 2.923473611431561e-07, "loss": 0.0, "reward": 1.1071429252624512, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 505 }, { "completion_length": 166.71429443359375, "epoch": 0.4856046065259117, "grad_norm": 1.1855658292770386, "kl": 0.028667420148849487, "learning_rate": 2.915483282918182e-07, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 506 }, { "completion_length": 160.42857360839844, "epoch": 0.4865642994241843, "grad_norm": 0.0067210812121629715, "kl": 0.040691476315259933, "learning_rate": 2.907488587181833e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 507 }, { "completion_length": 147.92857360839844, "epoch": 0.4875239923224568, "grad_norm": 0.7246859073638916, "kl": 0.03574303910136223, "learning_rate": 2.8994896082562674e-07, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 508 }, { "completion_length": 177.21429443359375, "epoch": 0.4884836852207294, "grad_norm": 0.9603646993637085, "kl": 0.03209708631038666, "learning_rate": 2.891486430220258e-07, "loss": 0.0, "reward": 1.2142857313156128, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 509 }, { "completion_length": 173.35714721679688, "epoch": 0.4894433781190019, "grad_norm": 0.006125732325017452, "kl": 0.03023635782301426, "learning_rate": 2.883479137196714e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 510 }, { "completion_length": 147.92857360839844, "epoch": 0.4904030710172745, "grad_norm": 0.00572172412648797, "kl": 0.03950463980436325, "learning_rate": 2.8754678133517986e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 511 }, { "completion_length": 159.5, "epoch": 0.491362763915547, "grad_norm": 0.5397362112998962, "kl": 0.028387049213051796, "learning_rate": 2.867452542894045e-07, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 512 }, { "completion_length": 157.6428680419922, "epoch": 0.4923224568138196, "grad_norm": 0.7163601517677307, "kl": 0.02990178018808365, "learning_rate": 2.85943341007347e-07, "loss": 0.0, "reward": 1.1428571939468384, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 513 }, { "completion_length": 128.57144165039062, "epoch": 0.4932821497120921, "grad_norm": 1.8333269357681274, "kl": 0.04864795133471489, "learning_rate": 2.8514104991806864e-07, "loss": 0.0, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 514 }, { "completion_length": 198.21429443359375, "epoch": 0.4942418426103647, "grad_norm": 0.0053583369590342045, "kl": 0.0323624461889267, "learning_rate": 2.8433838945460205e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 515 }, { "completion_length": 158.57144165039062, "epoch": 0.4952015355086372, "grad_norm": 1.2525274753570557, "kl": 0.04377404600381851, "learning_rate": 2.835353680538624e-07, "loss": 0.0, "reward": 1.1428571939468384, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 516 }, { "completion_length": 176.6428680419922, "epoch": 0.4961612284069098, "grad_norm": 0.9559332728385925, "kl": 0.040696561336517334, "learning_rate": 2.8273199415655887e-07, "loss": 0.0, "reward": 1.1428571939468384, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 517 }, { "completion_length": 172.07144165039062, "epoch": 0.4971209213051823, "grad_norm": 0.005147178191691637, "kl": 0.03337319940328598, "learning_rate": 2.819282762071055e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 518 }, { "completion_length": 204.21429443359375, "epoch": 0.4980806142034549, "grad_norm": 0.6717579960823059, "kl": 0.026917800307273865, "learning_rate": 2.811242226535329e-07, "loss": 0.0, "reward": 1.0357143878936768, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 519 }, { "completion_length": 170.7857208251953, "epoch": 0.4990403071017274, "grad_norm": 0.006065472029149532, "kl": 0.03249480202794075, "learning_rate": 2.803198419473994e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 520 }, { "completion_length": 145.42857360839844, "epoch": 0.5, "grad_norm": 0.844271719455719, "kl": 0.053005922585725784, "learning_rate": 2.795151425437019e-07, "loss": 0.0001, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 521 }, { "completion_length": 158.57144165039062, "epoch": 0.5009596928982726, "grad_norm": 0.005384603049606085, "kl": 0.03345721960067749, "learning_rate": 2.7871013290078713e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 522 }, { "completion_length": 174.07144165039062, "epoch": 0.5019193857965452, "grad_norm": 1.2282966375350952, "kl": 0.030412212014198303, "learning_rate": 2.779048214802631e-07, "loss": 0.0, "reward": 1.2142857313156128, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 523 }, { "completion_length": 163.57144165039062, "epoch": 0.5028790786948176, "grad_norm": 0.0051529742777347565, "kl": 0.03502681106328964, "learning_rate": 2.770992167469096e-07, "loss": 0.0, "reward": 1.1428571939468384, "reward_std": 0.0, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 524 }, { "completion_length": 190.00001525878906, "epoch": 0.5038387715930902, "grad_norm": 0.005073275417089462, "kl": 0.03120836801826954, "learning_rate": 2.7629332716858967e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 525 }, { "completion_length": 165.42857360839844, "epoch": 0.5047984644913628, "grad_norm": 0.7414900660514832, "kl": 0.03693871572613716, "learning_rate": 2.754871612161601e-07, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 526 }, { "completion_length": 132.21429443359375, "epoch": 0.5057581573896354, "grad_norm": 1.076127529144287, "kl": 0.061931759119033813, "learning_rate": 2.746807273633832e-07, "loss": 0.0001, "reward": 0.9642857313156128, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 527 }, { "completion_length": 121.21429443359375, "epoch": 0.5067178502879078, "grad_norm": 0.49506309628486633, "kl": 0.045896150171756744, "learning_rate": 2.738740340868367e-07, "loss": 0.0, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.4285714626312256, "rewards/check_similarity_func": 0.4285714626312256, "step": 528 }, { "completion_length": 127.28572082519531, "epoch": 0.5076775431861804, "grad_norm": 1.8213541507720947, "kl": 0.04829070717096329, "learning_rate": 2.730670898658255e-07, "loss": 0.0, "reward": 1.1785714626312256, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 529 }, { "completion_length": 171.71429443359375, "epoch": 0.508637236084453, "grad_norm": 0.6940011978149414, "kl": 0.05297639220952988, "learning_rate": 2.722599031822922e-07, "loss": 0.0001, "reward": 1.0357143878936768, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 530 }, { "completion_length": 163.21429443359375, "epoch": 0.5095969289827256, "grad_norm": 2.122466802597046, "kl": 0.04223693534731865, "learning_rate": 2.714524825207279e-07, "loss": 0.0, "reward": 1.1428571939468384, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 531 }, { "completion_length": 130.0, "epoch": 0.510556621880998, "grad_norm": 0.7795002460479736, "kl": 0.042812492698431015, "learning_rate": 2.706448363680831e-07, "loss": 0.0, "reward": 0.9642857313156128, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 532 }, { "completion_length": 165.71429443359375, "epoch": 0.5115163147792706, "grad_norm": 0.9761487245559692, "kl": 0.03972423076629639, "learning_rate": 2.698369732136784e-07, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 533 }, { "completion_length": 151.07144165039062, "epoch": 0.5124760076775432, "grad_norm": 0.004857619293034077, "kl": 0.03942493349313736, "learning_rate": 2.6902890154911564e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 534 }, { "completion_length": 120.5714340209961, "epoch": 0.5134357005758158, "grad_norm": 0.010403042659163475, "kl": 0.06385780870914459, "learning_rate": 2.682206298681879e-07, "loss": 0.0001, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 535 }, { "completion_length": 193.1428680419922, "epoch": 0.5143953934740882, "grad_norm": 0.005190599709749222, "kl": 0.03980549797415733, "learning_rate": 2.674121666667911e-07, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.0, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 536 }, { "completion_length": 138.21429443359375, "epoch": 0.5153550863723608, "grad_norm": 1.30233895778656, "kl": 0.04275255650281906, "learning_rate": 2.6660352044283404e-07, "loss": 0.0, "reward": 1.321428656578064, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.3571428656578064, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 537 }, { "completion_length": 138.35714721679688, "epoch": 0.5163147792706334, "grad_norm": 0.9500021934509277, "kl": 0.0402948334813118, "learning_rate": 2.6579469969614927e-07, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 538 }, { "completion_length": 196.07144165039062, "epoch": 0.517274472168906, "grad_norm": 0.7569490075111389, "kl": 0.034688010811805725, "learning_rate": 2.649857129284038e-07, "loss": 0.0, "reward": 1.2142857313156128, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 539 }, { "completion_length": 119.92857360839844, "epoch": 0.5182341650671785, "grad_norm": 1.1846843957901, "kl": 0.03834603354334831, "learning_rate": 2.6417656864301005e-07, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 540 }, { "completion_length": 162.0, "epoch": 0.519193857965451, "grad_norm": 1.0362675189971924, "kl": 0.04887138307094574, "learning_rate": 2.633672753450355e-07, "loss": 0.0, "reward": 1.2142857313156128, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 541 }, { "completion_length": 103.64286041259766, "epoch": 0.5201535508637236, "grad_norm": 1.4723923206329346, "kl": 0.06669270247220993, "learning_rate": 2.625578415411146e-07, "loss": 0.0001, "reward": 1.071428656578064, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 542 }, { "completion_length": 141.42857360839844, "epoch": 0.5211132437619962, "grad_norm": 0.013025540858507156, "kl": 0.06441746652126312, "learning_rate": 2.6174827573935813e-07, "loss": 0.0001, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 543 }, { "completion_length": 179.57144165039062, "epoch": 0.5220729366602687, "grad_norm": 1.5146015882492065, "kl": 0.03895454853773117, "learning_rate": 2.6093858644926475e-07, "loss": 0.0, "reward": 1.0357143878936768, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 544 }, { "completion_length": 157.0, "epoch": 0.5230326295585412, "grad_norm": 1.245793104171753, "kl": 0.036515235900878906, "learning_rate": 2.6012878218163093e-07, "loss": 0.0, "reward": 1.1785714626312256, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 545 }, { "completion_length": 143.07144165039062, "epoch": 0.5239923224568138, "grad_norm": 0.8265988826751709, "kl": 0.05219345539808273, "learning_rate": 2.5931887144846154e-07, "loss": 0.0001, "reward": 0.9642857313156128, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 546 }, { "completion_length": 100.71428680419922, "epoch": 0.5249520153550864, "grad_norm": 0.006222426891326904, "kl": 0.046895429491996765, "learning_rate": 2.5850886276288086e-07, "loss": 0.0, "reward": 0.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.2857142984867096, "step": 547 }, { "completion_length": 163.2857208251953, "epoch": 0.525911708253359, "grad_norm": 0.3309767544269562, "kl": 0.03943851217627525, "learning_rate": 2.5769876463904263e-07, "loss": 0.0, "reward": 1.2142857313156128, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 548 }, { "completion_length": 154.7857208251953, "epoch": 0.5268714011516314, "grad_norm": 0.6805957555770874, "kl": 0.04449086636304855, "learning_rate": 2.5688858559204053e-07, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 549 }, { "completion_length": 145.6428680419922, "epoch": 0.527831094049904, "grad_norm": 0.6731678247451782, "kl": 0.05053482949733734, "learning_rate": 2.56078334137819e-07, "loss": 0.0001, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 550 }, { "completion_length": 132.5, "epoch": 0.5287907869481766, "grad_norm": 0.8366532921791077, "kl": 0.03804032504558563, "learning_rate": 2.552680187930836e-07, "loss": 0.0, "reward": 1.2142857313156128, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 551 }, { "completion_length": 170.6428680419922, "epoch": 0.5297504798464492, "grad_norm": 1.1984949111938477, "kl": 0.035749733448028564, "learning_rate": 2.5445764807521145e-07, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 552 }, { "completion_length": 211.7857208251953, "epoch": 0.5307101727447217, "grad_norm": 0.6591501235961914, "kl": 0.030941210687160492, "learning_rate": 2.536472305021616e-07, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 553 }, { "completion_length": 130.2857208251953, "epoch": 0.5316698656429942, "grad_norm": 1.2223154306411743, "kl": 0.05342891439795494, "learning_rate": 2.5283677459238554e-07, "loss": 0.0001, "reward": 0.9642857313156128, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 554 }, { "completion_length": 162.1428680419922, "epoch": 0.5326295585412668, "grad_norm": 0.6604644656181335, "kl": 0.0540749728679657, "learning_rate": 2.5202628886473805e-07, "loss": 0.0001, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 555 }, { "completion_length": 137.0, "epoch": 0.5335892514395394, "grad_norm": 1.1521764993667603, "kl": 0.0504867359995842, "learning_rate": 2.5121578183838685e-07, "loss": 0.0001, "reward": 0.9642857313156128, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 556 }, { "completion_length": 180.71429443359375, "epoch": 0.5345489443378119, "grad_norm": 1.3548251390457153, "kl": 0.036534275859594345, "learning_rate": 2.5040526203272416e-07, "loss": 0.0, "reward": 1.1785714626312256, "reward_std": 0.3535533845424652, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 557 }, { "completion_length": 182.71429443359375, "epoch": 0.5355086372360844, "grad_norm": 0.7351670265197754, "kl": 0.04434232786297798, "learning_rate": 2.495947379672759e-07, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 558 }, { "completion_length": 143.1428680419922, "epoch": 0.536468330134357, "grad_norm": 1.1608048677444458, "kl": 0.045021459460258484, "learning_rate": 2.4878421816161313e-07, "loss": 0.0, "reward": 1.1071429252624512, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 559 }, { "completion_length": 206.07144165039062, "epoch": 0.5374280230326296, "grad_norm": 0.7720102667808533, "kl": 0.03170725703239441, "learning_rate": 2.4797371113526203e-07, "loss": 0.0, "reward": 0.9642857313156128, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 560 }, { "completion_length": 176.92857360839844, "epoch": 0.5383877159309021, "grad_norm": 0.7555896639823914, "kl": 0.0451071597635746, "learning_rate": 2.4716322540761443e-07, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 561 }, { "completion_length": 176.7857208251953, "epoch": 0.5393474088291746, "grad_norm": 0.00628678360953927, "kl": 0.03832141309976578, "learning_rate": 2.463527694978384e-07, "loss": 0.0, "reward": 1.1428571939468384, "reward_std": 0.0, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 562 }, { "completion_length": 202.57144165039062, "epoch": 0.5403071017274472, "grad_norm": 0.43252527713775635, "kl": 0.045148350298404694, "learning_rate": 2.455423519247885e-07, "loss": 0.0, "reward": 1.2142857313156128, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 563 }, { "completion_length": 186.50001525878906, "epoch": 0.5412667946257198, "grad_norm": 0.7092636823654175, "kl": 0.05524356663227081, "learning_rate": 2.447319812069163e-07, "loss": 0.0001, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 564 }, { "completion_length": 171.92857360839844, "epoch": 0.5422264875239923, "grad_norm": 1.532195806503296, "kl": 0.04304159805178642, "learning_rate": 2.43921665862181e-07, "loss": 0.0, "reward": 1.1071429252624512, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 565 }, { "completion_length": 116.92857360839844, "epoch": 0.5431861804222649, "grad_norm": 0.9895479679107666, "kl": 0.060022544115781784, "learning_rate": 2.431114144079595e-07, "loss": 0.0001, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 566 }, { "completion_length": 136.21429443359375, "epoch": 0.5441458733205374, "grad_norm": 0.005933710839599371, "kl": 0.04731615260243416, "learning_rate": 2.4230123536095745e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 567 }, { "completion_length": 140.42857360839844, "epoch": 0.54510556621881, "grad_norm": 1.0366408824920654, "kl": 0.075242780148983, "learning_rate": 2.414911372371191e-07, "loss": 0.0001, "reward": 1.1428571939468384, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 568 }, { "completion_length": 165.21429443359375, "epoch": 0.5460652591170825, "grad_norm": 0.0126578314229846, "kl": 0.07264365255832672, "learning_rate": 2.406811285515385e-07, "loss": 0.0001, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 569 }, { "completion_length": 162.5, "epoch": 0.5470249520153551, "grad_norm": 0.004996855743229389, "kl": 0.04320121183991432, "learning_rate": 2.398712178183691e-07, "loss": 0.0, "reward": 0.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 570 }, { "completion_length": 177.42857360839844, "epoch": 0.5479846449136276, "grad_norm": 0.8546741008758545, "kl": 0.05147384852170944, "learning_rate": 2.3906141355073517e-07, "loss": 0.0001, "reward": 1.1785714626312256, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 571 }, { "completion_length": 181.50001525878906, "epoch": 0.5489443378119002, "grad_norm": 0.9227664470672607, "kl": 0.07095707952976227, "learning_rate": 2.382517242606419e-07, "loss": 0.0001, "reward": 1.1428571939468384, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 572 }, { "completion_length": 169.2857208251953, "epoch": 0.5499040307101728, "grad_norm": 0.3425494432449341, "kl": 0.045387424528598785, "learning_rate": 2.3744215845888543e-07, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 573 }, { "completion_length": 195.71429443359375, "epoch": 0.5508637236084453, "grad_norm": 0.4681277871131897, "kl": 0.03683958202600479, "learning_rate": 2.366327246549645e-07, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 574 }, { "completion_length": 202.21429443359375, "epoch": 0.5518234165067178, "grad_norm": 1.151267409324646, "kl": 0.03698313236236572, "learning_rate": 2.3582343135698999e-07, "loss": 0.0, "reward": 1.25, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.2857142984867096, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 575 }, { "completion_length": 172.92857360839844, "epoch": 0.5527831094049904, "grad_norm": 1.5624014139175415, "kl": 0.05424296855926514, "learning_rate": 2.3501428707159615e-07, "loss": 0.0001, "reward": 0.9642857313156128, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 576 }, { "completion_length": 145.0, "epoch": 0.553742802303263, "grad_norm": 0.006239702459424734, "kl": 0.043371591717004776, "learning_rate": 2.3420530030385076e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 577 }, { "completion_length": 174.92857360839844, "epoch": 0.5547024952015355, "grad_norm": 1.1786164045333862, "kl": 0.03976292908191681, "learning_rate": 2.3339647955716604e-07, "loss": 0.0, "reward": 1.2142857313156128, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 578 }, { "completion_length": 178.6428680419922, "epoch": 0.555662188099808, "grad_norm": 0.4374527335166931, "kl": 0.03513139486312866, "learning_rate": 2.3258783333320889e-07, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 579 }, { "completion_length": 145.57144165039062, "epoch": 0.5566218809980806, "grad_norm": 1.7757643461227417, "kl": 0.05435565114021301, "learning_rate": 2.3177937013181203e-07, "loss": 0.0001, "reward": 1.25, "reward_std": 0.45456868410110474, "rewards/check_gptzero_func": 0.2857142984867096, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 580 }, { "completion_length": 159.2857208251953, "epoch": 0.5575815738963532, "grad_norm": 0.5112550854682922, "kl": 0.04683312028646469, "learning_rate": 2.309710984508844e-07, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 581 }, { "completion_length": 170.85714721679688, "epoch": 0.5585412667946257, "grad_norm": 1.1045641899108887, "kl": 0.035446956753730774, "learning_rate": 2.3016302678632155e-07, "loss": 0.0, "reward": 0.9642857313156128, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 582 }, { "completion_length": 172.00001525878906, "epoch": 0.5595009596928983, "grad_norm": 0.00965283252298832, "kl": 0.05432228744029999, "learning_rate": 2.2935516363191693e-07, "loss": 0.0001, "reward": 1.1428571939468384, "reward_std": 0.0, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 583 }, { "completion_length": 158.21429443359375, "epoch": 0.5604606525911708, "grad_norm": 0.727836012840271, "kl": 0.05464170128107071, "learning_rate": 2.2854751747927208e-07, "loss": 0.0001, "reward": 1.0357143878936768, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 584 }, { "completion_length": 194.42857360839844, "epoch": 0.5614203454894434, "grad_norm": 0.7774348258972168, "kl": 0.031302861869335175, "learning_rate": 2.2774009681770785e-07, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 585 }, { "completion_length": 149.92857360839844, "epoch": 0.5623800383877159, "grad_norm": 1.2991249561309814, "kl": 0.04799596592783928, "learning_rate": 2.2693291013417452e-07, "loss": 0.0, "reward": 1.25, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.2857142984867096, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 586 }, { "completion_length": 173.21429443359375, "epoch": 0.5633397312859885, "grad_norm": 1.2338955402374268, "kl": 0.04071972519159317, "learning_rate": 2.2612596591316333e-07, "loss": 0.0, "reward": 1.1428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 587 }, { "completion_length": 204.21429443359375, "epoch": 0.564299424184261, "grad_norm": 0.7531063556671143, "kl": 0.03159426152706146, "learning_rate": 2.2531927263661685e-07, "loss": 0.0, "reward": 1.1428571939468384, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 588 }, { "completion_length": 179.7857208251953, "epoch": 0.5652591170825336, "grad_norm": 0.47919026017189026, "kl": 0.04101026803255081, "learning_rate": 2.2451283878383983e-07, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 589 }, { "completion_length": 160.07144165039062, "epoch": 0.5662188099808061, "grad_norm": 1.442938208580017, "kl": 0.03974846750497818, "learning_rate": 2.2370667283141036e-07, "loss": 0.0, "reward": 1.2857143878936768, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.2857142984867096, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 590 }, { "completion_length": 147.7857208251953, "epoch": 0.5671785028790787, "grad_norm": 0.5420333743095398, "kl": 0.05681974068284035, "learning_rate": 2.2290078325309035e-07, "loss": 0.0001, "reward": 1.0, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 591 }, { "completion_length": 141.21429443359375, "epoch": 0.5681381957773513, "grad_norm": 1.027714490890503, "kl": 0.049528978765010834, "learning_rate": 2.2209517851973694e-07, "loss": 0.0, "reward": 1.1428571939468384, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 592 }, { "completion_length": 196.35714721679688, "epoch": 0.5690978886756238, "grad_norm": 1.3300915956497192, "kl": 0.030890943482518196, "learning_rate": 2.2128986709921288e-07, "loss": 0.0, "reward": 1.1071429252624512, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 593 }, { "completion_length": 176.1428680419922, "epoch": 0.5700575815738963, "grad_norm": 1.8649946451187134, "kl": 0.04617445915937424, "learning_rate": 2.204848574562982e-07, "loss": 0.0, "reward": 1.321428656578064, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.3571428656578064, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 594 }, { "completion_length": 152.6428680419922, "epoch": 0.5710172744721689, "grad_norm": 0.5392505526542664, "kl": 0.07711812108755112, "learning_rate": 2.196801580526006e-07, "loss": 0.0001, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 595 }, { "completion_length": 111.00000762939453, "epoch": 0.5719769673704415, "grad_norm": 0.6102688312530518, "kl": 0.058839790523052216, "learning_rate": 2.1887577734646704e-07, "loss": 0.0001, "reward": 1.0, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 596 }, { "completion_length": 168.57144165039062, "epoch": 0.572936660268714, "grad_norm": 0.7960284948348999, "kl": 0.051914338022470474, "learning_rate": 2.1807172379289452e-07, "loss": 0.0001, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 597 }, { "completion_length": 183.1428680419922, "epoch": 0.5738963531669866, "grad_norm": 0.6335731744766235, "kl": 0.06888283789157867, "learning_rate": 2.172680058434411e-07, "loss": 0.0001, "reward": 0.9642857313156128, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 598 }, { "completion_length": 138.0, "epoch": 0.5748560460652591, "grad_norm": 1.3335093259811401, "kl": 0.0631156787276268, "learning_rate": 2.1646463194613759e-07, "loss": 0.0001, "reward": 1.2142857313156128, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 599 }, { "completion_length": 207.00001525878906, "epoch": 0.5758157389635317, "grad_norm": 0.7428359389305115, "kl": 0.05783711373806, "learning_rate": 2.1566161054539795e-07, "loss": 0.0001, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 600 }, { "completion_length": 138.35714721679688, "epoch": 0.5767754318618042, "grad_norm": 1.4487627744674683, "kl": 0.06397869437932968, "learning_rate": 2.1485895008193141e-07, "loss": 0.0001, "reward": 1.2857143878936768, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.3571428656578064, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 601 }, { "completion_length": 169.92857360839844, "epoch": 0.5777351247600768, "grad_norm": 1.2852569818496704, "kl": 0.08442845195531845, "learning_rate": 2.1405665899265302e-07, "loss": 0.0001, "reward": 1.1071429252624512, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 602 }, { "completion_length": 172.85714721679688, "epoch": 0.5786948176583493, "grad_norm": 0.6889991760253906, "kl": 0.05007026344537735, "learning_rate": 2.1325474571059557e-07, "loss": 0.0001, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 603 }, { "completion_length": 137.35714721679688, "epoch": 0.5796545105566219, "grad_norm": 1.021697759628296, "kl": 0.06963798403739929, "learning_rate": 2.1245321866482015e-07, "loss": 0.0001, "reward": 0.9642857313156128, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 604 }, { "completion_length": 138.42857360839844, "epoch": 0.5806142034548945, "grad_norm": 0.9113430380821228, "kl": 0.060498520731925964, "learning_rate": 2.1165208628032861e-07, "loss": 0.0001, "reward": 1.3571429252624512, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.3571428656578064, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 605 }, { "completion_length": 171.85714721679688, "epoch": 0.581573896353167, "grad_norm": 0.801505446434021, "kl": 0.05136749893426895, "learning_rate": 2.1085135697797424e-07, "loss": 0.0001, "reward": 1.2857143878936768, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.2857142984867096, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 606 }, { "completion_length": 144.35714721679688, "epoch": 0.5825335892514395, "grad_norm": 1.4282052516937256, "kl": 0.06517824530601501, "learning_rate": 2.100510391743732e-07, "loss": 0.0001, "reward": 1.1071429252624512, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 607 }, { "completion_length": 139.57144165039062, "epoch": 0.5834932821497121, "grad_norm": 1.2382047176361084, "kl": 0.058084309101104736, "learning_rate": 2.0925114128181668e-07, "loss": 0.0001, "reward": 1.321428656578064, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.3571428656578064, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 608 }, { "completion_length": 178.7857208251953, "epoch": 0.5844529750479847, "grad_norm": 0.005752503406256437, "kl": 0.03925778716802597, "learning_rate": 2.0845167170818182e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 609 }, { "completion_length": 183.85714721679688, "epoch": 0.5854126679462572, "grad_norm": 1.011277198791504, "kl": 0.035604845732450485, "learning_rate": 2.0765263885684392e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 610 }, { "completion_length": 143.57144165039062, "epoch": 0.5863723608445297, "grad_norm": 1.1991262435913086, "kl": 0.046813108026981354, "learning_rate": 2.068540511265874e-07, "loss": 0.0, "reward": 1.1428571939468384, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 611 }, { "completion_length": 164.21429443359375, "epoch": 0.5873320537428023, "grad_norm": 1.163641095161438, "kl": 0.05320117622613907, "learning_rate": 2.060559169115184e-07, "loss": 0.0001, "reward": 1.0357143878936768, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 612 }, { "completion_length": 166.57144165039062, "epoch": 0.5882917466410749, "grad_norm": 1.6466705799102783, "kl": 0.039578940719366074, "learning_rate": 2.0525824460097568e-07, "loss": 0.0, "reward": 1.2142857313156128, "reward_std": 0.4040610194206238, "rewards/check_gptzero_func": 0.2857142984867096, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 613 }, { "completion_length": 137.1428680419922, "epoch": 0.5892514395393474, "grad_norm": 1.0754694938659668, "kl": 0.07313624024391174, "learning_rate": 2.0446104257944302e-07, "loss": 0.0001, "reward": 1.071428656578064, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 614 }, { "completion_length": 166.5, "epoch": 0.5902111324376199, "grad_norm": 0.9541183710098267, "kl": 0.039802588522434235, "learning_rate": 2.03664319226461e-07, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 615 }, { "completion_length": 160.0, "epoch": 0.5911708253358925, "grad_norm": 1.0466569662094116, "kl": 0.04144104942679405, "learning_rate": 2.028680829165385e-07, "loss": 0.0, "reward": 1.2142857313156128, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.2857142984867096, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 616 }, { "completion_length": 194.7857208251953, "epoch": 0.5921305182341651, "grad_norm": 1.1286543607711792, "kl": 0.050794120877981186, "learning_rate": 2.0207234201906545e-07, "loss": 0.0001, "reward": 1.0, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 617 }, { "completion_length": 182.85714721679688, "epoch": 0.5930902111324377, "grad_norm": 0.8819717764854431, "kl": 0.03890436887741089, "learning_rate": 2.0127710489822385e-07, "loss": 0.0, "reward": 1.1071429252624512, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 618 }, { "completion_length": 145.42857360839844, "epoch": 0.5940499040307101, "grad_norm": 1.0144169330596924, "kl": 0.05514146015048027, "learning_rate": 2.0048237991290107e-07, "loss": 0.0001, "reward": 1.1428571939468384, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 619 }, { "completion_length": 158.0, "epoch": 0.5950095969289827, "grad_norm": 1.141734004020691, "kl": 0.03990887850522995, "learning_rate": 1.9968817541660067e-07, "loss": 0.0, "reward": 1.2142857313156128, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 620 }, { "completion_length": 161.57144165039062, "epoch": 0.5959692898272553, "grad_norm": 1.4746026992797852, "kl": 0.04798189550638199, "learning_rate": 1.9889449975735568e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 621 }, { "completion_length": 153.21429443359375, "epoch": 0.5969289827255279, "grad_norm": 0.6212268471717834, "kl": 0.057322338223457336, "learning_rate": 1.9810136127764032e-07, "loss": 0.0001, "reward": 1.2142857313156128, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 622 }, { "completion_length": 153.7857208251953, "epoch": 0.5978886756238004, "grad_norm": 1.4838128089904785, "kl": 0.04499994218349457, "learning_rate": 1.9730876831428233e-07, "loss": 0.0, "reward": 1.25, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.2857142984867096, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 623 }, { "completion_length": 205.42857360839844, "epoch": 0.5988483685220729, "grad_norm": 0.004490947816520929, "kl": 0.03531130403280258, "learning_rate": 1.965167291983757e-07, "loss": 0.0, "reward": 1.1428571939468384, "reward_std": 0.0, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 624 }, { "completion_length": 179.07144165039062, "epoch": 0.5998080614203455, "grad_norm": 0.6151396036148071, "kl": 0.04394730180501938, "learning_rate": 1.9572525225519224e-07, "loss": 0.0, "reward": 1.2142857313156128, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 625 }, { "completion_length": 205.35714721679688, "epoch": 0.6007677543186181, "grad_norm": 1.2582058906555176, "kl": 0.039726920425891876, "learning_rate": 1.9493434580409544e-07, "loss": 0.0, "reward": 1.2142857313156128, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 626 }, { "completion_length": 157.92857360839844, "epoch": 0.6017274472168906, "grad_norm": 0.5439065098762512, "kl": 0.04480648413300514, "learning_rate": 1.941440181584515e-07, "loss": 0.0, "reward": 1.2142857313156128, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 627 }, { "completion_length": 205.6428680419922, "epoch": 0.6026871401151631, "grad_norm": 0.9204515218734741, "kl": 0.03155241161584854, "learning_rate": 1.933542776255432e-07, "loss": 0.0, "reward": 1.2857143878936768, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.2857142984867096, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 628 }, { "completion_length": 174.85714721679688, "epoch": 0.6036468330134357, "grad_norm": 1.4788198471069336, "kl": 0.04827696457505226, "learning_rate": 1.9256513250648182e-07, "loss": 0.0, "reward": 1.0357143878936768, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 629 }, { "completion_length": 123.35714721679688, "epoch": 0.6046065259117083, "grad_norm": 1.9114413261413574, "kl": 0.051428962498903275, "learning_rate": 1.9177659109612025e-07, "loss": 0.0001, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 630 }, { "completion_length": 176.57144165039062, "epoch": 0.6055662188099808, "grad_norm": 0.5349165201187134, "kl": 0.02382807247340679, "learning_rate": 1.909886616829657e-07, "loss": 0.0, "reward": 1.2142857313156128, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 631 }, { "completion_length": 157.71429443359375, "epoch": 0.6065259117082533, "grad_norm": 1.564664363861084, "kl": 0.055389124900102615, "learning_rate": 1.902013525490922e-07, "loss": 0.0001, "reward": 1.0357143878936768, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 632 }, { "completion_length": 179.42857360839844, "epoch": 0.6074856046065259, "grad_norm": 1.8486913442611694, "kl": 0.04887070879340172, "learning_rate": 1.894146719700546e-07, "loss": 0.0, "reward": 1.1785714626312256, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 633 }, { "completion_length": 146.71429443359375, "epoch": 0.6084452975047985, "grad_norm": 0.8061076998710632, "kl": 0.049586497247219086, "learning_rate": 1.886286282148002e-07, "loss": 0.0, "reward": 1.2142857313156128, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 634 }, { "completion_length": 147.92857360839844, "epoch": 0.6094049904030711, "grad_norm": 0.7764348983764648, "kl": 0.05360729619860649, "learning_rate": 1.87843229545583e-07, "loss": 0.0001, "reward": 1.0, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 635 }, { "completion_length": 181.42857360839844, "epoch": 0.6103646833013435, "grad_norm": 1.2534022331237793, "kl": 0.03543458878993988, "learning_rate": 1.8705848421787608e-07, "loss": 0.0, "reward": 0.9642857313156128, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 636 }, { "completion_length": 167.71429443359375, "epoch": 0.6113243761996161, "grad_norm": 1.6810425519943237, "kl": 0.057258982211351395, "learning_rate": 1.8627440048028538e-07, "loss": 0.0001, "reward": 0.9642857313156128, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 637 }, { "completion_length": 184.71429443359375, "epoch": 0.6122840690978887, "grad_norm": 0.9684247970581055, "kl": 0.03546625003218651, "learning_rate": 1.8549098657446244e-07, "loss": 0.0, "reward": 1.1785714626312256, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 638 }, { "completion_length": 181.71429443359375, "epoch": 0.6132437619961613, "grad_norm": 0.006622991058975458, "kl": 0.047198254615068436, "learning_rate": 1.8470825073501826e-07, "loss": 0.0, "reward": 1.1428571939468384, "reward_std": 0.0, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 639 }, { "completion_length": 134.42857360839844, "epoch": 0.6142034548944337, "grad_norm": 1.0228164196014404, "kl": 0.062422774732112885, "learning_rate": 1.8392620118943674e-07, "loss": 0.0001, "reward": 0.8928571939468384, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 640 }, { "completion_length": 190.92857360839844, "epoch": 0.6151631477927063, "grad_norm": 1.5742608308792114, "kl": 0.03389148414134979, "learning_rate": 1.831448461579876e-07, "loss": 0.0, "reward": 1.2857143878936768, "reward_std": 0.4040610194206238, "rewards/check_gptzero_func": 0.2857142984867096, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 641 }, { "completion_length": 160.92857360839844, "epoch": 0.6161228406909789, "grad_norm": 0.7597067952156067, "kl": 0.04795638844370842, "learning_rate": 1.8236419385364072e-07, "loss": 0.0, "reward": 1.2142857313156128, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 642 }, { "completion_length": 164.21429443359375, "epoch": 0.6170825335892515, "grad_norm": 0.646830141544342, "kl": 0.0702909380197525, "learning_rate": 1.8158425248197928e-07, "loss": 0.0001, "reward": 0.9642857313156128, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 643 }, { "completion_length": 206.00001525878906, "epoch": 0.6180422264875239, "grad_norm": 1.157360315322876, "kl": 0.04231950268149376, "learning_rate": 1.8080503024111392e-07, "loss": 0.0, "reward": 1.2142857313156128, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 644 }, { "completion_length": 110.0714340209961, "epoch": 0.6190019193857965, "grad_norm": 1.1649200916290283, "kl": 0.07593957334756851, "learning_rate": 1.8002653532159605e-07, "loss": 0.0001, "reward": 1.2142857313156128, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.2857142984867096, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 645 }, { "completion_length": 174.50001525878906, "epoch": 0.6199616122840691, "grad_norm": 0.8981557488441467, "kl": 0.03232482820749283, "learning_rate": 1.7924877590633237e-07, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 646 }, { "completion_length": 92.85714721679688, "epoch": 0.6209213051823417, "grad_norm": 2.1949234008789062, "kl": 0.08521221578121185, "learning_rate": 1.784717601704986e-07, "loss": 0.0001, "reward": 1.3571429252624512, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.3571428656578064, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 647 }, { "completion_length": 165.57144165039062, "epoch": 0.6218809980806143, "grad_norm": 1.1382410526275635, "kl": 0.03897751867771149, "learning_rate": 1.7769549628145306e-07, "loss": 0.0, "reward": 1.2857143878936768, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.2857142984867096, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 648 }, { "completion_length": 126.00000762939453, "epoch": 0.6228406909788867, "grad_norm": 1.3615036010742188, "kl": 0.040236808359622955, "learning_rate": 1.7691999239865176e-07, "loss": 0.0, "reward": 1.1428571939468384, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 649 }, { "completion_length": 214.2857208251953, "epoch": 0.6238003838771593, "grad_norm": 0.8918425440788269, "kl": 0.02937857061624527, "learning_rate": 1.7614525667356184e-07, "loss": 0.0, "reward": 1.1428571939468384, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 650 }, { "completion_length": 147.2857208251953, "epoch": 0.6247600767754319, "grad_norm": 1.4499472379684448, "kl": 0.03663484752178192, "learning_rate": 1.753712972495764e-07, "loss": 0.0, "reward": 1.2142857313156128, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 651 }, { "completion_length": 181.00001525878906, "epoch": 0.6257197696737045, "grad_norm": 1.4961059093475342, "kl": 0.04487420618534088, "learning_rate": 1.7459812226192843e-07, "loss": 0.0, "reward": 1.1071429252624512, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 652 }, { "completion_length": 174.1428680419922, "epoch": 0.6266794625719769, "grad_norm": 1.0709645748138428, "kl": 0.07213535904884338, "learning_rate": 1.7382573983760583e-07, "loss": 0.0001, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 653 }, { "completion_length": 140.85714721679688, "epoch": 0.6276391554702495, "grad_norm": 0.00477975606918335, "kl": 0.044244345277547836, "learning_rate": 1.7305415809526553e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 654 }, { "completion_length": 160.92857360839844, "epoch": 0.6285988483685221, "grad_norm": 1.1343010663986206, "kl": 0.04954404756426811, "learning_rate": 1.7228338514514841e-07, "loss": 0.0, "reward": 1.25, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.2857142984867096, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 655 }, { "completion_length": 133.85714721679688, "epoch": 0.6295585412667947, "grad_norm": 0.005836391821503639, "kl": 0.04575802758336067, "learning_rate": 1.7151342908899413e-07, "loss": 0.0, "reward": 1.1428571939468384, "reward_std": 0.0, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 656 }, { "completion_length": 144.35714721679688, "epoch": 0.6305182341650671, "grad_norm": 2.2801005840301514, "kl": 0.05697108805179596, "learning_rate": 1.707442980199555e-07, "loss": 0.0001, "reward": 0.9642857313156128, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3214285969734192, "step": 657 }, { "completion_length": 191.1428680419922, "epoch": 0.6314779270633397, "grad_norm": 1.0583109855651855, "kl": 0.04075342416763306, "learning_rate": 1.6997600002251404e-07, "loss": 0.0, "reward": 1.1428571939468384, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 658 }, { "completion_length": 208.00001525878906, "epoch": 0.6324376199616123, "grad_norm": 0.7784060835838318, "kl": 0.03123745508491993, "learning_rate": 1.6920854317239447e-07, "loss": 0.0, "reward": 1.2857143878936768, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.2857142984867096, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 659 }, { "completion_length": 192.92857360839844, "epoch": 0.6333973128598849, "grad_norm": 1.0977318286895752, "kl": 0.05046728998422623, "learning_rate": 1.6844193553648022e-07, "loss": 0.0001, "reward": 1.1071429252624512, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 660 }, { "completion_length": 148.6428680419922, "epoch": 0.6343570057581573, "grad_norm": 2.2952232360839844, "kl": 0.037598807364702225, "learning_rate": 1.6767618517272837e-07, "loss": 0.0, "reward": 1.0357143878936768, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 661 }, { "completion_length": 160.0, "epoch": 0.6353166986564299, "grad_norm": 1.866260051727295, "kl": 0.049420133233070374, "learning_rate": 1.669113001300851e-07, "loss": 0.0, "reward": 0.9285714626312256, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 662 }, { "completion_length": 158.35714721679688, "epoch": 0.6362763915547025, "grad_norm": 1.847525715827942, "kl": 0.05308343842625618, "learning_rate": 1.6614728844840103e-07, "loss": 0.0001, "reward": 1.321428656578064, "reward_std": 0.5555838942527771, "rewards/check_gptzero_func": 0.3571428656578064, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 663 }, { "completion_length": 165.85714721679688, "epoch": 0.6372360844529751, "grad_norm": 1.1415857076644897, "kl": 0.04242129251360893, "learning_rate": 1.6538415815834665e-07, "loss": 0.0, "reward": 1.3928571939468384, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.4285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 664 }, { "completion_length": 156.2857208251953, "epoch": 0.6381957773512476, "grad_norm": 1.354780673980713, "kl": 0.06304965168237686, "learning_rate": 1.6462191728132825e-07, "loss": 0.0001, "reward": 1.1428571939468384, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 665 }, { "completion_length": 145.07144165039062, "epoch": 0.6391554702495201, "grad_norm": 1.4329015016555786, "kl": 0.050400715321302414, "learning_rate": 1.6386057382940283e-07, "loss": 0.0001, "reward": 1.1071429252624512, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 666 }, { "completion_length": 166.85714721679688, "epoch": 0.6401151631477927, "grad_norm": 1.2051796913146973, "kl": 0.04668375477194786, "learning_rate": 1.6310013580519485e-07, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 667 }, { "completion_length": 147.2857208251953, "epoch": 0.6410748560460653, "grad_norm": 1.9171136617660522, "kl": 0.06445455551147461, "learning_rate": 1.6234061120181143e-07, "loss": 0.0001, "reward": 1.1428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 668 }, { "completion_length": 136.0, "epoch": 0.6420345489443378, "grad_norm": 0.015650549903512, "kl": 0.07570502907037735, "learning_rate": 1.615820080027586e-07, "loss": 0.0001, "reward": 1.071428656578064, "reward_std": 0.0, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 669 }, { "completion_length": 146.6428680419922, "epoch": 0.6429942418426103, "grad_norm": 1.5918002128601074, "kl": 0.046448271721601486, "learning_rate": 1.608243341818573e-07, "loss": 0.0, "reward": 1.2857143878936768, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.2857142984867096, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 670 }, { "completion_length": 212.50001525878906, "epoch": 0.6439539347408829, "grad_norm": 0.00435894588008523, "kl": 0.03422051668167114, "learning_rate": 1.6006759770315976e-07, "loss": 0.0, "reward": 0.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 671 }, { "completion_length": 231.7857208251953, "epoch": 0.6449136276391555, "grad_norm": 0.0049050976522266865, "kl": 0.036567289382219315, "learning_rate": 1.5931180652086528e-07, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.0, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 672 }, { "completion_length": 156.1428680419922, "epoch": 0.6458733205374281, "grad_norm": 1.3731286525726318, "kl": 0.050037965178489685, "learning_rate": 1.5855696857923736e-07, "loss": 0.0001, "reward": 1.3571429252624512, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.3571428656578064, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 673 }, { "completion_length": 150.57144165039062, "epoch": 0.6468330134357005, "grad_norm": 1.3489683866500854, "kl": 0.05751172453165054, "learning_rate": 1.5780309181251965e-07, "loss": 0.0001, "reward": 1.2142857313156128, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 674 }, { "completion_length": 113.71429443359375, "epoch": 0.6477927063339731, "grad_norm": 1.9421262741088867, "kl": 0.09791994839906693, "learning_rate": 1.570501841448526e-07, "loss": 0.0001, "reward": 1.071428656578064, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 675 }, { "completion_length": 159.6428680419922, "epoch": 0.6487523992322457, "grad_norm": 1.6893839836120605, "kl": 0.050973083823919296, "learning_rate": 1.5629825349019052e-07, "loss": 0.0001, "reward": 1.0357143878936768, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 676 }, { "completion_length": 105.5714340209961, "epoch": 0.6497120921305183, "grad_norm": 1.350937008857727, "kl": 0.062127720564603806, "learning_rate": 1.5554730775221786e-07, "loss": 0.0001, "reward": 1.1428571939468384, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.2857142984867096, "rewards/check_language_reward_func": 0.4285714626312256, "rewards/check_similarity_func": 0.4285714626312256, "step": 677 }, { "completion_length": 164.35714721679688, "epoch": 0.6506717850287908, "grad_norm": 0.9317610263824463, "kl": 0.046063248068094254, "learning_rate": 1.5479735482426673e-07, "loss": 0.0, "reward": 1.1071429252624512, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 678 }, { "completion_length": 149.57144165039062, "epoch": 0.6516314779270633, "grad_norm": 0.007995160296559334, "kl": 0.05953742936253548, "learning_rate": 1.540484025892333e-07, "loss": 0.0001, "reward": 1.071428656578064, "reward_std": 0.0, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 679 }, { "completion_length": 210.07144165039062, "epoch": 0.6525911708253359, "grad_norm": 1.5567470788955688, "kl": 0.04421737417578697, "learning_rate": 1.5330045891949555e-07, "loss": 0.0, "reward": 1.0357143878936768, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 680 }, { "completion_length": 174.85714721679688, "epoch": 0.6535508637236085, "grad_norm": 1.447144865989685, "kl": 0.044047243893146515, "learning_rate": 1.5255353167683017e-07, "loss": 0.0, "reward": 1.0, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 681 }, { "completion_length": 166.0, "epoch": 0.654510556621881, "grad_norm": 1.5141979455947876, "kl": 0.05018081143498421, "learning_rate": 1.5180762871232986e-07, "loss": 0.0001, "reward": 1.3928571939468384, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.4285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 682 }, { "completion_length": 193.57144165039062, "epoch": 0.6554702495201535, "grad_norm": 0.7286568284034729, "kl": 0.05122958868741989, "learning_rate": 1.510627578663211e-07, "loss": 0.0001, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 683 }, { "completion_length": 200.42857360839844, "epoch": 0.6564299424184261, "grad_norm": 1.606034278869629, "kl": 0.048463959246873856, "learning_rate": 1.503189269682815e-07, "loss": 0.0, "reward": 1.2857143878936768, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.3571428656578064, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 684 }, { "completion_length": 154.42857360839844, "epoch": 0.6573896353166987, "grad_norm": 1.5067282915115356, "kl": 0.05786016583442688, "learning_rate": 1.4957614383675767e-07, "loss": 0.0001, "reward": 1.1071429252624512, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 685 }, { "completion_length": 204.00001525878906, "epoch": 0.6583493282149712, "grad_norm": 1.3477709293365479, "kl": 0.04527405649423599, "learning_rate": 1.4883441627928272e-07, "loss": 0.0, "reward": 1.2142857313156128, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 686 }, { "completion_length": 151.2857208251953, "epoch": 0.6593090211132437, "grad_norm": 1.299048900604248, "kl": 0.07783549278974533, "learning_rate": 1.480937520922947e-07, "loss": 0.0001, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 687 }, { "completion_length": 161.35714721679688, "epoch": 0.6602687140115163, "grad_norm": 0.8050122261047363, "kl": 0.05560287833213806, "learning_rate": 1.4735415906105417e-07, "loss": 0.0001, "reward": 1.0357143878936768, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 688 }, { "completion_length": 174.07144165039062, "epoch": 0.6612284069097889, "grad_norm": 1.107659101486206, "kl": 0.05306408926844597, "learning_rate": 1.4661564495956268e-07, "loss": 0.0001, "reward": 1.1785714626312256, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 689 }, { "completion_length": 180.6428680419922, "epoch": 0.6621880998080614, "grad_norm": 1.7337760925292969, "kl": 0.05051814764738083, "learning_rate": 1.4587821755048097e-07, "loss": 0.0001, "reward": 1.2857143878936768, "reward_std": 0.4040610194206238, "rewards/check_gptzero_func": 0.2857142984867096, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 690 }, { "completion_length": 148.07144165039062, "epoch": 0.663147792706334, "grad_norm": 1.6371203660964966, "kl": 0.07003616541624069, "learning_rate": 1.4514188458504724e-07, "loss": 0.0001, "reward": 1.1785714626312256, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.2857142984867096, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 691 }, { "completion_length": 113.5714340209961, "epoch": 0.6641074856046065, "grad_norm": 1.4453927278518677, "kl": 0.0649074837565422, "learning_rate": 1.4440665380299593e-07, "loss": 0.0001, "reward": 1.1071429252624512, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 692 }, { "completion_length": 211.42857360839844, "epoch": 0.6650671785028791, "grad_norm": 1.3729969263076782, "kl": 0.036822009831666946, "learning_rate": 1.43672532932476e-07, "loss": 0.0, "reward": 1.1785714626312256, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.2857142984867096, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 693 }, { "completion_length": 147.35714721679688, "epoch": 0.6660268714011516, "grad_norm": 1.2146183252334595, "kl": 0.08302807807922363, "learning_rate": 1.4293952968997022e-07, "loss": 0.0001, "reward": 1.2857143878936768, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.4285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 694 }, { "completion_length": 201.92857360839844, "epoch": 0.6669865642994242, "grad_norm": 1.300707459449768, "kl": 0.036797694861888885, "learning_rate": 1.4220765178021343e-07, "loss": 0.0, "reward": 1.0357143878936768, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3214285969734192, "step": 695 }, { "completion_length": 162.71429443359375, "epoch": 0.6679462571976967, "grad_norm": 1.3128079175949097, "kl": 0.05062852427363396, "learning_rate": 1.4147690689611215e-07, "loss": 0.0001, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 696 }, { "completion_length": 114.78572082519531, "epoch": 0.6689059500959693, "grad_norm": 1.947687029838562, "kl": 0.07400361448526382, "learning_rate": 1.407473027186633e-07, "loss": 0.0001, "reward": 1.071428656578064, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 697 }, { "completion_length": 194.07144165039062, "epoch": 0.6698656429942419, "grad_norm": 1.1122840642929077, "kl": 0.041573118418455124, "learning_rate": 1.400188469168738e-07, "loss": 0.0, "reward": 1.0357143878936768, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 698 }, { "completion_length": 143.42857360839844, "epoch": 0.6708253358925144, "grad_norm": 1.0786800384521484, "kl": 0.0642821416258812, "learning_rate": 1.3929154714767966e-07, "loss": 0.0001, "reward": 1.3571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.3571428656578064, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 699 }, { "completion_length": 168.7857208251953, "epoch": 0.6717850287907869, "grad_norm": 2.009869337081909, "kl": 0.04962259903550148, "learning_rate": 1.3856541105586545e-07, "loss": 0.0, "reward": 1.0357143878936768, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 700 }, { "completion_length": 171.50001525878906, "epoch": 0.6727447216890595, "grad_norm": 0.7950869798660278, "kl": 0.048153121024370193, "learning_rate": 1.3784044627398445e-07, "loss": 0.0, "reward": 1.1428571939468384, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 701 }, { "completion_length": 125.64286041259766, "epoch": 0.6737044145873321, "grad_norm": 1.5690401792526245, "kl": 0.06349439918994904, "learning_rate": 1.371166604222777e-07, "loss": 0.0001, "reward": 1.1071429252624512, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 702 }, { "completion_length": 192.7857208251953, "epoch": 0.6746641074856046, "grad_norm": 1.5067847967147827, "kl": 0.05000684782862663, "learning_rate": 1.363940611085946e-07, "loss": 0.0, "reward": 1.2142857313156128, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.2857142984867096, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 703 }, { "completion_length": 177.6428680419922, "epoch": 0.6756238003838771, "grad_norm": 0.8894903659820557, "kl": 0.05030280724167824, "learning_rate": 1.356726559283125e-07, "loss": 0.0001, "reward": 1.1428571939468384, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 704 }, { "completion_length": 180.1428680419922, "epoch": 0.6765834932821497, "grad_norm": 1.0459188222885132, "kl": 0.05483631044626236, "learning_rate": 1.34952452464257e-07, "loss": 0.0001, "reward": 1.1785714626312256, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.2857142984867096, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 705 }, { "completion_length": 177.7857208251953, "epoch": 0.6775431861804223, "grad_norm": 1.422247290611267, "kl": 0.05492786318063736, "learning_rate": 1.3423345828662235e-07, "loss": 0.0001, "reward": 1.071428656578064, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 706 }, { "completion_length": 150.42857360839844, "epoch": 0.6785028790786948, "grad_norm": 1.783859133720398, "kl": 0.061848435550928116, "learning_rate": 1.335156809528914e-07, "loss": 0.0001, "reward": 1.2142857313156128, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.3571428656578064, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 707 }, { "completion_length": 131.57144165039062, "epoch": 0.6794625719769674, "grad_norm": 2.164271831512451, "kl": 0.07669505476951599, "learning_rate": 1.3279912800775703e-07, "loss": 0.0001, "reward": 1.0357143878936768, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 708 }, { "completion_length": 167.85714721679688, "epoch": 0.6804222648752399, "grad_norm": 1.5513684749603271, "kl": 0.0665874108672142, "learning_rate": 1.320838069830418e-07, "loss": 0.0001, "reward": 1.2142857313156128, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 709 }, { "completion_length": 197.00001525878906, "epoch": 0.6813819577735125, "grad_norm": 1.816601037979126, "kl": 0.04965869337320328, "learning_rate": 1.3136972539761976e-07, "loss": 0.0, "reward": 1.321428656578064, "reward_std": 0.45456865429878235, "rewards/check_gptzero_func": 0.4285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 710 }, { "completion_length": 145.2857208251953, "epoch": 0.682341650671785, "grad_norm": 1.285865306854248, "kl": 0.06681366264820099, "learning_rate": 1.3065689075733682e-07, "loss": 0.0001, "reward": 1.0357143878936768, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 711 }, { "completion_length": 175.85714721679688, "epoch": 0.6833013435700576, "grad_norm": 1.2970638275146484, "kl": 0.05820751190185547, "learning_rate": 1.2994531055493213e-07, "loss": 0.0001, "reward": 0.8214285969734192, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3214285969734192, "step": 712 }, { "completion_length": 174.7857208251953, "epoch": 0.6842610364683301, "grad_norm": 1.9995707273483276, "kl": 0.05508657172322273, "learning_rate": 1.2923499226995883e-07, "loss": 0.0001, "reward": 1.5357143878936768, "reward_std": 0.3535534143447876, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 713 }, { "completion_length": 202.42857360839844, "epoch": 0.6852207293666027, "grad_norm": 1.3515971899032593, "kl": 0.04321598634123802, "learning_rate": 1.2852594336870627e-07, "loss": 0.0, "reward": 1.1071429252624512, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 714 }, { "completion_length": 153.85714721679688, "epoch": 0.6861804222648752, "grad_norm": 2.012162446975708, "kl": 0.05275982990860939, "learning_rate": 1.2781817130412088e-07, "loss": 0.0001, "reward": 1.3928571939468384, "reward_std": 0.3535533845424652, "rewards/check_gptzero_func": 0.4285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 715 }, { "completion_length": 150.21429443359375, "epoch": 0.6871401151631478, "grad_norm": 0.7578821182250977, "kl": 0.06666524708271027, "learning_rate": 1.2711168351572786e-07, "loss": 0.0001, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 716 }, { "completion_length": 193.7857208251953, "epoch": 0.6880998080614203, "grad_norm": 0.005209199618548155, "kl": 0.042060486972332, "learning_rate": 1.264064874295534e-07, "loss": 0.0, "reward": 1.071428656578064, "reward_std": 0.0, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 717 }, { "completion_length": 127.5714340209961, "epoch": 0.6890595009596929, "grad_norm": 4.752388954162598, "kl": 0.1452418863773346, "learning_rate": 1.2570259045804627e-07, "loss": 0.0001, "reward": 1.1785714626312256, "reward_std": 0.3535533845424652, "rewards/check_gptzero_func": 0.2857142984867096, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 718 }, { "completion_length": 145.5, "epoch": 0.6900191938579654, "grad_norm": 2.607269048690796, "kl": 0.06079309433698654, "learning_rate": 1.2500000000000005e-07, "loss": 0.0001, "reward": 1.0357143878936768, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 719 }, { "completion_length": 197.50001525878906, "epoch": 0.690978886756238, "grad_norm": 1.7288379669189453, "kl": 0.08346785604953766, "learning_rate": 1.2429872344047507e-07, "loss": 0.0001, "reward": 1.1428571939468384, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 720 }, { "completion_length": 169.6428680419922, "epoch": 0.6919385796545106, "grad_norm": 1.3455041646957397, "kl": 0.045987121760845184, "learning_rate": 1.235987681507214e-07, "loss": 0.0, "reward": 1.2857143878936768, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.3571428656578064, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 721 }, { "completion_length": 182.35714721679688, "epoch": 0.6928982725527831, "grad_norm": 1.0405672788619995, "kl": 0.0591299906373024, "learning_rate": 1.2290014148810062e-07, "loss": 0.0001, "reward": 1.2142857313156128, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.2857142984867096, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 722 }, { "completion_length": 177.1428680419922, "epoch": 0.6938579654510557, "grad_norm": 1.4984934329986572, "kl": 0.0663982480764389, "learning_rate": 1.2220285079600915e-07, "loss": 0.0001, "reward": 1.0, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 723 }, { "completion_length": 145.35714721679688, "epoch": 0.6948176583493282, "grad_norm": 1.6629459857940674, "kl": 0.07844380289316177, "learning_rate": 1.2150690340380061e-07, "loss": 0.0001, "reward": 1.2857143878936768, "reward_std": 0.4040610194206238, "rewards/check_gptzero_func": 0.2857142984867096, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 724 }, { "completion_length": 159.07144165039062, "epoch": 0.6957773512476008, "grad_norm": 1.5506623983383179, "kl": 0.08201645314693451, "learning_rate": 1.2081230662670907e-07, "loss": 0.0001, "reward": 1.1428571939468384, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 725 }, { "completion_length": 154.42857360839844, "epoch": 0.6967370441458733, "grad_norm": 2.1414878368377686, "kl": 0.05297980085015297, "learning_rate": 1.2011906776577202e-07, "loss": 0.0001, "reward": 1.2142857313156128, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.3571428656578064, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 726 }, { "completion_length": 165.35714721679688, "epoch": 0.6976967370441459, "grad_norm": 1.0493385791778564, "kl": 0.07592572271823883, "learning_rate": 1.1942719410775335e-07, "loss": 0.0001, "reward": 1.1785714626312256, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.2857142984867096, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 727 }, { "completion_length": 167.1428680419922, "epoch": 0.6986564299424184, "grad_norm": 1.5054583549499512, "kl": 0.05087639391422272, "learning_rate": 1.1873669292506749e-07, "loss": 0.0001, "reward": 1.25, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.2857142984867096, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 728 }, { "completion_length": 205.92857360839844, "epoch": 0.699616122840691, "grad_norm": 1.675349235534668, "kl": 0.10205795615911484, "learning_rate": 1.1804757147570213e-07, "loss": 0.0001, "reward": 1.2857143878936768, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.3571428656578064, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 729 }, { "completion_length": 172.85714721679688, "epoch": 0.7005758157389635, "grad_norm": 0.008327499963343143, "kl": 0.06392735242843628, "learning_rate": 1.1735983700314256e-07, "loss": 0.0001, "reward": 1.1428571939468384, "reward_std": 0.0, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 730 }, { "completion_length": 157.7857208251953, "epoch": 0.7015355086372361, "grad_norm": 2.052877902984619, "kl": 0.05818486586213112, "learning_rate": 1.1667349673629526e-07, "loss": 0.0001, "reward": 0.7500000596046448, "reward_std": 0.3535534143447876, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.1071428656578064, "step": 731 }, { "completion_length": 152.1428680419922, "epoch": 0.7024952015355086, "grad_norm": 1.7485111951828003, "kl": 0.07357092201709747, "learning_rate": 1.1598855788941189e-07, "loss": 0.0001, "reward": 1.1428571939468384, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 732 }, { "completion_length": 159.71429443359375, "epoch": 0.7034548944337812, "grad_norm": 1.0210052728652954, "kl": 0.07677095383405685, "learning_rate": 1.1530502766201369e-07, "loss": 0.0001, "reward": 1.1071429252624512, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 733 }, { "completion_length": 127.78572082519531, "epoch": 0.7044145873320538, "grad_norm": 2.312889575958252, "kl": 0.06817948818206787, "learning_rate": 1.1462291323881528e-07, "loss": 0.0001, "reward": 1.1071429252624512, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.2857142984867096, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3214285969734192, "step": 734 }, { "completion_length": 179.57144165039062, "epoch": 0.7053742802303263, "grad_norm": 1.9471819400787354, "kl": 0.04602804034948349, "learning_rate": 1.139422217896499e-07, "loss": 0.0, "reward": 1.321428656578064, "reward_std": 0.45456868410110474, "rewards/check_gptzero_func": 0.3571428656578064, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 735 }, { "completion_length": 148.5, "epoch": 0.7063339731285988, "grad_norm": 2.4381513595581055, "kl": 0.07359839975833893, "learning_rate": 1.1326296046939333e-07, "loss": 0.0001, "reward": 1.1428571939468384, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.2857142984867096, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 736 }, { "completion_length": 124.21429443359375, "epoch": 0.7072936660268714, "grad_norm": 2.6379876136779785, "kl": 0.12841062247753143, "learning_rate": 1.1258513641788913e-07, "loss": 0.0001, "reward": 1.071428656578064, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 737 }, { "completion_length": 127.5714340209961, "epoch": 0.708253358925144, "grad_norm": 2.615793466567993, "kl": 0.07313230633735657, "learning_rate": 1.1190875675987355e-07, "loss": 0.0001, "reward": 1.071428656578064, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 738 }, { "completion_length": 205.07144165039062, "epoch": 0.7092130518234165, "grad_norm": 1.5683133602142334, "kl": 0.041997261345386505, "learning_rate": 1.1123382860490035e-07, "loss": 0.0, "reward": 1.1785714626312256, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 739 }, { "completion_length": 137.42857360839844, "epoch": 0.710172744721689, "grad_norm": 2.12727952003479, "kl": 0.1277492344379425, "learning_rate": 1.1056035904726651e-07, "loss": 0.0001, "reward": 1.321428656578064, "reward_std": 0.3535534143447876, "rewards/check_gptzero_func": 0.4285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 740 }, { "completion_length": 151.85714721679688, "epoch": 0.7111324376199616, "grad_norm": 2.1246562004089355, "kl": 0.08422967791557312, "learning_rate": 1.0988835516593712e-07, "loss": 0.0001, "reward": 1.1428571939468384, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.2857142984867096, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 741 }, { "completion_length": 174.07144165039062, "epoch": 0.7120921305182342, "grad_norm": 1.2658343315124512, "kl": 0.05658894032239914, "learning_rate": 1.0921782402447158e-07, "loss": 0.0001, "reward": 1.1428571939468384, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 742 }, { "completion_length": 138.0, "epoch": 0.7130518234165067, "grad_norm": 2.0425479412078857, "kl": 0.07248783111572266, "learning_rate": 1.085487726709487e-07, "loss": 0.0001, "reward": 1.3928571939468384, "reward_std": 0.3535533845424652, "rewards/check_gptzero_func": 0.4285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 743 }, { "completion_length": 134.1428680419922, "epoch": 0.7140115163147792, "grad_norm": 0.007306164596229792, "kl": 0.056678883731365204, "learning_rate": 1.0788120813789326e-07, "loss": 0.0001, "reward": 1.2857143878936768, "reward_std": 0.0, "rewards/check_gptzero_func": 0.4285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 744 }, { "completion_length": 130.6428680419922, "epoch": 0.7149712092130518, "grad_norm": 2.379451274871826, "kl": 0.07192464172840118, "learning_rate": 1.0721513744220168e-07, "loss": 0.0001, "reward": 1.2142857313156128, "reward_std": 0.4040610194206238, "rewards/check_gptzero_func": 0.3571428656578064, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 745 }, { "completion_length": 153.5, "epoch": 0.7159309021113244, "grad_norm": 0.007944568991661072, "kl": 0.06385745108127594, "learning_rate": 1.0655056758506845e-07, "loss": 0.0001, "reward": 1.071428656578064, "reward_std": 0.0, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 746 }, { "completion_length": 167.35714721679688, "epoch": 0.716890595009597, "grad_norm": 1.306231141090393, "kl": 0.06023316830396652, "learning_rate": 1.0588750555191225e-07, "loss": 0.0001, "reward": 1.071428656578064, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 747 }, { "completion_length": 151.07144165039062, "epoch": 0.7178502879078695, "grad_norm": 2.2272398471832275, "kl": 0.05436970293521881, "learning_rate": 1.0522595831230294e-07, "loss": 0.0001, "reward": 1.3571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.4285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 748 }, { "completion_length": 197.92857360839844, "epoch": 0.718809980806142, "grad_norm": 1.896213173866272, "kl": 0.07121201604604721, "learning_rate": 1.0456593281988815e-07, "loss": 0.0001, "reward": 1.321428656578064, "reward_std": 0.3535533845424652, "rewards/check_gptzero_func": 0.4285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 749 }, { "completion_length": 176.1428680419922, "epoch": 0.7197696737044146, "grad_norm": 2.0774710178375244, "kl": 0.06472312659025192, "learning_rate": 1.0390743601231983e-07, "loss": 0.0001, "reward": 1.071428656578064, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 750 }, { "completion_length": 146.6428680419922, "epoch": 0.7207293666026872, "grad_norm": 0.9923790097236633, "kl": 0.07779577374458313, "learning_rate": 1.0325047481118191e-07, "loss": 0.0001, "reward": 1.1428571939468384, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 751 }, { "completion_length": 193.35714721679688, "epoch": 0.7216890595009597, "grad_norm": 1.19814133644104, "kl": 0.05680115148425102, "learning_rate": 1.0259505612191724e-07, "loss": 0.0001, "reward": 1.4642857313156128, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.5, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 752 }, { "completion_length": 158.5, "epoch": 0.7226487523992322, "grad_norm": 2.810091018676758, "kl": 0.07115041464567184, "learning_rate": 1.0194118683375502e-07, "loss": 0.0001, "reward": 1.25, "reward_std": 0.3535534143447876, "rewards/check_gptzero_func": 0.4285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3214285969734192, "step": 753 }, { "completion_length": 175.7857208251953, "epoch": 0.7236084452975048, "grad_norm": 0.8033470511436462, "kl": 0.06011195853352547, "learning_rate": 1.0128887381963826e-07, "loss": 0.0001, "reward": 1.2142857313156128, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 754 }, { "completion_length": 154.21429443359375, "epoch": 0.7245681381957774, "grad_norm": 1.8986526727676392, "kl": 0.08022084832191467, "learning_rate": 1.0063812393615198e-07, "loss": 0.0001, "reward": 0.9642857313156128, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3214285969734192, "step": 755 }, { "completion_length": 172.2857208251953, "epoch": 0.72552783109405, "grad_norm": 1.81910240650177, "kl": 0.062163565307855606, "learning_rate": 9.998894402345043e-08, "loss": 0.0001, "reward": 1.071428656578064, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 756 }, { "completion_length": 162.35714721679688, "epoch": 0.7264875239923224, "grad_norm": 2.0797665119171143, "kl": 0.06442483514547348, "learning_rate": 9.934134090518592e-08, "loss": 0.0001, "reward": 1.2142857313156128, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.2857142984867096, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 757 }, { "completion_length": 162.5, "epoch": 0.727447216890595, "grad_norm": 1.7525032758712769, "kl": 0.07218505442142487, "learning_rate": 9.869532138843672e-08, "loss": 0.0001, "reward": 1.1071429252624512, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 758 }, { "completion_length": 172.6428680419922, "epoch": 0.7284069097888676, "grad_norm": 2.1933445930480957, "kl": 0.07267063856124878, "learning_rate": 9.805089226363553e-08, "loss": 0.0001, "reward": 1.071428656578064, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 759 }, { "completion_length": 168.57144165039062, "epoch": 0.7293666026871402, "grad_norm": 1.5241202116012573, "kl": 0.05872455984354019, "learning_rate": 9.740806030449822e-08, "loss": 0.0001, "reward": 1.2857143878936768, "reward_std": 0.4040610194206238, "rewards/check_gptzero_func": 0.4285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 760 }, { "completion_length": 192.6428680419922, "epoch": 0.7303262955854126, "grad_norm": 1.3369804620742798, "kl": 0.04724837839603424, "learning_rate": 9.676683226795229e-08, "loss": 0.0, "reward": 1.4285714626312256, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.5, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 761 }, { "completion_length": 209.2857208251953, "epoch": 0.7312859884836852, "grad_norm": 1.6892890930175781, "kl": 0.0544462576508522, "learning_rate": 9.612721489406647e-08, "loss": 0.0001, "reward": 1.1428571939468384, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 762 }, { "completion_length": 135.2857208251953, "epoch": 0.7322456813819578, "grad_norm": 2.6419711112976074, "kl": 0.06799021363258362, "learning_rate": 9.548921490597916e-08, "loss": 0.0001, "reward": 1.321428656578064, "reward_std": 0.5555838942527771, "rewards/check_gptzero_func": 0.5, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3214285969734192, "step": 763 }, { "completion_length": 164.42857360839844, "epoch": 0.7332053742802304, "grad_norm": 1.4180176258087158, "kl": 0.0769302099943161, "learning_rate": 9.485283900982841e-08, "loss": 0.0001, "reward": 1.1428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 764 }, { "completion_length": 170.6428680419922, "epoch": 0.7341650671785028, "grad_norm": 2.9922935962677, "kl": 0.06449516117572784, "learning_rate": 9.421809389468097e-08, "loss": 0.0001, "reward": 1.2142857313156128, "reward_std": 0.5050762891769409, "rewards/check_gptzero_func": 0.5, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.2142857313156128, "step": 765 }, { "completion_length": 137.71429443359375, "epoch": 0.7351247600767754, "grad_norm": 2.722059726715088, "kl": 0.09102123230695724, "learning_rate": 9.358498623246219e-08, "loss": 0.0001, "reward": 1.071428656578064, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 766 }, { "completion_length": 134.7857208251953, "epoch": 0.736084452975048, "grad_norm": 2.4659318923950195, "kl": 0.08341188728809357, "learning_rate": 9.295352267788592e-08, "loss": 0.0001, "reward": 1.2142857313156128, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.3571428656578064, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 767 }, { "completion_length": 184.07144165039062, "epoch": 0.7370441458733206, "grad_norm": 1.7298033237457275, "kl": 0.055890895426273346, "learning_rate": 9.232370986838428e-08, "loss": 0.0001, "reward": 1.2142857313156128, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.2857142984867096, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 768 }, { "completion_length": 164.85714721679688, "epoch": 0.738003838771593, "grad_norm": 2.108725070953369, "kl": 0.07457836717367172, "learning_rate": 9.169555442403834e-08, "loss": 0.0001, "reward": 1.1071429252624512, "reward_std": 0.45456865429878235, "rewards/check_gptzero_func": 0.2857142984867096, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3214285969734192, "step": 769 }, { "completion_length": 171.2857208251953, "epoch": 0.7389635316698656, "grad_norm": 1.3671447038650513, "kl": 0.09678307920694351, "learning_rate": 9.106906294750804e-08, "loss": 0.0001, "reward": 1.2142857313156128, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 770 }, { "completion_length": 151.57144165039062, "epoch": 0.7399232245681382, "grad_norm": 1.6705909967422485, "kl": 0.06276058405637741, "learning_rate": 9.044424202396325e-08, "loss": 0.0001, "reward": 1.1785714626312256, "reward_std": 0.3535533845424652, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 771 }, { "completion_length": 213.1428680419922, "epoch": 0.7408829174664108, "grad_norm": 0.929561972618103, "kl": 0.05355656519532204, "learning_rate": 8.982109822101425e-08, "loss": 0.0001, "reward": 1.3928571939468384, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.4285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 772 }, { "completion_length": 186.6428680419922, "epoch": 0.7418426103646834, "grad_norm": 1.939001441001892, "kl": 0.06761445850133896, "learning_rate": 8.919963808864283e-08, "loss": 0.0001, "reward": 1.1428571939468384, "reward_std": 0.4040610194206238, "rewards/check_gptzero_func": 0.2857142984867096, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 773 }, { "completion_length": 145.85714721679688, "epoch": 0.7428023032629558, "grad_norm": 1.723799705505371, "kl": 0.0929340198636055, "learning_rate": 8.857986815913351e-08, "loss": 0.0001, "reward": 1.1428571939468384, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.3571428656578064, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.2857142984867096, "step": 774 }, { "completion_length": 180.07144165039062, "epoch": 0.7437619961612284, "grad_norm": 0.7628232836723328, "kl": 0.058332376182079315, "learning_rate": 8.796179494700439e-08, "loss": 0.0001, "reward": 1.2142857313156128, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 775 }, { "completion_length": 157.57144165039062, "epoch": 0.744721689059501, "grad_norm": 2.048546552658081, "kl": 0.07301290333271027, "learning_rate": 8.734542494893954e-08, "loss": 0.0001, "reward": 1.1785714626312256, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.2857142984867096, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 776 }, { "completion_length": 140.42857360839844, "epoch": 0.7456813819577736, "grad_norm": 1.2849981784820557, "kl": 0.09661278873682022, "learning_rate": 8.673076464371979e-08, "loss": 0.0001, "reward": 1.1428571939468384, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 777 }, { "completion_length": 183.35714721679688, "epoch": 0.746641074856046, "grad_norm": 1.1351187229156494, "kl": 0.06077054888010025, "learning_rate": 8.611782049215532e-08, "loss": 0.0001, "reward": 1.3928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.25, "step": 778 }, { "completion_length": 180.92857360839844, "epoch": 0.7476007677543186, "grad_norm": 2.457608938217163, "kl": 0.12941613793373108, "learning_rate": 8.550659893701753e-08, "loss": 0.0001, "reward": 0.9642857313156128, "reward_std": 0.3535534143447876, "rewards/check_gptzero_func": 0.1428571492433548, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3214285969734192, "step": 779 }, { "completion_length": 225.71429443359375, "epoch": 0.7485604606525912, "grad_norm": 1.3477623462677002, "kl": 0.05514109879732132, "learning_rate": 8.489710640297124e-08, "loss": 0.0001, "reward": 1.321428656578064, "reward_std": 0.3535534143447876, "rewards/check_gptzero_func": 0.3571428656578064, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 780 }, { "completion_length": 171.21429443359375, "epoch": 0.7495201535508638, "grad_norm": 1.160226583480835, "kl": 0.06894733756780624, "learning_rate": 8.4289349296507e-08, "loss": 0.0001, "reward": 1.321428656578064, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.4285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 781 }, { "completion_length": 176.85714721679688, "epoch": 0.7504798464491362, "grad_norm": 0.005965807009488344, "kl": 0.0718400627374649, "learning_rate": 8.368333400587419e-08, "loss": 0.0001, "reward": 1.4285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 782 }, { "completion_length": 146.5, "epoch": 0.7514395393474088, "grad_norm": 2.761793851852417, "kl": 0.07628853619098663, "learning_rate": 8.307906690101363e-08, "loss": 0.0001, "reward": 1.321428656578064, "reward_std": 0.45456868410110474, "rewards/check_gptzero_func": 0.5, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3214285969734192, "step": 783 }, { "completion_length": 173.00001525878906, "epoch": 0.7523992322456814, "grad_norm": 1.3816022872924805, "kl": 0.05990772694349289, "learning_rate": 8.247655433349046e-08, "loss": 0.0001, "reward": 1.1785714626312256, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 784 }, { "completion_length": 155.2857208251953, "epoch": 0.753358925143954, "grad_norm": 2.4853899478912354, "kl": 0.07434310019016266, "learning_rate": 8.187580263642768e-08, "loss": 0.0001, "reward": 1.25, "reward_std": 0.45456865429878235, "rewards/check_gptzero_func": 0.3571428656578064, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 785 }, { "completion_length": 133.35714721679688, "epoch": 0.7543186180422264, "grad_norm": 2.3404414653778076, "kl": 0.13104891777038574, "learning_rate": 8.127681812443946e-08, "loss": 0.0001, "reward": 1.4285714626312256, "reward_std": 0.4040609896183014, "rewards/check_gptzero_func": 0.5, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 786 }, { "completion_length": 218.6428680419922, "epoch": 0.755278310940499, "grad_norm": 0.5206183195114136, "kl": 0.05267123878002167, "learning_rate": 8.067960709356478e-08, "loss": 0.0001, "reward": 1.2142857313156128, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 787 }, { "completion_length": 202.71429443359375, "epoch": 0.7562380038387716, "grad_norm": 1.561854600906372, "kl": 0.053069278597831726, "learning_rate": 8.008417582120097e-08, "loss": 0.0001, "reward": 1.3571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.4285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 788 }, { "completion_length": 149.35714721679688, "epoch": 0.7571976967370442, "grad_norm": 1.8281679153442383, "kl": 0.09013672918081284, "learning_rate": 7.94905305660384e-08, "loss": 0.0001, "reward": 1.4285714626312256, "reward_std": 0.5050762891769409, "rewards/check_gptzero_func": 0.5, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 789 }, { "completion_length": 160.07144165039062, "epoch": 0.7581573896353166, "grad_norm": 1.4283928871154785, "kl": 0.07369735091924667, "learning_rate": 7.889867756799384e-08, "loss": 0.0001, "reward": 1.2142857313156128, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.2857142984867096, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 790 }, { "completion_length": 129.92857360839844, "epoch": 0.7591170825335892, "grad_norm": 2.746553421020508, "kl": 0.11436791718006134, "learning_rate": 7.830862304814564e-08, "loss": 0.0001, "reward": 1.3571429252624512, "reward_std": 0.5050762891769409, "rewards/check_gptzero_func": 0.4285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 791 }, { "completion_length": 151.85714721679688, "epoch": 0.7600767754318618, "grad_norm": 1.18337082862854, "kl": 0.11057320982217789, "learning_rate": 7.772037320866786e-08, "loss": 0.0001, "reward": 1.0, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.0714285746216774, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 792 }, { "completion_length": 185.85714721679688, "epoch": 0.7610364683301344, "grad_norm": 2.010540246963501, "kl": 0.05724227800965309, "learning_rate": 7.71339342327653e-08, "loss": 0.0001, "reward": 1.25, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.3571428656578064, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 793 }, { "completion_length": 195.85714721679688, "epoch": 0.761996161228407, "grad_norm": 2.186030149459839, "kl": 0.07007696479558945, "learning_rate": 7.65493122846084e-08, "loss": 0.0001, "reward": 1.25, "reward_std": 0.3535534143447876, "rewards/check_gptzero_func": 0.3571428656578064, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 794 }, { "completion_length": 156.07144165039062, "epoch": 0.7629558541266794, "grad_norm": 1.8844494819641113, "kl": 0.08488921076059341, "learning_rate": 7.596651350926836e-08, "loss": 0.0001, "reward": 1.4642857313156128, "reward_std": 0.5555838942527771, "rewards/check_gptzero_func": 0.5, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 795 }, { "completion_length": 152.71429443359375, "epoch": 0.763915547024952, "grad_norm": 1.9277139902114868, "kl": 0.08093374222517014, "learning_rate": 7.53855440326529e-08, "loss": 0.0001, "reward": 1.0, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.2857142984867096, "step": 796 }, { "completion_length": 173.92857360839844, "epoch": 0.7648752399232246, "grad_norm": 1.9168744087219238, "kl": 0.062082935124635696, "learning_rate": 7.480640996144136e-08, "loss": 0.0001, "reward": 1.1428571939468384, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.3571428656578064, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.2857142984867096, "step": 797 }, { "completion_length": 176.1428680419922, "epoch": 0.7658349328214972, "grad_norm": 2.416025400161743, "kl": 0.059505052864551544, "learning_rate": 7.422911738302104e-08, "loss": 0.0001, "reward": 1.1785714626312256, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.3571428656578064, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3214285969734192, "step": 798 }, { "completion_length": 249.07144165039062, "epoch": 0.7667946257197696, "grad_norm": 0.7423478960990906, "kl": 0.04660583660006523, "learning_rate": 7.365367236542283e-08, "loss": 0.0, "reward": 1.3928571939468384, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.4285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 799 }, { "completion_length": 143.7857208251953, "epoch": 0.7677543186180422, "grad_norm": 2.515890121459961, "kl": 0.09107743203639984, "learning_rate": 7.30800809572576e-08, "loss": 0.0001, "reward": 1.1071429252624512, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 800 }, { "completion_length": 191.85714721679688, "epoch": 0.7687140115163148, "grad_norm": 2.4360485076904297, "kl": 0.06705436110496521, "learning_rate": 4.5215892322153824e-07, "loss": 0.0001, "reward": 1.2857143878936768, "reward_std": 0.4040610194206238, "rewards/check_gptzero_func": 0.4285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 801 }, { "completion_length": 177.35714721679688, "epoch": 0.7696737044145874, "grad_norm": 1.9346245527267456, "kl": 0.05819321796298027, "learning_rate": 4.519998587381474e-07, "loss": 0.0001, "reward": 1.321428656578064, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.4285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 802 }, { "completion_length": 185.92857360839844, "epoch": 0.7706333973128598, "grad_norm": 2.2615737915039062, "kl": 0.08867713809013367, "learning_rate": 4.518405583369589e-07, "loss": 0.0001, "reward": 1.1785714626312256, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.2142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 803 }, { "completion_length": 154.71429443359375, "epoch": 0.7715930902111324, "grad_norm": 1.8738528490066528, "kl": 0.07673639059066772, "learning_rate": 4.516810222040213e-07, "loss": 0.0001, "reward": 1.2142857313156128, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.3571428656578064, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 804 }, { "completion_length": 151.2857208251953, "epoch": 0.772552783109405, "grad_norm": 2.270580291748047, "kl": 0.07542555034160614, "learning_rate": 4.515212505256587e-07, "loss": 0.0001, "reward": 1.1071429252624512, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.2857142984867096, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3214285969734192, "step": 805 }, { "completion_length": 212.71429443359375, "epoch": 0.7735124760076776, "grad_norm": 1.1095463037490845, "kl": 0.06740870326757431, "learning_rate": 4.5136124348847013e-07, "loss": 0.0001, "reward": 1.4642857313156128, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 806 }, { "completion_length": 164.6428680419922, "epoch": 0.77447216890595, "grad_norm": 1.8266383409500122, "kl": 0.07076450437307358, "learning_rate": 4.5120100127932954e-07, "loss": 0.0001, "reward": 1.5000001192092896, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 807 }, { "completion_length": 216.85714721679688, "epoch": 0.7754318618042226, "grad_norm": 1.4657787084579468, "kl": 0.05043704807758331, "learning_rate": 4.510405240853854e-07, "loss": 0.0001, "reward": 1.321428656578064, "reward_std": 0.45456868410110474, "rewards/check_gptzero_func": 0.3571428656578064, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 808 }, { "completion_length": 125.5714340209961, "epoch": 0.7763915547024952, "grad_norm": 1.991865873336792, "kl": 0.1235930472612381, "learning_rate": 4.508798120940609e-07, "loss": 0.0001, "reward": 1.2142857313156128, "reward_std": 0.4040610194206238, "rewards/check_gptzero_func": 0.2857142984867096, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 809 }, { "completion_length": 203.00001525878906, "epoch": 0.7773512476007678, "grad_norm": 1.2065846920013428, "kl": 0.06126104295253754, "learning_rate": 4.507188654930532e-07, "loss": 0.0001, "reward": 1.1428571939468384, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.2857142984867096, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 810 }, { "completion_length": 152.5, "epoch": 0.7783109404990403, "grad_norm": 2.2621612548828125, "kl": 0.10955814272165298, "learning_rate": 4.5055768447033346e-07, "loss": 0.0001, "reward": 1.25, "reward_std": 0.45456865429878235, "rewards/check_gptzero_func": 0.3571428656578064, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 811 }, { "completion_length": 165.42857360839844, "epoch": 0.7792706333973128, "grad_norm": 1.7224174737930298, "kl": 0.12461017072200775, "learning_rate": 4.5039626921414695e-07, "loss": 0.0001, "reward": 1.3571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.2857142984867096, "step": 812 }, { "completion_length": 160.35714721679688, "epoch": 0.7802303262955854, "grad_norm": 2.110593557357788, "kl": 0.11466866731643677, "learning_rate": 4.5023461991301216e-07, "loss": 0.0001, "reward": 1.2142857313156128, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.4285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.2857142984867096, "step": 813 }, { "completion_length": 173.35714721679688, "epoch": 0.781190019193858, "grad_norm": 3.1460063457489014, "kl": 0.08593205362558365, "learning_rate": 4.50072736755721e-07, "loss": 0.0001, "reward": 1.2142857313156128, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.3571428656578064, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 814 }, { "completion_length": 161.42857360839844, "epoch": 0.7821497120921305, "grad_norm": 2.868330955505371, "kl": 0.11551554501056671, "learning_rate": 4.499106199313386e-07, "loss": 0.0001, "reward": 1.25, "reward_std": 0.45456865429878235, "rewards/check_gptzero_func": 0.3571428656578064, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 815 }, { "completion_length": 178.42857360839844, "epoch": 0.783109404990403, "grad_norm": 2.1494390964508057, "kl": 0.08347202092409134, "learning_rate": 4.49748269629203e-07, "loss": 0.0001, "reward": 1.321428656578064, "reward_std": 0.3535533845424652, "rewards/check_gptzero_func": 0.5, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3214285969734192, "step": 816 }, { "completion_length": 169.85714721679688, "epoch": 0.7840690978886756, "grad_norm": 1.249086618423462, "kl": 0.11863739788532257, "learning_rate": 4.4958568603892467e-07, "loss": 0.0001, "reward": 1.321428656578064, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.5, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3214285969734192, "step": 817 }, { "completion_length": 218.42857360839844, "epoch": 0.7850287907869482, "grad_norm": 1.979446530342102, "kl": 0.0949835479259491, "learning_rate": 4.4942286935038687e-07, "loss": 0.0001, "reward": 1.2142857313156128, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.2857142984867096, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 818 }, { "completion_length": 172.35714721679688, "epoch": 0.7859884836852208, "grad_norm": 1.3448765277862549, "kl": 0.11870449781417847, "learning_rate": 4.492598197537449e-07, "loss": 0.0001, "reward": 1.3571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.2142857313156128, "step": 819 }, { "completion_length": 172.07144165039062, "epoch": 0.7869481765834933, "grad_norm": 1.8413708209991455, "kl": 0.09352341294288635, "learning_rate": 4.4909653743942614e-07, "loss": 0.0001, "reward": 1.2142857313156128, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.3571428656578064, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 820 }, { "completion_length": 222.50001525878906, "epoch": 0.7879078694817658, "grad_norm": 1.468582272529602, "kl": 0.07583273947238922, "learning_rate": 4.489330225981298e-07, "loss": 0.0001, "reward": 1.25, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.4285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3214285969734192, "step": 821 }, { "completion_length": 172.50001525878906, "epoch": 0.7888675623800384, "grad_norm": 1.0841811895370483, "kl": 0.0772314965724945, "learning_rate": 4.4876927542082654e-07, "loss": 0.0001, "reward": 1.4642857313156128, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.25, "step": 822 }, { "completion_length": 150.57144165039062, "epoch": 0.789827255278311, "grad_norm": 2.954563617706299, "kl": 0.12368310242891312, "learning_rate": 4.486052960987585e-07, "loss": 0.0001, "reward": 1.321428656578064, "reward_std": 0.5555838942527771, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.1785714328289032, "step": 823 }, { "completion_length": 135.85714721679688, "epoch": 0.7907869481765835, "grad_norm": 2.2601511478424072, "kl": 0.14680370688438416, "learning_rate": 4.484410848234388e-07, "loss": 0.0001, "reward": 1.3571429252624512, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.0714285746216774, "step": 824 }, { "completion_length": 186.35714721679688, "epoch": 0.791746641074856, "grad_norm": 1.7349586486816406, "kl": 0.1366397738456726, "learning_rate": 4.482766417866517e-07, "loss": 0.0001, "reward": 1.6071429252624512, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3214285969734192, "step": 825 }, { "completion_length": 181.50001525878906, "epoch": 0.7927063339731286, "grad_norm": 0.9491437673568726, "kl": 0.13190196454524994, "learning_rate": 4.4811196718045184e-07, "loss": 0.0001, "reward": 1.6071429252624512, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.1071428656578064, "step": 826 }, { "completion_length": 164.0, "epoch": 0.7936660268714012, "grad_norm": 2.72786808013916, "kl": 0.1335868537425995, "learning_rate": 4.479470611971645e-07, "loss": 0.0001, "reward": 1.2857143878936768, "reward_std": 0.5050762891769409, "rewards/check_gptzero_func": 0.5, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.2857142984867096, "step": 827 }, { "completion_length": 199.92857360839844, "epoch": 0.7946257197696737, "grad_norm": 2.5156641006469727, "kl": 0.1045866534113884, "learning_rate": 4.477819240293852e-07, "loss": 0.0001, "reward": 1.2857143878936768, "reward_std": 0.4040610194206238, "rewards/check_gptzero_func": 0.5, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.2857142984867096, "step": 828 }, { "completion_length": 161.0, "epoch": 0.7955854126679462, "grad_norm": 2.4781899452209473, "kl": 0.1255405694246292, "learning_rate": 4.4761655586997926e-07, "loss": 0.0001, "reward": 1.321428656578064, "reward_std": 0.3535533845424652, "rewards/check_gptzero_func": 0.5, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3214285969734192, "step": 829 }, { "completion_length": 174.7857208251953, "epoch": 0.7965451055662188, "grad_norm": 2.5143253803253174, "kl": 0.13061754405498505, "learning_rate": 4.4745095691208214e-07, "loss": 0.0001, "reward": 1.2142857313156128, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.2142857313156128, "step": 830 }, { "completion_length": 183.35714721679688, "epoch": 0.7975047984644914, "grad_norm": 2.018907308578491, "kl": 0.10846563428640366, "learning_rate": 4.472851273490984e-07, "loss": 0.0001, "reward": 1.25, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.1785714328289032, "step": 831 }, { "completion_length": 144.92857360839844, "epoch": 0.7984644913627639, "grad_norm": 1.766376256942749, "kl": 0.14145779609680176, "learning_rate": 4.471190673747023e-07, "loss": 0.0001, "reward": 1.25, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.1071428656578064, "step": 832 }, { "completion_length": 146.21429443359375, "epoch": 0.7994241842610365, "grad_norm": 1.682865858078003, "kl": 0.12663666903972626, "learning_rate": 4.4695277718283707e-07, "loss": 0.0001, "reward": 1.2857143878936768, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.5, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.2857142984867096, "step": 833 }, { "completion_length": 164.07144165039062, "epoch": 0.800383877159309, "grad_norm": 1.8588975667953491, "kl": 0.09940137714147568, "learning_rate": 4.467862569677148e-07, "loss": 0.0001, "reward": 1.5357143878936768, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.25, "step": 834 }, { "completion_length": 117.64286041259766, "epoch": 0.8013435700575816, "grad_norm": 1.2202250957489014, "kl": 0.15375176072120667, "learning_rate": 4.4661950692381613e-07, "loss": 0.0002, "reward": 1.4285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.1428571492433548, "step": 835 }, { "completion_length": 173.00001525878906, "epoch": 0.8023032629558541, "grad_norm": 2.312180995941162, "kl": 0.11141744256019592, "learning_rate": 4.464525272458903e-07, "loss": 0.0001, "reward": 1.3571429252624512, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.1428571492433548, "step": 836 }, { "completion_length": 165.1428680419922, "epoch": 0.8032629558541267, "grad_norm": 3.9952080249786377, "kl": 0.12826047837734222, "learning_rate": 4.4628531812895475e-07, "loss": 0.0001, "reward": 1.3928571939468384, "reward_std": 0.45456868410110474, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.1785714328289032, "step": 837 }, { "completion_length": 174.21429443359375, "epoch": 0.8042226487523992, "grad_norm": 1.8014719486236572, "kl": 0.11799079924821854, "learning_rate": 4.4611787976829463e-07, "loss": 0.0001, "reward": 1.321428656578064, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.1785714328289032, "step": 838 }, { "completion_length": 147.35714721679688, "epoch": 0.8051823416506718, "grad_norm": 2.1287453174591064, "kl": 0.13791875541210175, "learning_rate": 4.4595021235946307e-07, "loss": 0.0001, "reward": 1.3571429252624512, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.2142857313156128, "step": 839 }, { "completion_length": 177.35714721679688, "epoch": 0.8061420345489443, "grad_norm": 3.0064644813537598, "kl": 0.11802324652671814, "learning_rate": 4.4578231609828065e-07, "loss": 0.0001, "reward": 1.1071429252624512, "reward_std": 0.3535534143447876, "rewards/check_gptzero_func": 0.4285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.1785714328289032, "step": 840 }, { "completion_length": 146.07144165039062, "epoch": 0.8071017274472169, "grad_norm": 2.2102935314178467, "kl": 0.15051019191741943, "learning_rate": 4.456141911808352e-07, "loss": 0.0002, "reward": 1.071428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.2857142984867096, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.2857142984867096, "step": 841 }, { "completion_length": 207.1428680419922, "epoch": 0.8080614203454894, "grad_norm": 2.03520131111145, "kl": 0.08360902220010757, "learning_rate": 4.454458378034817e-07, "loss": 0.0001, "reward": 1.321428656578064, "reward_std": 0.45456865429878235, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.1785714328289032, "step": 842 }, { "completion_length": 162.21429443359375, "epoch": 0.809021113243762, "grad_norm": 1.4077401161193848, "kl": 0.11652470380067825, "learning_rate": 4.4527725616284163e-07, "loss": 0.0001, "reward": 1.321428656578064, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.25, "step": 843 }, { "completion_length": 147.1428680419922, "epoch": 0.8099808061420346, "grad_norm": 2.3245930671691895, "kl": 0.1259394735097885, "learning_rate": 4.451084464558035e-07, "loss": 0.0001, "reward": 1.321428656578064, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.1071428656578064, "step": 844 }, { "completion_length": 122.92857360839844, "epoch": 0.8109404990403071, "grad_norm": 2.1736485958099365, "kl": 0.1855751872062683, "learning_rate": 4.449394088795219e-07, "loss": 0.0002, "reward": 1.4285714626312256, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.2857142984867096, "step": 845 }, { "completion_length": 178.00001525878906, "epoch": 0.8119001919385797, "grad_norm": 2.1216325759887695, "kl": 0.1022050604224205, "learning_rate": 4.4477014363141755e-07, "loss": 0.0001, "reward": 1.3571429252624512, "reward_std": 0.5050762891769409, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.2857142984867096, "step": 846 }, { "completion_length": 176.85714721679688, "epoch": 0.8128598848368522, "grad_norm": 2.437819480895996, "kl": 0.1035403460264206, "learning_rate": 4.446006509091773e-07, "loss": 0.0001, "reward": 1.321428656578064, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.25, "step": 847 }, { "completion_length": 159.1428680419922, "epoch": 0.8138195777351248, "grad_norm": 2.1661524772644043, "kl": 0.15175801515579224, "learning_rate": 4.4443093091075344e-07, "loss": 0.0002, "reward": 1.25, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.5, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.25, "step": 848 }, { "completion_length": 187.07144165039062, "epoch": 0.8147792706333973, "grad_norm": 2.1910266876220703, "kl": 0.12343594431877136, "learning_rate": 4.442609838343638e-07, "loss": 0.0001, "reward": 1.5000001192092896, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 849 }, { "completion_length": 153.35714721679688, "epoch": 0.8157389635316699, "grad_norm": 1.1331279277801514, "kl": 0.11769981682300568, "learning_rate": 4.4409080987849135e-07, "loss": 0.0001, "reward": 1.5000001192092896, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.2857142984867096, "step": 850 }, { "completion_length": 201.85714721679688, "epoch": 0.8166986564299424, "grad_norm": 1.8360708951950073, "kl": 0.07707281410694122, "learning_rate": 4.4392040924188404e-07, "loss": 0.0001, "reward": 1.321428656578064, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.25, "step": 851 }, { "completion_length": 175.6428680419922, "epoch": 0.817658349328215, "grad_norm": 1.5168054103851318, "kl": 0.11669564247131348, "learning_rate": 4.437497821235547e-07, "loss": 0.0001, "reward": 1.321428656578064, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.5, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3214285969734192, "step": 852 }, { "completion_length": 145.6428680419922, "epoch": 0.8186180422264875, "grad_norm": 2.7573585510253906, "kl": 0.15986499190330505, "learning_rate": 4.4357892872278045e-07, "loss": 0.0002, "reward": 1.321428656578064, "reward_std": 0.45456868410110474, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.25, "step": 853 }, { "completion_length": 201.00001525878906, "epoch": 0.8195777351247601, "grad_norm": 2.5074329376220703, "kl": 0.09575939923524857, "learning_rate": 4.4340784923910295e-07, "loss": 0.0001, "reward": 1.2857143878936768, "reward_std": 0.6060914993286133, "rewards/check_gptzero_func": 0.4285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 854 }, { "completion_length": 184.6428680419922, "epoch": 0.8205374280230326, "grad_norm": 2.2746124267578125, "kl": 0.1265283077955246, "learning_rate": 4.432365438723277e-07, "loss": 0.0001, "reward": 1.3571429252624512, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.2857142984867096, "step": 855 }, { "completion_length": 144.07144165039062, "epoch": 0.8214971209213052, "grad_norm": 1.2827552556991577, "kl": 0.17299026250839233, "learning_rate": 4.4306501282252396e-07, "loss": 0.0002, "reward": 1.5357143878936768, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.1071428656578064, "step": 856 }, { "completion_length": 140.6428680419922, "epoch": 0.8224568138195777, "grad_norm": 2.376018524169922, "kl": 0.14691919088363647, "learning_rate": 4.4289325629002485e-07, "loss": 0.0001, "reward": 1.4285714626312256, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.2142857313156128, "step": 857 }, { "completion_length": 228.9285888671875, "epoch": 0.8234165067178503, "grad_norm": 1.7439433336257935, "kl": 0.21875110268592834, "learning_rate": 4.427212744754265e-07, "loss": 0.0002, "reward": 1.4285714626312256, "reward_std": 0.4040610194206238, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.2857142984867096, "step": 858 }, { "completion_length": 170.0, "epoch": 0.8243761996161229, "grad_norm": 1.4620741605758667, "kl": 0.140095517039299, "learning_rate": 4.4254906757958844e-07, "loss": 0.0001, "reward": 1.4285714626312256, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.2857142984867096, "step": 859 }, { "completion_length": 107.50000762939453, "epoch": 0.8253358925143954, "grad_norm": 2.264909029006958, "kl": 0.18798433244228363, "learning_rate": 4.4237663580363293e-07, "loss": 0.0002, "reward": 1.6428571939468384, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.2857142984867096, "step": 860 }, { "completion_length": 176.07144165039062, "epoch": 0.8262955854126679, "grad_norm": 1.5678454637527466, "kl": 0.1418493688106537, "learning_rate": 4.422039793489448e-07, "loss": 0.0001, "reward": 1.4285714626312256, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.2142857313156128, "step": 861 }, { "completion_length": 210.7857208251953, "epoch": 0.8272552783109405, "grad_norm": 1.7714829444885254, "kl": 0.09624839574098587, "learning_rate": 4.420310984171715e-07, "loss": 0.0001, "reward": 1.321428656578064, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.4285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 862 }, { "completion_length": 167.71429443359375, "epoch": 0.8282149712092131, "grad_norm": 2.271725654602051, "kl": 0.1258973926305771, "learning_rate": 4.4185799321022245e-07, "loss": 0.0001, "reward": 1.571428656578064, "reward_std": 0.4040610194206238, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.2857142984867096, "step": 863 }, { "completion_length": 180.00001525878906, "epoch": 0.8291746641074856, "grad_norm": 2.146252155303955, "kl": 0.09945307672023773, "learning_rate": 4.41684663930269e-07, "loss": 0.0001, "reward": 1.1785714626312256, "reward_std": 0.3535533845424652, "rewards/check_gptzero_func": 0.4285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.25, "step": 864 }, { "completion_length": 162.5, "epoch": 0.8301343570057581, "grad_norm": 2.39168119430542, "kl": 0.14991115033626556, "learning_rate": 4.415111107797445e-07, "loss": 0.0001, "reward": 1.5000001192092896, "reward_std": 0.4040610194206238, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.2857142984867096, "step": 865 }, { "completion_length": 196.6428680419922, "epoch": 0.8310940499040307, "grad_norm": 2.152338981628418, "kl": 0.15174467861652374, "learning_rate": 4.413373339613433e-07, "loss": 0.0002, "reward": 1.2857143878936768, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.2142857313156128, "step": 866 }, { "completion_length": 143.0, "epoch": 0.8320537428023033, "grad_norm": 1.6655155420303345, "kl": 0.16276796162128448, "learning_rate": 4.4116333367802127e-07, "loss": 0.0002, "reward": 1.4285714626312256, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 867 }, { "completion_length": 164.1428680419922, "epoch": 0.8330134357005758, "grad_norm": 1.4838814735412598, "kl": 0.10342831909656525, "learning_rate": 4.4098911013299527e-07, "loss": 0.0001, "reward": 1.5000001192092896, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.1428571492433548, "step": 868 }, { "completion_length": 142.07144165039062, "epoch": 0.8339731285988484, "grad_norm": 2.341975212097168, "kl": 0.26776862144470215, "learning_rate": 4.408146635297428e-07, "loss": 0.0003, "reward": 1.6071429252624512, "reward_std": 0.3535533845424652, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3214285969734192, "step": 869 }, { "completion_length": 169.07144165039062, "epoch": 0.8349328214971209, "grad_norm": 2.3526594638824463, "kl": 0.11517242342233658, "learning_rate": 4.406399940720019e-07, "loss": 0.0001, "reward": 1.321428656578064, "reward_std": 0.3535534143447876, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.1785714328289032, "step": 870 }, { "completion_length": 144.57144165039062, "epoch": 0.8358925143953935, "grad_norm": 1.6017311811447144, "kl": 0.14980582892894745, "learning_rate": 4.4046510196377086e-07, "loss": 0.0001, "reward": 1.4285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.1428571492433548, "step": 871 }, { "completion_length": 198.00001525878906, "epoch": 0.836852207293666, "grad_norm": 2.326713800430298, "kl": 0.1262207329273224, "learning_rate": 4.402899874093082e-07, "loss": 0.0001, "reward": 1.571428656578064, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.2857142984867096, "step": 872 }, { "completion_length": 171.42857360839844, "epoch": 0.8378119001919386, "grad_norm": 2.0037009716033936, "kl": 0.14669473469257355, "learning_rate": 4.401146506131319e-07, "loss": 0.0001, "reward": 1.2857143878936768, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.2142857313156128, "step": 873 }, { "completion_length": 130.85714721679688, "epoch": 0.8387715930902111, "grad_norm": 2.5258355140686035, "kl": 0.1864318698644638, "learning_rate": 4.399390917800198e-07, "loss": 0.0002, "reward": 1.4285714626312256, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.1428571492433548, "step": 874 }, { "completion_length": 183.6428680419922, "epoch": 0.8397312859884837, "grad_norm": 1.062536358833313, "kl": 0.11539648473262787, "learning_rate": 4.3976331111500887e-07, "loss": 0.0001, "reward": 1.2857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.1428571492433548, "step": 875 }, { "completion_length": 170.7857208251953, "epoch": 0.8406909788867563, "grad_norm": 0.8813589215278625, "kl": 0.1341594010591507, "learning_rate": 4.3958730882339534e-07, "loss": 0.0001, "reward": 1.6071429252624512, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.1071428656578064, "step": 876 }, { "completion_length": 202.00001525878906, "epoch": 0.8416506717850288, "grad_norm": 2.4043829441070557, "kl": 0.13172104954719543, "learning_rate": 4.3941108511073405e-07, "loss": 0.0001, "reward": 1.571428656578064, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.1428571492433548, "step": 877 }, { "completion_length": 184.2857208251953, "epoch": 0.8426103646833013, "grad_norm": 1.8616597652435303, "kl": 0.11842437833547592, "learning_rate": 4.392346401828386e-07, "loss": 0.0001, "reward": 1.7142858505249023, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.2857142984867096, "step": 878 }, { "completion_length": 132.57144165039062, "epoch": 0.8435700575815739, "grad_norm": 2.2810373306274414, "kl": 0.19498321413993835, "learning_rate": 4.3905797424578093e-07, "loss": 0.0002, "reward": 1.5000001192092896, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.1428571492433548, "step": 879 }, { "completion_length": 153.2857208251953, "epoch": 0.8445297504798465, "grad_norm": 2.736848831176758, "kl": 0.16533324122428894, "learning_rate": 4.3888108750589106e-07, "loss": 0.0002, "reward": 1.4285714626312256, "reward_std": 0.4040610194206238, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.2142857313156128, "step": 880 }, { "completion_length": 174.00001525878906, "epoch": 0.845489443378119, "grad_norm": 1.264396071434021, "kl": 0.12454938888549805, "learning_rate": 4.3870398016975696e-07, "loss": 0.0001, "reward": 1.3928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.1071428656578064, "step": 881 }, { "completion_length": 157.71429443359375, "epoch": 0.8464491362763915, "grad_norm": 1.9732813835144043, "kl": 0.15785305202007294, "learning_rate": 4.38526652444224e-07, "loss": 0.0002, "reward": 1.4285714626312256, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.1428571492433548, "step": 882 }, { "completion_length": 143.5, "epoch": 0.8474088291746641, "grad_norm": 2.4239697456359863, "kl": 0.15298208594322205, "learning_rate": 4.383491045363954e-07, "loss": 0.0002, "reward": 1.25, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.1071428656578064, "step": 883 }, { "completion_length": 193.92857360839844, "epoch": 0.8483685220729367, "grad_norm": 1.850409984588623, "kl": 0.12944233417510986, "learning_rate": 4.381713366536311e-07, "loss": 0.0001, "reward": 1.6428571939468384, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.2857142984867096, "step": 884 }, { "completion_length": 166.2857208251953, "epoch": 0.8493282149712092, "grad_norm": 2.0691230297088623, "kl": 0.16904538869857788, "learning_rate": 4.379933490035481e-07, "loss": 0.0002, "reward": 1.5357143878936768, "reward_std": 0.3535534143447876, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.25, "step": 885 }, { "completion_length": 208.21429443359375, "epoch": 0.8502879078694817, "grad_norm": 1.6973471641540527, "kl": 0.12374360114336014, "learning_rate": 4.3781514179402016e-07, "loss": 0.0001, "reward": 1.571428656578064, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.2142857313156128, "step": 886 }, { "completion_length": 163.6428680419922, "epoch": 0.8512476007677543, "grad_norm": 2.408881425857544, "kl": 0.12163714319467545, "learning_rate": 4.3763671523317736e-07, "loss": 0.0001, "reward": 1.4285714626312256, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.2142857313156128, "step": 887 }, { "completion_length": 206.21429443359375, "epoch": 0.8522072936660269, "grad_norm": 2.3353192806243896, "kl": 0.11618292331695557, "learning_rate": 4.37458069529406e-07, "loss": 0.0001, "reward": 1.321428656578064, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.5, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3214285969734192, "step": 888 }, { "completion_length": 159.42857360839844, "epoch": 0.8531669865642995, "grad_norm": 2.034996747970581, "kl": 0.13224749267101288, "learning_rate": 4.3727920489134826e-07, "loss": 0.0001, "reward": 1.4285714626312256, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.1428571492433548, "step": 889 }, { "completion_length": 198.07144165039062, "epoch": 0.8541266794625719, "grad_norm": 0.9019782543182373, "kl": 0.10836869478225708, "learning_rate": 4.371001215279021e-07, "loss": 0.0001, "reward": 1.7500001192092896, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3214285969734192, "step": 890 }, { "completion_length": 180.35714721679688, "epoch": 0.8550863723608445, "grad_norm": 1.8948228359222412, "kl": 0.16660505533218384, "learning_rate": 4.369208196482209e-07, "loss": 0.0002, "reward": 1.2142857313156128, "reward_std": 0.5050762891769409, "rewards/check_gptzero_func": 0.5, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.2142857313156128, "step": 891 }, { "completion_length": 186.1428680419922, "epoch": 0.8560460652591171, "grad_norm": 1.8725552558898926, "kl": 0.13903935253620148, "learning_rate": 4.3674129946171324e-07, "loss": 0.0001, "reward": 1.3928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.5, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 892 }, { "completion_length": 192.71429443359375, "epoch": 0.8570057581573897, "grad_norm": 1.7588642835617065, "kl": 0.12108507007360458, "learning_rate": 4.3656156117804266e-07, "loss": 0.0001, "reward": 1.7500001192092896, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 893 }, { "completion_length": 201.07144165039062, "epoch": 0.8579654510556622, "grad_norm": 1.8287501335144043, "kl": 0.1054278165102005, "learning_rate": 4.363816050071275e-07, "loss": 0.0001, "reward": 1.071428656578064, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.2857142984867096, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.2857142984867096, "step": 894 }, { "completion_length": 195.2857208251953, "epoch": 0.8589251439539347, "grad_norm": 1.5580706596374512, "kl": 0.14600640535354614, "learning_rate": 4.3620143115914034e-07, "loss": 0.0001, "reward": 1.571428656578064, "reward_std": 0.5050762891769409, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 895 }, { "completion_length": 121.28572082519531, "epoch": 0.8598848368522073, "grad_norm": 3.8853702545166016, "kl": 0.2402605414390564, "learning_rate": 4.3602103984450833e-07, "loss": 0.0002, "reward": 1.321428656578064, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.1071428656578064, "step": 896 }, { "completion_length": 147.2857208251953, "epoch": 0.8608445297504799, "grad_norm": 1.3451780080795288, "kl": 0.15900340676307678, "learning_rate": 4.3584043127391224e-07, "loss": 0.0002, "reward": 1.4285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.2857142984867096, "step": 897 }, { "completion_length": 142.92857360839844, "epoch": 0.8618042226487524, "grad_norm": 2.6328866481781006, "kl": 0.1647530347108841, "learning_rate": 4.3565960565828687e-07, "loss": 0.0002, "reward": 1.5000001192092896, "reward_std": 0.4040610194206238, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.2857142984867096, "step": 898 }, { "completion_length": 158.57144165039062, "epoch": 0.8627639155470249, "grad_norm": 2.0476832389831543, "kl": 0.16165056824684143, "learning_rate": 4.3547856320882036e-07, "loss": 0.0002, "reward": 1.3928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.5, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 899 }, { "completion_length": 151.6428680419922, "epoch": 0.8637236084452975, "grad_norm": 2.0315279960632324, "kl": 0.12848329544067383, "learning_rate": 4.3529730413695416e-07, "loss": 0.0001, "reward": 1.4285714626312256, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.2857142984867096, "step": 900 }, { "completion_length": 174.1428680419922, "epoch": 0.8646833013435701, "grad_norm": 2.342961072921753, "kl": 0.13585011661052704, "learning_rate": 4.351158286543826e-07, "loss": 0.0001, "reward": 1.2142857313156128, "reward_std": 0.4040610194206238, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.1428571492433548, "step": 901 }, { "completion_length": 186.1428680419922, "epoch": 0.8656429942418427, "grad_norm": 2.502344846725464, "kl": 0.14534136652946472, "learning_rate": 4.3493413697305293e-07, "loss": 0.0001, "reward": 1.4285714626312256, "reward_std": 0.4040610194206238, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 902 }, { "completion_length": 160.2857208251953, "epoch": 0.8666026871401151, "grad_norm": 1.5286492109298706, "kl": 0.16826291382312775, "learning_rate": 4.3475222930516473e-07, "loss": 0.0002, "reward": 1.5357143878936768, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3214285969734192, "step": 903 }, { "completion_length": 148.0, "epoch": 0.8675623800383877, "grad_norm": 2.33026385307312, "kl": 0.14643186330795288, "learning_rate": 4.3457010586317e-07, "loss": 0.0001, "reward": 1.4642857313156128, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3214285969734192, "step": 904 }, { "completion_length": 169.92857360839844, "epoch": 0.8685220729366603, "grad_norm": 2.0900285243988037, "kl": 0.160984605550766, "learning_rate": 4.343877668597726e-07, "loss": 0.0002, "reward": 1.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.2857142984867096, "step": 905 }, { "completion_length": 180.6428680419922, "epoch": 0.8694817658349329, "grad_norm": 1.6280186176300049, "kl": 0.12181156873703003, "learning_rate": 4.342052125079282e-07, "loss": 0.0001, "reward": 1.6071429252624512, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 906 }, { "completion_length": 135.35714721679688, "epoch": 0.8704414587332053, "grad_norm": 0.9334993362426758, "kl": 0.19511233270168304, "learning_rate": 4.3402244302084393e-07, "loss": 0.0002, "reward": 1.3928571939468384, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.1785714328289032, "step": 907 }, { "completion_length": 166.6428680419922, "epoch": 0.8714011516314779, "grad_norm": 1.4395105838775635, "kl": 0.15156112611293793, "learning_rate": 4.338394586119784e-07, "loss": 0.0002, "reward": 1.3571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.1428571492433548, "step": 908 }, { "completion_length": 122.71429443359375, "epoch": 0.8723608445297505, "grad_norm": 2.343600034713745, "kl": 0.22209012508392334, "learning_rate": 4.336562594950409e-07, "loss": 0.0002, "reward": 1.4642857313156128, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3214285969734192, "step": 909 }, { "completion_length": 189.85714721679688, "epoch": 0.8733205374280231, "grad_norm": 1.5834014415740967, "kl": 0.1511719971895218, "learning_rate": 4.3347284588399167e-07, "loss": 0.0002, "reward": 1.2857143878936768, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.2857142984867096, "step": 910 }, { "completion_length": 179.57144165039062, "epoch": 0.8742802303262955, "grad_norm": 2.035491704940796, "kl": 0.14763590693473816, "learning_rate": 4.332892179930415e-07, "loss": 0.0001, "reward": 1.3571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.2142857313156128, "step": 911 }, { "completion_length": 143.5, "epoch": 0.8752399232245681, "grad_norm": 2.008474349975586, "kl": 0.16144457459449768, "learning_rate": 4.3310537603665133e-07, "loss": 0.0002, "reward": 1.5357143878936768, "reward_std": 0.3535533845424652, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 912 }, { "completion_length": 162.5, "epoch": 0.8761996161228407, "grad_norm": 1.7128863334655762, "kl": 0.17019574344158173, "learning_rate": 4.3292132022953207e-07, "loss": 0.0002, "reward": 1.571428656578064, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 913 }, { "completion_length": 139.85714721679688, "epoch": 0.8771593090211133, "grad_norm": 2.6940512657165527, "kl": 0.19049802422523499, "learning_rate": 4.327370507866447e-07, "loss": 0.0002, "reward": 1.571428656578064, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.2142857313156128, "step": 914 }, { "completion_length": 145.92857360839844, "epoch": 0.8781190019193857, "grad_norm": 0.992807149887085, "kl": 0.16135811805725098, "learning_rate": 4.3255256792319914e-07, "loss": 0.0002, "reward": 1.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.2142857313156128, "step": 915 }, { "completion_length": 178.92857360839844, "epoch": 0.8790786948176583, "grad_norm": 1.1431232690811157, "kl": 0.14107704162597656, "learning_rate": 4.323678718546552e-07, "loss": 0.0001, "reward": 1.7500001192092896, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3214285969734192, "step": 916 }, { "completion_length": 174.92857360839844, "epoch": 0.8800383877159309, "grad_norm": 1.5578680038452148, "kl": 0.22296278178691864, "learning_rate": 4.321829627967212e-07, "loss": 0.0002, "reward": 1.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.2142857313156128, "step": 917 }, { "completion_length": 113.21429443359375, "epoch": 0.8809980806142035, "grad_norm": 2.5740578174591064, "kl": 0.19609925150871277, "learning_rate": 4.319978409653545e-07, "loss": 0.0002, "reward": 1.6428571939468384, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 918 }, { "completion_length": 164.57144165039062, "epoch": 0.8819577735124761, "grad_norm": 0.009696886874735355, "kl": 0.14899134635925293, "learning_rate": 4.3181250657676087e-07, "loss": 0.0001, "reward": 1.4285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.0714285746216774, "step": 919 }, { "completion_length": 184.1428680419922, "epoch": 0.8829174664107485, "grad_norm": 1.0594658851623535, "kl": 0.14532586932182312, "learning_rate": 4.316269598473943e-07, "loss": 0.0001, "reward": 1.7500001192092896, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3214285969734192, "step": 920 }, { "completion_length": 209.00001525878906, "epoch": 0.8838771593090211, "grad_norm": 1.1084494590759277, "kl": 0.13272236287593842, "learning_rate": 4.314412009939568e-07, "loss": 0.0001, "reward": 1.5000001192092896, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.1428571492433548, "step": 921 }, { "completion_length": 158.71429443359375, "epoch": 0.8848368522072937, "grad_norm": 2.6590113639831543, "kl": 0.18553516268730164, "learning_rate": 4.3125523023339815e-07, "loss": 0.0002, "reward": 1.5000001192092896, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.2142857313156128, "step": 922 }, { "completion_length": 136.6428680419922, "epoch": 0.8857965451055663, "grad_norm": 1.9278428554534912, "kl": 0.16434164345264435, "learning_rate": 4.3106904778291556e-07, "loss": 0.0002, "reward": 1.5357143878936768, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.1785714328289032, "step": 923 }, { "completion_length": 135.0, "epoch": 0.8867562380038387, "grad_norm": 1.2076970338821411, "kl": 0.16954368352890015, "learning_rate": 4.3088265385995353e-07, "loss": 0.0002, "reward": 1.5357143878936768, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.25, "step": 924 }, { "completion_length": 171.50001525878906, "epoch": 0.8877159309021113, "grad_norm": 1.5332624912261963, "kl": 0.14450526237487793, "learning_rate": 4.3069604868220353e-07, "loss": 0.0001, "reward": 1.3928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.1785714328289032, "step": 925 }, { "completion_length": 134.42857360839844, "epoch": 0.8886756238003839, "grad_norm": 1.7947934865951538, "kl": 0.207505002617836, "learning_rate": 4.3050923246760367e-07, "loss": 0.0002, "reward": 1.5000001192092896, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.1428571492433548, "step": 926 }, { "completion_length": 216.2857208251953, "epoch": 0.8896353166986565, "grad_norm": 0.4057447910308838, "kl": 0.14190834760665894, "learning_rate": 4.303222054343387e-07, "loss": 0.0001, "reward": 1.4285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.1428571492433548, "step": 927 }, { "completion_length": 193.57144165039062, "epoch": 0.8905950095969289, "grad_norm": 1.7869563102722168, "kl": 0.1374649852514267, "learning_rate": 4.3013496780083944e-07, "loss": 0.0001, "reward": 1.5000001192092896, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.1428571492433548, "step": 928 }, { "completion_length": 147.5, "epoch": 0.8915547024952015, "grad_norm": 1.0008618831634521, "kl": 0.1897430270910263, "learning_rate": 4.299475197857828e-07, "loss": 0.0002, "reward": 1.6071429252624512, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.1785714328289032, "step": 929 }, { "completion_length": 173.42857360839844, "epoch": 0.8925143953934741, "grad_norm": 2.3426997661590576, "kl": 0.173048198223114, "learning_rate": 4.297598616080912e-07, "loss": 0.0002, "reward": 1.6071429252624512, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.25, "step": 930 }, { "completion_length": 180.07144165039062, "epoch": 0.8934740882917467, "grad_norm": 2.4867653846740723, "kl": 0.157678484916687, "learning_rate": 4.2957199348693275e-07, "loss": 0.0002, "reward": 1.4642857313156128, "reward_std": 0.3535533845424652, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.1785714328289032, "step": 931 }, { "completion_length": 151.85714721679688, "epoch": 0.8944337811900192, "grad_norm": 2.815748929977417, "kl": 0.19751772284507751, "learning_rate": 4.2938391564172054e-07, "loss": 0.0002, "reward": 1.3928571939468384, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.25, "step": 932 }, { "completion_length": 175.07144165039062, "epoch": 0.8953934740882917, "grad_norm": 1.3713716268539429, "kl": 0.16773483157157898, "learning_rate": 4.291956282921128e-07, "loss": 0.0002, "reward": 1.1428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.4285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.2142857313156128, "step": 933 }, { "completion_length": 168.0, "epoch": 0.8963531669865643, "grad_norm": 1.846179485321045, "kl": 0.20831070840358734, "learning_rate": 4.290071316580123e-07, "loss": 0.0002, "reward": 1.3571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.1428571492433548, "step": 934 }, { "completion_length": 155.21429443359375, "epoch": 0.8973128598848369, "grad_norm": 2.3109071254730225, "kl": 0.16551624238491058, "learning_rate": 4.288184259595662e-07, "loss": 0.0002, "reward": 1.6071429252624512, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.1785714328289032, "step": 935 }, { "completion_length": 154.7857208251953, "epoch": 0.8982725527831094, "grad_norm": 2.669950008392334, "kl": 0.15799498558044434, "learning_rate": 4.28629511417166e-07, "loss": 0.0002, "reward": 1.6071429252624512, "reward_std": 0.3535534143447876, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3214285969734192, "step": 936 }, { "completion_length": 170.07144165039062, "epoch": 0.8992322456813819, "grad_norm": 2.1560871601104736, "kl": 0.17254240810871124, "learning_rate": 4.28440388251447e-07, "loss": 0.0002, "reward": 1.4285714626312256, "reward_std": 0.4040610194206238, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.2857142984867096, "step": 937 }, { "completion_length": 185.71429443359375, "epoch": 0.9001919385796545, "grad_norm": 1.7750482559204102, "kl": 0.13163700699806213, "learning_rate": 4.282510566832882e-07, "loss": 0.0001, "reward": 1.571428656578064, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.2142857313156128, "step": 938 }, { "completion_length": 161.6428680419922, "epoch": 0.9011516314779271, "grad_norm": 2.2936575412750244, "kl": 0.16130828857421875, "learning_rate": 4.280615169338119e-07, "loss": 0.0002, "reward": 1.571428656578064, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.2857142984867096, "step": 939 }, { "completion_length": 184.21429443359375, "epoch": 0.9021113243761996, "grad_norm": 2.291755437850952, "kl": 0.1589900553226471, "learning_rate": 4.278717692243837e-07, "loss": 0.0002, "reward": 1.571428656578064, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.2142857313156128, "step": 940 }, { "completion_length": 132.1428680419922, "epoch": 0.9030710172744721, "grad_norm": 2.987111806869507, "kl": 0.2400379627943039, "learning_rate": 4.276818137766118e-07, "loss": 0.0002, "reward": 1.4642857313156128, "reward_std": 0.3535533845424652, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.25, "step": 941 }, { "completion_length": 172.7857208251953, "epoch": 0.9040307101727447, "grad_norm": 1.6337674856185913, "kl": 0.16205355525016785, "learning_rate": 4.274916508123474e-07, "loss": 0.0002, "reward": 1.4642857313156128, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.25, "step": 942 }, { "completion_length": 152.6428680419922, "epoch": 0.9049904030710173, "grad_norm": 2.331010103225708, "kl": 0.21920427680015564, "learning_rate": 4.2730128055368363e-07, "loss": 0.0002, "reward": 1.5357143878936768, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.25, "step": 943 }, { "completion_length": 152.42857360839844, "epoch": 0.9059500959692899, "grad_norm": 2.797351121902466, "kl": 0.17752705514431, "learning_rate": 4.271107032229563e-07, "loss": 0.0002, "reward": 1.1428571939468384, "reward_std": 0.4040610194206238, "rewards/check_gptzero_func": 0.4285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.2142857313156128, "step": 944 }, { "completion_length": 152.6428680419922, "epoch": 0.9069097888675623, "grad_norm": 2.166710615158081, "kl": 0.1723240464925766, "learning_rate": 4.269199190427424e-07, "loss": 0.0002, "reward": 1.5357143878936768, "reward_std": 0.3535533845424652, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 945 }, { "completion_length": 175.6428680419922, "epoch": 0.9078694817658349, "grad_norm": 2.1104047298431396, "kl": 0.1580582708120346, "learning_rate": 4.267289282358609e-07, "loss": 0.0002, "reward": 1.571428656578064, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.2857142984867096, "step": 946 }, { "completion_length": 145.35714721679688, "epoch": 0.9088291746641075, "grad_norm": 1.7790769338607788, "kl": 0.17972394824028015, "learning_rate": 4.2653773102537206e-07, "loss": 0.0002, "reward": 1.6428571939468384, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 947 }, { "completion_length": 161.2857208251953, "epoch": 0.9097888675623801, "grad_norm": 1.6295726299285889, "kl": 0.17541716992855072, "learning_rate": 4.263463276345772e-07, "loss": 0.0002, "reward": 1.5000001192092896, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.2857142984867096, "step": 948 }, { "completion_length": 141.21429443359375, "epoch": 0.9107485604606526, "grad_norm": 2.496845245361328, "kl": 0.22770977020263672, "learning_rate": 4.2615471828701823e-07, "loss": 0.0002, "reward": 1.5000001192092896, "reward_std": 0.4040610194206238, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 949 }, { "completion_length": 165.92857360839844, "epoch": 0.9117082533589251, "grad_norm": 2.168529987335205, "kl": 0.1653045117855072, "learning_rate": 4.2596290320647787e-07, "loss": 0.0002, "reward": 1.3928571939468384, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.25, "step": 950 }, { "completion_length": 113.0714340209961, "epoch": 0.9126679462571977, "grad_norm": 1.3556386232376099, "kl": 0.21140508353710175, "learning_rate": 4.2577088261697896e-07, "loss": 0.0002, "reward": 1.4642857313156128, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3214285969734192, "step": 951 }, { "completion_length": 141.57144165039062, "epoch": 0.9136276391554703, "grad_norm": 2.273030996322632, "kl": 0.187930628657341, "learning_rate": 4.255786567427845e-07, "loss": 0.0002, "reward": 1.5000001192092896, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.2142857313156128, "step": 952 }, { "completion_length": 141.71429443359375, "epoch": 0.9145873320537428, "grad_norm": 2.293578624725342, "kl": 0.17989088594913483, "learning_rate": 4.25386225808397e-07, "loss": 0.0002, "reward": 1.6071429252624512, "reward_std": 0.3535534143447876, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3214285969734192, "step": 953 }, { "completion_length": 199.2857208251953, "epoch": 0.9155470249520153, "grad_norm": 1.353972315788269, "kl": 0.18552538752555847, "learning_rate": 4.2519359003855864e-07, "loss": 0.0002, "reward": 1.5357143878936768, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.25, "step": 954 }, { "completion_length": 162.57144165039062, "epoch": 0.9165067178502879, "grad_norm": 2.2686069011688232, "kl": 0.17009302973747253, "learning_rate": 4.2500074965825093e-07, "loss": 0.0002, "reward": 1.6071429252624512, "reward_std": 0.3535533845424652, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.25, "step": 955 }, { "completion_length": 221.35714721679688, "epoch": 0.9174664107485605, "grad_norm": 1.691919207572937, "kl": 0.14190657436847687, "learning_rate": 4.248077048926941e-07, "loss": 0.0001, "reward": 1.7142858505249023, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 956 }, { "completion_length": 176.07144165039062, "epoch": 0.918426103646833, "grad_norm": 1.7553461790084839, "kl": 0.17991453409194946, "learning_rate": 4.2461445596734715e-07, "loss": 0.0002, "reward": 1.321428656578064, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.5, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3214285969734192, "step": 957 }, { "completion_length": 222.07144165039062, "epoch": 0.9193857965451055, "grad_norm": 2.2406764030456543, "kl": 0.1561635136604309, "learning_rate": 4.244210031079077e-07, "loss": 0.0002, "reward": 1.4285714626312256, "reward_std": 0.5050762891769409, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.2857142984867096, "step": 958 }, { "completion_length": 143.57144165039062, "epoch": 0.9203454894433781, "grad_norm": 2.4738481044769287, "kl": 0.2465304136276245, "learning_rate": 4.2422734654031135e-07, "loss": 0.0002, "reward": 1.321428656578064, "reward_std": 0.3535534143447876, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.25, "step": 959 }, { "completion_length": 174.00001525878906, "epoch": 0.9213051823416507, "grad_norm": 1.1833672523498535, "kl": 0.1428680270910263, "learning_rate": 4.2403348649073167e-07, "loss": 0.0001, "reward": 1.5357143878936768, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.25, "step": 960 }, { "completion_length": 190.42857360839844, "epoch": 0.9222648752399232, "grad_norm": 2.2632598876953125, "kl": 0.13760563731193542, "learning_rate": 4.238394231855799e-07, "loss": 0.0001, "reward": 1.4642857313156128, "reward_std": 0.45456868410110474, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.25, "step": 961 }, { "completion_length": 163.92857360839844, "epoch": 0.9232245681381958, "grad_norm": 2.093719720840454, "kl": 0.1613253951072693, "learning_rate": 4.236451568515046e-07, "loss": 0.0002, "reward": 1.3928571939468384, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.1785714328289032, "step": 962 }, { "completion_length": 149.57144165039062, "epoch": 0.9241842610364683, "grad_norm": 1.3518831729888916, "kl": 0.20314131677150726, "learning_rate": 4.234506877153916e-07, "loss": 0.0002, "reward": 1.6071429252624512, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.1785714328289032, "step": 963 }, { "completion_length": 171.57144165039062, "epoch": 0.9251439539347409, "grad_norm": 1.500829815864563, "kl": 0.1670256406068802, "learning_rate": 4.2325601600436343e-07, "loss": 0.0002, "reward": 1.4285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.1428571492433548, "step": 964 }, { "completion_length": 164.92857360839844, "epoch": 0.9261036468330134, "grad_norm": 2.0851306915283203, "kl": 0.1713995784521103, "learning_rate": 4.230611419457793e-07, "loss": 0.0002, "reward": 1.6428571939468384, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.2857142984867096, "step": 965 }, { "completion_length": 158.6428680419922, "epoch": 0.927063339731286, "grad_norm": 1.1897790431976318, "kl": 0.18961693346500397, "learning_rate": 4.2286606576723466e-07, "loss": 0.0002, "reward": 1.7500001192092896, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3214285969734192, "step": 966 }, { "completion_length": 159.92857360839844, "epoch": 0.9280230326295585, "grad_norm": 2.7167208194732666, "kl": 0.19394929707050323, "learning_rate": 4.2267078769656105e-07, "loss": 0.0002, "reward": 1.4285714626312256, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.1428571492433548, "step": 967 }, { "completion_length": 140.07144165039062, "epoch": 0.9289827255278311, "grad_norm": 1.751585602760315, "kl": 0.21058110892772675, "learning_rate": 4.2247530796182595e-07, "loss": 0.0002, "reward": 1.6785714626312256, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.25, "step": 968 }, { "completion_length": 132.5, "epoch": 0.9299424184261037, "grad_norm": 2.320756196975708, "kl": 0.21314476430416107, "learning_rate": 4.2227962679133213e-07, "loss": 0.0002, "reward": 1.5000001192092896, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 969 }, { "completion_length": 194.07144165039062, "epoch": 0.9309021113243762, "grad_norm": 2.0526976585388184, "kl": 0.22351492941379547, "learning_rate": 4.2208374441361774e-07, "loss": 0.0002, "reward": 1.7142858505249023, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.2857142984867096, "step": 970 }, { "completion_length": 172.1428680419922, "epoch": 0.9318618042226487, "grad_norm": 2.0873799324035645, "kl": 0.16316214203834534, "learning_rate": 4.2188766105745585e-07, "loss": 0.0002, "reward": 1.7500001192092896, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.25, "step": 971 }, { "completion_length": 193.57144165039062, "epoch": 0.9328214971209213, "grad_norm": 2.1299750804901123, "kl": 0.17353209853172302, "learning_rate": 4.2169137695185436e-07, "loss": 0.0002, "reward": 1.25, "reward_std": 0.45456865429878235, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.1785714328289032, "step": 972 }, { "completion_length": 168.92857360839844, "epoch": 0.9337811900191939, "grad_norm": 1.1543985605239868, "kl": 0.16974620521068573, "learning_rate": 4.214948923260556e-07, "loss": 0.0002, "reward": 1.571428656578064, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 973 }, { "completion_length": 163.42857360839844, "epoch": 0.9347408829174664, "grad_norm": 2.7331154346466064, "kl": 0.18122881650924683, "learning_rate": 4.212982074095361e-07, "loss": 0.0002, "reward": 1.5000001192092896, "reward_std": 0.4040610194206238, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.2142857313156128, "step": 974 }, { "completion_length": 113.64286041259766, "epoch": 0.935700575815739, "grad_norm": 2.7158126831054688, "kl": 0.35514703392982483, "learning_rate": 4.2110132243200614e-07, "loss": 0.0004, "reward": 1.5000001192092896, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.1428571492433548, "step": 975 }, { "completion_length": 197.6428680419922, "epoch": 0.9366602687140115, "grad_norm": 1.1523102521896362, "kl": 0.20189528167247772, "learning_rate": 4.2090423762341e-07, "loss": 0.0002, "reward": 1.821428656578064, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 976 }, { "completion_length": 187.50001525878906, "epoch": 0.9376199616122841, "grad_norm": 1.5810389518737793, "kl": 0.251001238822937, "learning_rate": 4.2070695321392497e-07, "loss": 0.0003, "reward": 1.5357143878936768, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3214285969734192, "step": 977 }, { "completion_length": 120.85714721679688, "epoch": 0.9385796545105566, "grad_norm": 2.898531198501587, "kl": 0.23638375103473663, "learning_rate": 4.2050946943396164e-07, "loss": 0.0002, "reward": 1.4642857313156128, "reward_std": 0.3535534143447876, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.1785714328289032, "step": 978 }, { "completion_length": 191.57144165039062, "epoch": 0.9395393474088292, "grad_norm": 0.7084457278251648, "kl": 0.19702336192131042, "learning_rate": 4.203117865141635e-07, "loss": 0.0002, "reward": 1.6071429252624512, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.25, "step": 979 }, { "completion_length": 175.6428680419922, "epoch": 0.9404990403071017, "grad_norm": 1.0746110677719116, "kl": 0.1909438818693161, "learning_rate": 4.201139046854065e-07, "loss": 0.0002, "reward": 1.7500001192092896, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 980 }, { "completion_length": 122.50000762939453, "epoch": 0.9414587332053743, "grad_norm": 1.945700764656067, "kl": 0.3154948949813843, "learning_rate": 4.1991582417879903e-07, "loss": 0.0003, "reward": 1.4642857313156128, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3214285969734192, "step": 981 }, { "completion_length": 152.21429443359375, "epoch": 0.9424184261036468, "grad_norm": 1.7398653030395508, "kl": 0.20472468435764313, "learning_rate": 4.1971754522568133e-07, "loss": 0.0002, "reward": 1.571428656578064, "reward_std": 0.4040610194206238, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 982 }, { "completion_length": 170.5, "epoch": 0.9433781190019194, "grad_norm": 2.367933750152588, "kl": 0.19843587279319763, "learning_rate": 4.195190680576255e-07, "loss": 0.0002, "reward": 1.571428656578064, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 983 }, { "completion_length": 215.85714721679688, "epoch": 0.944337811900192, "grad_norm": 1.0992072820663452, "kl": 0.2044789344072342, "learning_rate": 4.193203929064353e-07, "loss": 0.0002, "reward": 1.7500001192092896, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3214285969734192, "step": 984 }, { "completion_length": 174.35714721679688, "epoch": 0.9452975047984645, "grad_norm": 1.5323668718338013, "kl": 0.27023836970329285, "learning_rate": 4.1912152000414534e-07, "loss": 0.0003, "reward": 1.5000001192092896, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.2857142984867096, "step": 985 }, { "completion_length": 152.92857360839844, "epoch": 0.946257197696737, "grad_norm": 2.2847187519073486, "kl": 0.19636093080043793, "learning_rate": 4.189224495830216e-07, "loss": 0.0002, "reward": 1.7857143878936768, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 986 }, { "completion_length": 173.57144165039062, "epoch": 0.9472168905950096, "grad_norm": 0.010327545925974846, "kl": 0.16767975687980652, "learning_rate": 4.1872318187556057e-07, "loss": 0.0002, "reward": 1.571428656578064, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 987 }, { "completion_length": 159.85714721679688, "epoch": 0.9481765834932822, "grad_norm": 2.1962037086486816, "kl": 0.19946545362472534, "learning_rate": 4.185237171144889e-07, "loss": 0.0002, "reward": 1.4642857313156128, "reward_std": 0.3535533845424652, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.25, "step": 988 }, { "completion_length": 161.1428680419922, "epoch": 0.9491362763915547, "grad_norm": 1.4317357540130615, "kl": 0.18097074329853058, "learning_rate": 4.1832405553276387e-07, "loss": 0.0002, "reward": 1.7500001192092896, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 989 }, { "completion_length": 166.5, "epoch": 0.9500959692898272, "grad_norm": 0.0075844768434762955, "kl": 0.17018939554691315, "learning_rate": 4.181241973635723e-07, "loss": 0.0002, "reward": 1.571428656578064, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 990 }, { "completion_length": 240.21429443359375, "epoch": 0.9510556621880998, "grad_norm": 1.278357982635498, "kl": 0.10595779865980148, "learning_rate": 4.1792414284033066e-07, "loss": 0.0001, "reward": 1.4285714626312256, "reward_std": 0.4040610194206238, "rewards/check_gptzero_func": 0.4285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 991 }, { "completion_length": 174.57144165039062, "epoch": 0.9520153550863724, "grad_norm": 0.8926373720169067, "kl": 0.17882102727890015, "learning_rate": 4.177238921966848e-07, "loss": 0.0002, "reward": 1.6428571939468384, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 992 }, { "completion_length": 251.57144165039062, "epoch": 0.9529750479846449, "grad_norm": 1.1822398900985718, "kl": 0.14454656839370728, "learning_rate": 4.175234456665095e-07, "loss": 0.0001, "reward": 1.571428656578064, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 993 }, { "completion_length": 164.07144165039062, "epoch": 0.9539347408829175, "grad_norm": 1.8440866470336914, "kl": 0.19946084916591644, "learning_rate": 4.173228034839086e-07, "loss": 0.0002, "reward": 1.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 994 }, { "completion_length": 182.2857208251953, "epoch": 0.95489443378119, "grad_norm": 0.9021524786949158, "kl": 0.18866883218288422, "learning_rate": 4.171219658832141e-07, "loss": 0.0002, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 995 }, { "completion_length": 164.71429443359375, "epoch": 0.9558541266794626, "grad_norm": 1.854368805885315, "kl": 0.2180839329957962, "learning_rate": 4.1692093309898633e-07, "loss": 0.0002, "reward": 1.6071429252624512, "reward_std": 0.3535534143447876, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 996 }, { "completion_length": 170.1428680419922, "epoch": 0.9568138195777351, "grad_norm": 1.2204807996749878, "kl": 0.21623320877552032, "learning_rate": 4.1671970536601387e-07, "loss": 0.0002, "reward": 1.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 997 }, { "completion_length": 133.7857208251953, "epoch": 0.9577735124760077, "grad_norm": 1.5816521644592285, "kl": 0.2774593234062195, "learning_rate": 4.165182829193126e-07, "loss": 0.0003, "reward": 1.4285714626312256, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.2857142984867096, "step": 998 }, { "completion_length": 135.6428680419922, "epoch": 0.9587332053742802, "grad_norm": 2.0824859142303467, "kl": 0.2354128509759903, "learning_rate": 4.163166659941258e-07, "loss": 0.0002, "reward": 1.7857143878936768, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 999 }, { "completion_length": 155.85714721679688, "epoch": 0.9596928982725528, "grad_norm": 2.2026970386505127, "kl": 0.21415354311466217, "learning_rate": 4.161148548259242e-07, "loss": 0.0002, "reward": 1.5357143878936768, "reward_std": 0.3535534143447876, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.25, "step": 1000 }, { "completion_length": 191.07144165039062, "epoch": 0.9606525911708254, "grad_norm": 1.6791126728057861, "kl": 0.20153124630451202, "learning_rate": 4.159128496504053e-07, "loss": 0.0002, "reward": 1.7500001192092896, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 1001 }, { "completion_length": 112.64286041259766, "epoch": 0.9616122840690979, "grad_norm": 2.5777781009674072, "kl": 0.3052109479904175, "learning_rate": 4.15710650703493e-07, "loss": 0.0003, "reward": 1.6785714626312256, "reward_std": 0.45456868410110474, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1002 }, { "completion_length": 129.21429443359375, "epoch": 0.9625719769673704, "grad_norm": 2.579775810241699, "kl": 0.3143688142299652, "learning_rate": 4.155082582213376e-07, "loss": 0.0003, "reward": 1.6428571939468384, "reward_std": 0.4040610194206238, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1003 }, { "completion_length": 166.1428680419922, "epoch": 0.963531669865643, "grad_norm": 1.2678511142730713, "kl": 0.2238951474428177, "learning_rate": 4.1530567244031557e-07, "loss": 0.0002, "reward": 1.6785714626312256, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 1004 }, { "completion_length": 163.2857208251953, "epoch": 0.9644913627639156, "grad_norm": 1.8953132629394531, "kl": 0.22564013302326202, "learning_rate": 4.1510289359702875e-07, "loss": 0.0002, "reward": 1.4642857313156128, "reward_std": 0.45456868410110474, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 1005 }, { "completion_length": 161.5, "epoch": 0.9654510556621881, "grad_norm": 1.7514156103134155, "kl": 0.2795505225658417, "learning_rate": 4.1489992192830504e-07, "loss": 0.0003, "reward": 1.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.2857142984867096, "step": 1006 }, { "completion_length": 207.50001525878906, "epoch": 0.9664107485604606, "grad_norm": 0.9710041880607605, "kl": 0.1836845427751541, "learning_rate": 4.1469675767119697e-07, "loss": 0.0002, "reward": 1.821428656578064, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1007 }, { "completion_length": 169.6428680419922, "epoch": 0.9673704414587332, "grad_norm": 2.105051279067993, "kl": 0.23773248493671417, "learning_rate": 4.1449340106298246e-07, "loss": 0.0002, "reward": 1.5357143878936768, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.25, "step": 1008 }, { "completion_length": 189.6428680419922, "epoch": 0.9683301343570058, "grad_norm": 0.6678879857063293, "kl": 0.23710009455680847, "learning_rate": 4.142898523411637e-07, "loss": 0.0002, "reward": 1.4642857313156128, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 1009 }, { "completion_length": 191.07144165039062, "epoch": 0.9692898272552783, "grad_norm": 0.9153040051460266, "kl": 0.23363077640533447, "learning_rate": 4.1408611174346745e-07, "loss": 0.0002, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1010 }, { "completion_length": 191.71429443359375, "epoch": 0.9702495201535508, "grad_norm": 1.4831290245056152, "kl": 0.21374395489692688, "learning_rate": 4.1388217950784465e-07, "loss": 0.0002, "reward": 1.6785714626312256, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1011 }, { "completion_length": 143.42857360839844, "epoch": 0.9712092130518234, "grad_norm": 1.5334080457687378, "kl": 0.2664640545845032, "learning_rate": 4.1367805587246984e-07, "loss": 0.0003, "reward": 1.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1012 }, { "completion_length": 160.57144165039062, "epoch": 0.972168905950096, "grad_norm": 2.8555729389190674, "kl": 0.2705492377281189, "learning_rate": 4.134737410757412e-07, "loss": 0.0003, "reward": 1.6071429252624512, "reward_std": 0.3535533845424652, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 1013 }, { "completion_length": 184.1428680419922, "epoch": 0.9731285988483686, "grad_norm": 0.8860253095626831, "kl": 0.22307245433330536, "learning_rate": 4.132692353562803e-07, "loss": 0.0002, "reward": 1.6071429252624512, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 1014 }, { "completion_length": 171.92857360839844, "epoch": 0.974088291746641, "grad_norm": 1.5898070335388184, "kl": 0.23741787672042847, "learning_rate": 4.130645389529313e-07, "loss": 0.0002, "reward": 1.6428571939468384, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1015 }, { "completion_length": 160.1428680419922, "epoch": 0.9750479846449136, "grad_norm": 0.6628162860870361, "kl": 0.30127984285354614, "learning_rate": 4.128596521047616e-07, "loss": 0.0003, "reward": 1.821428656578064, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3214285969734192, "step": 1016 }, { "completion_length": 133.6428680419922, "epoch": 0.9760076775431862, "grad_norm": 2.1336987018585205, "kl": 0.33273571729660034, "learning_rate": 4.126545750510605e-07, "loss": 0.0003, "reward": 1.6785714626312256, "reward_std": 0.3535534143447876, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 1017 }, { "completion_length": 197.1428680419922, "epoch": 0.9769673704414588, "grad_norm": 0.6278882622718811, "kl": 0.2334177941083908, "learning_rate": 4.1244930803133995e-07, "loss": 0.0002, "reward": 1.5000001192092896, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.5, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1018 }, { "completion_length": 171.2857208251953, "epoch": 0.9779270633397313, "grad_norm": 1.9309805631637573, "kl": 0.25460848212242126, "learning_rate": 4.122438512853332e-07, "loss": 0.0003, "reward": 1.6071429252624512, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 1019 }, { "completion_length": 172.57144165039062, "epoch": 0.9788867562380038, "grad_norm": 1.6436792612075806, "kl": 0.24625283479690552, "learning_rate": 4.120382050529956e-07, "loss": 0.0002, "reward": 1.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1020 }, { "completion_length": 144.5, "epoch": 0.9798464491362764, "grad_norm": 1.527305006980896, "kl": 0.28473854064941406, "learning_rate": 4.118323695745035e-07, "loss": 0.0003, "reward": 1.5000001192092896, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1021 }, { "completion_length": 189.2857208251953, "epoch": 0.980806142034549, "grad_norm": 1.6667498350143433, "kl": 0.24288301169872284, "learning_rate": 4.116263450902544e-07, "loss": 0.0002, "reward": 1.4285714626312256, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.4285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1022 }, { "completion_length": 137.92857360839844, "epoch": 0.9817658349328215, "grad_norm": 1.6952290534973145, "kl": 0.3103066682815552, "learning_rate": 4.114201318408665e-07, "loss": 0.0003, "reward": 1.7142858505249023, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 1023 }, { "completion_length": 254.71429443359375, "epoch": 0.982725527831094, "grad_norm": 1.6906660795211792, "kl": 0.1920548528432846, "learning_rate": 4.112137300671784e-07, "loss": 0.0002, "reward": 1.7500001192092896, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1024 }, { "completion_length": 200.92857360839844, "epoch": 0.9836852207293666, "grad_norm": 1.0223010778427124, "kl": 0.2121150642633438, "learning_rate": 4.1100714001024907e-07, "loss": 0.0002, "reward": 1.5357143878936768, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1025 }, { "completion_length": 166.5, "epoch": 0.9846449136276392, "grad_norm": 1.3087208271026611, "kl": 0.3094004988670349, "learning_rate": 4.108003619113571e-07, "loss": 0.0003, "reward": 1.6785714626312256, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 1026 }, { "completion_length": 194.2857208251953, "epoch": 0.9856046065259118, "grad_norm": 1.1457622051239014, "kl": 0.22822214663028717, "learning_rate": 4.1059339601200093e-07, "loss": 0.0002, "reward": 1.7500001192092896, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1027 }, { "completion_length": 160.07144165039062, "epoch": 0.9865642994241842, "grad_norm": 2.2164793014526367, "kl": 0.27731719613075256, "learning_rate": 4.103862425538982e-07, "loss": 0.0003, "reward": 1.7500001192092896, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3214285969734192, "step": 1028 }, { "completion_length": 184.00001525878906, "epoch": 0.9875239923224568, "grad_norm": 1.5260193347930908, "kl": 0.22345244884490967, "learning_rate": 4.1017890177898573e-07, "loss": 0.0002, "reward": 1.7500001192092896, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1029 }, { "completion_length": 180.2857208251953, "epoch": 0.9884836852207294, "grad_norm": 1.5072451829910278, "kl": 0.25488799810409546, "learning_rate": 4.0997137392941893e-07, "loss": 0.0003, "reward": 1.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1030 }, { "completion_length": 210.2857208251953, "epoch": 0.989443378119002, "grad_norm": 1.7655417919158936, "kl": 0.2554980218410492, "learning_rate": 4.097636592475719e-07, "loss": 0.0003, "reward": 1.7857143878936768, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1031 }, { "completion_length": 213.07144165039062, "epoch": 0.9904030710172744, "grad_norm": 1.5099455118179321, "kl": 0.201838418841362, "learning_rate": 4.0955575797603674e-07, "loss": 0.0002, "reward": 1.6785714626312256, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1032 }, { "completion_length": 178.57144165039062, "epoch": 0.991362763915547, "grad_norm": 1.6293164491653442, "kl": 0.30832529067993164, "learning_rate": 4.093476703576236e-07, "loss": 0.0003, "reward": 1.3928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.4285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1033 }, { "completion_length": 212.1428680419922, "epoch": 0.9923224568138196, "grad_norm": 1.7703684568405151, "kl": 0.3233477771282196, "learning_rate": 4.0913939663536037e-07, "loss": 0.0003, "reward": 1.6785714626312256, "reward_std": 0.3535534143447876, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.25, "step": 1034 }, { "completion_length": 177.6428680419922, "epoch": 0.9932821497120922, "grad_norm": 1.7227165699005127, "kl": 0.28016340732574463, "learning_rate": 4.0893093705249207e-07, "loss": 0.0003, "reward": 1.7142858505249023, "reward_std": 0.4040610194206238, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1035 }, { "completion_length": 219.6428680419922, "epoch": 0.9942418426103646, "grad_norm": 1.2657032012939453, "kl": 0.26244696974754333, "learning_rate": 4.087222918524807e-07, "loss": 0.0003, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1036 }, { "completion_length": 162.1428680419922, "epoch": 0.9952015355086372, "grad_norm": 1.343782901763916, "kl": 0.3071088492870331, "learning_rate": 4.0851346127900546e-07, "loss": 0.0003, "reward": 1.6785714626312256, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1037 }, { "completion_length": 168.07144165039062, "epoch": 0.9961612284069098, "grad_norm": 2.529819965362549, "kl": 0.32029587030410767, "learning_rate": 4.083044455759617e-07, "loss": 0.0003, "reward": 1.4642857313156128, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.25, "step": 1038 }, { "completion_length": 162.2857208251953, "epoch": 0.9971209213051824, "grad_norm": 1.3420369625091553, "kl": 0.3032599985599518, "learning_rate": 4.080952449874612e-07, "loss": 0.0003, "reward": 1.6785714626312256, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3214285969734192, "step": 1039 }, { "completion_length": 164.2857208251953, "epoch": 0.9980806142034548, "grad_norm": 0.009910755790770054, "kl": 0.2404022365808487, "learning_rate": 4.078858597578314e-07, "loss": 0.0002, "reward": 1.7142858505249023, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1040 }, { "completion_length": 204.21429443359375, "epoch": 0.9990403071017274, "grad_norm": 0.8874227404594421, "kl": 0.2501865029335022, "learning_rate": 4.076762901316157e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1041 }, { "completion_length": 170.92857360839844, "epoch": 1.0, "grad_norm": 1.775046944618225, "kl": 0.2906855046749115, "learning_rate": 4.0746653635357264e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1042 }, { "completion_length": 154.2857208251953, "epoch": 1.0009596928982725, "grad_norm": 1.8281391859054565, "kl": 0.2990281283855438, "learning_rate": 4.07256598668676e-07, "loss": 0.0003, "reward": 1.6428571939468384, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.2857142984867096, "step": 1043 }, { "completion_length": 214.57144165039062, "epoch": 1.0019193857965452, "grad_norm": 0.9008486866950989, "kl": 0.19738106429576874, "learning_rate": 4.0704647732211404e-07, "loss": 0.0002, "reward": 1.7857143878936768, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1044 }, { "completion_length": 167.35714721679688, "epoch": 1.0028790786948176, "grad_norm": 3.3080923557281494, "kl": 0.25021591782569885, "learning_rate": 4.0683617255928997e-07, "loss": 0.0003, "reward": 1.5357143878936768, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1045 }, { "completion_length": 175.92857360839844, "epoch": 1.0038387715930903, "grad_norm": 1.2425875663757324, "kl": 0.23507018387317657, "learning_rate": 4.066256846258208e-07, "loss": 0.0002, "reward": 1.5357143878936768, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1046 }, { "completion_length": 173.21429443359375, "epoch": 1.0047984644913628, "grad_norm": 0.6192589402198792, "kl": 0.27089715003967285, "learning_rate": 4.064150137675377e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1047 }, { "completion_length": 208.57144165039062, "epoch": 1.0057581573896353, "grad_norm": 1.4861479997634888, "kl": 0.2115853875875473, "learning_rate": 4.0620416023048553e-07, "loss": 0.0002, "reward": 1.7142858505249023, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 1048 }, { "completion_length": 140.92857360839844, "epoch": 1.006717850287908, "grad_norm": 2.484308958053589, "kl": 0.3643427789211273, "learning_rate": 4.0599312426092225e-07, "loss": 0.0004, "reward": 1.6428571939468384, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 1049 }, { "completion_length": 187.7857208251953, "epoch": 1.0076775431861804, "grad_norm": 1.8760334253311157, "kl": 0.267610102891922, "learning_rate": 4.05781906105319e-07, "loss": 0.0003, "reward": 1.4642857313156128, "reward_std": 0.45456865429878235, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 1050 }, { "completion_length": 168.21429443359375, "epoch": 1.0086372360844529, "grad_norm": 1.6091583967208862, "kl": 0.24881893396377563, "learning_rate": 4.0557050601035983e-07, "loss": 0.0002, "reward": 1.7142858505249023, "reward_std": 0.4040610194206238, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1051 }, { "completion_length": 221.4285888671875, "epoch": 1.0095969289827256, "grad_norm": 0.008675684221088886, "kl": 0.2201974093914032, "learning_rate": 4.053589242229412e-07, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1052 }, { "completion_length": 232.71429443359375, "epoch": 1.010556621880998, "grad_norm": 1.3745567798614502, "kl": 0.2069643884897232, "learning_rate": 4.051471609901716e-07, "loss": 0.0002, "reward": 1.6071429252624512, "reward_std": 0.3535533845424652, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1053 }, { "completion_length": 207.85714721679688, "epoch": 1.0115163147792707, "grad_norm": 1.2833335399627686, "kl": 0.22067658603191376, "learning_rate": 4.049352165593717e-07, "loss": 0.0002, "reward": 1.821428656578064, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1054 }, { "completion_length": 175.92857360839844, "epoch": 1.0124760076775432, "grad_norm": 2.260439872741699, "kl": 0.2941804528236389, "learning_rate": 4.047230911780736e-07, "loss": 0.0003, "reward": 1.5357143878936768, "reward_std": 0.5555838942527771, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 1055 }, { "completion_length": 203.21429443359375, "epoch": 1.0134357005758157, "grad_norm": 1.816745400428772, "kl": 0.4180804193019867, "learning_rate": 4.0451078509402087e-07, "loss": 0.0004, "reward": 1.6428571939468384, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 1056 }, { "completion_length": 155.57144165039062, "epoch": 1.0143953934740884, "grad_norm": 1.3451777696609497, "kl": 0.3119773864746094, "learning_rate": 4.0429829855516795e-07, "loss": 0.0003, "reward": 1.6071429252624512, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 1057 }, { "completion_length": 173.21429443359375, "epoch": 1.0153550863723608, "grad_norm": 1.7083945274353027, "kl": 0.26172924041748047, "learning_rate": 4.040856318096803e-07, "loss": 0.0003, "reward": 1.6785714626312256, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 1058 }, { "completion_length": 157.2857208251953, "epoch": 1.0163147792706333, "grad_norm": 1.8130723237991333, "kl": 0.3337620794773102, "learning_rate": 4.038727851059336e-07, "loss": 0.0003, "reward": 1.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.2857142984867096, "step": 1059 }, { "completion_length": 154.35714721679688, "epoch": 1.017274472168906, "grad_norm": 1.0928754806518555, "kl": 0.3296603262424469, "learning_rate": 4.036597586925138e-07, "loss": 0.0003, "reward": 1.7142858505249023, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.2857142984867096, "step": 1060 }, { "completion_length": 185.1428680419922, "epoch": 1.0182341650671785, "grad_norm": 0.9719970226287842, "kl": 0.30506259202957153, "learning_rate": 4.0344655281821664e-07, "loss": 0.0003, "reward": 1.7142858505249023, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.2857142984867096, "step": 1061 }, { "completion_length": 155.71429443359375, "epoch": 1.0191938579654511, "grad_norm": 1.2042105197906494, "kl": 0.2787649631500244, "learning_rate": 4.0323316773204774e-07, "loss": 0.0003, "reward": 1.6785714626312256, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3214285969734192, "step": 1062 }, { "completion_length": 145.21429443359375, "epoch": 1.0201535508637236, "grad_norm": 1.7534483671188354, "kl": 0.3105512261390686, "learning_rate": 4.0301960368322165e-07, "loss": 0.0003, "reward": 1.7857143878936768, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1063 }, { "completion_length": 131.42857360839844, "epoch": 1.021113243761996, "grad_norm": 1.4320282936096191, "kl": 0.3522523045539856, "learning_rate": 4.028058609211622e-07, "loss": 0.0004, "reward": 1.7857143878936768, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1064 }, { "completion_length": 209.35714721679688, "epoch": 1.0220729366602688, "grad_norm": 1.4919252395629883, "kl": 0.3039306402206421, "learning_rate": 4.025919396955019e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1065 }, { "completion_length": 150.2857208251953, "epoch": 1.0230326295585412, "grad_norm": 2.0111982822418213, "kl": 0.30186596512794495, "learning_rate": 4.023778402560814e-07, "loss": 0.0003, "reward": 1.6785714626312256, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1066 }, { "completion_length": 174.71429443359375, "epoch": 1.023992322456814, "grad_norm": 1.7086999416351318, "kl": 0.2982555031776428, "learning_rate": 4.0216356285294995e-07, "loss": 0.0003, "reward": 1.6785714626312256, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3214285969734192, "step": 1067 }, { "completion_length": 157.0, "epoch": 1.0249520153550864, "grad_norm": 1.409734845161438, "kl": 0.3191165328025818, "learning_rate": 4.0194910773636435e-07, "loss": 0.0003, "reward": 1.7142858505249023, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.2142857313156128, "step": 1068 }, { "completion_length": 170.71429443359375, "epoch": 1.0259117082533589, "grad_norm": 0.5488816499710083, "kl": 0.2879602313041687, "learning_rate": 4.0173447515678915e-07, "loss": 0.0003, "reward": 1.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1069 }, { "completion_length": 206.1428680419922, "epoch": 1.0268714011516316, "grad_norm": 1.2693251371383667, "kl": 0.2857181131839752, "learning_rate": 4.015196653648959e-07, "loss": 0.0003, "reward": 1.6785714626312256, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 1070 }, { "completion_length": 161.07144165039062, "epoch": 1.027831094049904, "grad_norm": 0.883381724357605, "kl": 0.3067573010921478, "learning_rate": 4.0130467861156324e-07, "loss": 0.0003, "reward": 1.5000001192092896, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.5, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1071 }, { "completion_length": 175.6428680419922, "epoch": 1.0287907869481765, "grad_norm": 2.401344060897827, "kl": 0.3484783470630646, "learning_rate": 4.010895151478766e-07, "loss": 0.0003, "reward": 1.5357143878936768, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.25, "step": 1072 }, { "completion_length": 187.00001525878906, "epoch": 1.0297504798464492, "grad_norm": 1.7990412712097168, "kl": 0.2901994287967682, "learning_rate": 4.008741752251276e-07, "loss": 0.0003, "reward": 1.5357143878936768, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3214285969734192, "step": 1073 }, { "completion_length": 198.50001525878906, "epoch": 1.0307101727447217, "grad_norm": 2.0135037899017334, "kl": 0.2655673921108246, "learning_rate": 4.006586590948141e-07, "loss": 0.0003, "reward": 1.571428656578064, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1074 }, { "completion_length": 185.42857360839844, "epoch": 1.0316698656429943, "grad_norm": 1.8488428592681885, "kl": 0.2670310437679291, "learning_rate": 4.0044296700863965e-07, "loss": 0.0003, "reward": 1.5357143878936768, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 1075 }, { "completion_length": 178.00001525878906, "epoch": 1.0326295585412668, "grad_norm": 1.6569963693618774, "kl": 0.2093581259250641, "learning_rate": 4.0022709921851337e-07, "loss": 0.0002, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1076 }, { "completion_length": 139.7857208251953, "epoch": 1.0335892514395393, "grad_norm": 1.75818932056427, "kl": 0.3121327757835388, "learning_rate": 4.000110559765496e-07, "loss": 0.0003, "reward": 1.6785714626312256, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3214285969734192, "step": 1077 }, { "completion_length": 154.71429443359375, "epoch": 1.034548944337812, "grad_norm": 1.0766394138336182, "kl": 0.31979355216026306, "learning_rate": 3.9979483753506746e-07, "loss": 0.0003, "reward": 1.7500001192092896, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 1078 }, { "completion_length": 171.7857208251953, "epoch": 1.0355086372360844, "grad_norm": 1.399048924446106, "kl": 0.25951194763183594, "learning_rate": 3.9957844414659097e-07, "loss": 0.0003, "reward": 1.7500001192092896, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1079 }, { "completion_length": 188.92857360839844, "epoch": 1.036468330134357, "grad_norm": 1.18674898147583, "kl": 0.27142927050590515, "learning_rate": 3.993618760638481e-07, "loss": 0.0003, "reward": 1.6428571939468384, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1080 }, { "completion_length": 182.50001525878906, "epoch": 1.0374280230326296, "grad_norm": 1.2425241470336914, "kl": 0.26441752910614014, "learning_rate": 3.991451335397711e-07, "loss": 0.0003, "reward": 1.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1081 }, { "completion_length": 189.35714721679688, "epoch": 1.038387715930902, "grad_norm": 1.0449398756027222, "kl": 0.25219082832336426, "learning_rate": 3.989282168274959e-07, "loss": 0.0003, "reward": 1.7857143878936768, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1082 }, { "completion_length": 137.71429443359375, "epoch": 1.0393474088291748, "grad_norm": 1.96175217628479, "kl": 0.336626261472702, "learning_rate": 3.9871112618036175e-07, "loss": 0.0003, "reward": 1.7500001192092896, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3214285969734192, "step": 1083 }, { "completion_length": 156.57144165039062, "epoch": 1.0403071017274472, "grad_norm": 1.549251675605774, "kl": 0.3033462464809418, "learning_rate": 3.9849386185191127e-07, "loss": 0.0003, "reward": 1.7142858505249023, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1084 }, { "completion_length": 144.21429443359375, "epoch": 1.0412667946257197, "grad_norm": 0.7421916723251343, "kl": 0.34698399901390076, "learning_rate": 3.982764240958897e-07, "loss": 0.0003, "reward": 1.6785714626312256, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3214285969734192, "step": 1085 }, { "completion_length": 239.07144165039062, "epoch": 1.0422264875239924, "grad_norm": 0.8459969162940979, "kl": 0.24112899601459503, "learning_rate": 3.9805881316624503e-07, "loss": 0.0002, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1086 }, { "completion_length": 167.35714721679688, "epoch": 1.0431861804222649, "grad_norm": 1.1038501262664795, "kl": 0.2746982276439667, "learning_rate": 3.978410293171273e-07, "loss": 0.0003, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1087 }, { "completion_length": 197.71429443359375, "epoch": 1.0441458733205373, "grad_norm": 1.5568015575408936, "kl": 0.2162763476371765, "learning_rate": 3.976230728028886e-07, "loss": 0.0002, "reward": 1.7142858505249023, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1088 }, { "completion_length": 198.1428680419922, "epoch": 1.04510556621881, "grad_norm": 0.7377315163612366, "kl": 0.20316895842552185, "learning_rate": 3.974049438780828e-07, "loss": 0.0002, "reward": 1.6785714626312256, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1089 }, { "completion_length": 170.2857208251953, "epoch": 1.0460652591170825, "grad_norm": 1.0533148050308228, "kl": 0.2413427233695984, "learning_rate": 3.971866427974648e-07, "loss": 0.0002, "reward": 1.5000001192092896, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.2142857313156128, "step": 1090 }, { "completion_length": 163.35714721679688, "epoch": 1.0470249520153552, "grad_norm": 2.0141327381134033, "kl": 0.3092270791530609, "learning_rate": 3.969681698159909e-07, "loss": 0.0003, "reward": 1.7500001192092896, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3214285969734192, "step": 1091 }, { "completion_length": 132.71429443359375, "epoch": 1.0479846449136276, "grad_norm": 1.8995516300201416, "kl": 0.29734641313552856, "learning_rate": 3.967495251888181e-07, "loss": 0.0003, "reward": 1.6428571939468384, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1092 }, { "completion_length": 205.35714721679688, "epoch": 1.0489443378119, "grad_norm": 1.888195514678955, "kl": 0.24927011132240295, "learning_rate": 3.965307091713037e-07, "loss": 0.0002, "reward": 1.6428571939468384, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 1093 }, { "completion_length": 229.85714721679688, "epoch": 1.0499040307101728, "grad_norm": 1.2226510047912598, "kl": 0.20661062002182007, "learning_rate": 3.963117220190052e-07, "loss": 0.0002, "reward": 1.821428656578064, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1094 }, { "completion_length": 145.2857208251953, "epoch": 1.0508637236084453, "grad_norm": 1.8846043348312378, "kl": 0.31018102169036865, "learning_rate": 3.960925639876802e-07, "loss": 0.0003, "reward": 1.821428656578064, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 1095 }, { "completion_length": 146.6428680419922, "epoch": 1.051823416506718, "grad_norm": 1.5057237148284912, "kl": 0.34966427087783813, "learning_rate": 3.9587323533328565e-07, "loss": 0.0003, "reward": 1.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.2142857313156128, "step": 1096 }, { "completion_length": 187.2857208251953, "epoch": 1.0527831094049904, "grad_norm": 1.3404570817947388, "kl": 0.2605533003807068, "learning_rate": 3.9565373631197777e-07, "loss": 0.0003, "reward": 1.5000001192092896, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 1097 }, { "completion_length": 205.42857360839844, "epoch": 1.053742802303263, "grad_norm": 1.2981644868850708, "kl": 0.2261297106742859, "learning_rate": 3.954340671801119e-07, "loss": 0.0002, "reward": 1.571428656578064, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1098 }, { "completion_length": 192.6428680419922, "epoch": 1.0547024952015356, "grad_norm": 0.9897794127464294, "kl": 0.3105334937572479, "learning_rate": 3.95214228194242e-07, "loss": 0.0003, "reward": 1.821428656578064, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3214285969734192, "step": 1099 }, { "completion_length": 233.1428680419922, "epoch": 1.055662188099808, "grad_norm": 1.3002192974090576, "kl": 0.26345714926719666, "learning_rate": 3.949942196111202e-07, "loss": 0.0003, "reward": 1.7500001192092896, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3214285969734192, "step": 1100 }, { "completion_length": 255.4285888671875, "epoch": 1.0566218809980805, "grad_norm": 1.0020902156829834, "kl": 0.1920848786830902, "learning_rate": 3.94774041687697e-07, "loss": 0.0002, "reward": 1.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1101 }, { "completion_length": 185.92857360839844, "epoch": 1.0575815738963532, "grad_norm": 1.7884811162948608, "kl": 0.23791731894016266, "learning_rate": 3.9455369468112064e-07, "loss": 0.0002, "reward": 1.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1102 }, { "completion_length": 158.42857360839844, "epoch": 1.0585412667946257, "grad_norm": 0.9466820955276489, "kl": 0.29386746883392334, "learning_rate": 3.9433317884873665e-07, "loss": 0.0003, "reward": 1.6785714626312256, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1103 }, { "completion_length": 179.92857360839844, "epoch": 1.0595009596928984, "grad_norm": 1.0828043222427368, "kl": 0.3202075958251953, "learning_rate": 3.9411249444808776e-07, "loss": 0.0003, "reward": 1.821428656578064, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 1104 }, { "completion_length": 203.1428680419922, "epoch": 1.0604606525911708, "grad_norm": 1.8578568696975708, "kl": 0.2532653212547302, "learning_rate": 3.9389164173691375e-07, "loss": 0.0003, "reward": 1.6785714626312256, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3214285969734192, "step": 1105 }, { "completion_length": 156.57144165039062, "epoch": 1.0614203454894433, "grad_norm": 0.8906612396240234, "kl": 0.32685476541519165, "learning_rate": 3.9367062097315084e-07, "loss": 0.0003, "reward": 1.6071429252624512, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 1106 }, { "completion_length": 206.42857360839844, "epoch": 1.062380038387716, "grad_norm": 1.0463290214538574, "kl": 0.2639080882072449, "learning_rate": 3.9344943241493155e-07, "loss": 0.0003, "reward": 1.7500001192092896, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3214285969734192, "step": 1107 }, { "completion_length": 211.1428680419922, "epoch": 1.0633397312859885, "grad_norm": 0.015853416174650192, "kl": 0.25096091628074646, "learning_rate": 3.9322807632058444e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 1108 }, { "completion_length": 192.92857360839844, "epoch": 1.064299424184261, "grad_norm": 1.6530088186264038, "kl": 0.24373246729373932, "learning_rate": 3.930065529486334e-07, "loss": 0.0002, "reward": 1.7142858505249023, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1109 }, { "completion_length": 171.50001525878906, "epoch": 1.0652591170825336, "grad_norm": 1.8940831422805786, "kl": 0.3239191770553589, "learning_rate": 3.927848625577983e-07, "loss": 0.0003, "reward": 1.821428656578064, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3214285969734192, "step": 1110 }, { "completion_length": 182.6428680419922, "epoch": 1.066218809980806, "grad_norm": 1.512353539466858, "kl": 0.2825901210308075, "learning_rate": 3.925630054069935e-07, "loss": 0.0003, "reward": 1.7142858505249023, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 1111 }, { "completion_length": 172.92857360839844, "epoch": 1.0671785028790788, "grad_norm": 1.8488956689834595, "kl": 0.3072563707828522, "learning_rate": 3.923409817553284e-07, "loss": 0.0003, "reward": 1.7500001192092896, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 1112 }, { "completion_length": 229.6428680419922, "epoch": 1.0681381957773513, "grad_norm": 1.858206868171692, "kl": 0.2167946994304657, "learning_rate": 3.9211879186210675e-07, "loss": 0.0002, "reward": 1.4642857313156128, "reward_std": 0.3535533845424652, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3214285969734192, "step": 1113 }, { "completion_length": 187.21429443359375, "epoch": 1.0690978886756237, "grad_norm": 1.6712568998336792, "kl": 0.32456323504447937, "learning_rate": 3.918964359868265e-07, "loss": 0.0003, "reward": 1.7142858505249023, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1114 }, { "completion_length": 139.07144165039062, "epoch": 1.0700575815738964, "grad_norm": 2.0442471504211426, "kl": 0.3797508776187897, "learning_rate": 3.9167391438917963e-07, "loss": 0.0004, "reward": 1.7142858505249023, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 1115 }, { "completion_length": 168.07144165039062, "epoch": 1.0710172744721689, "grad_norm": 2.0716166496276855, "kl": 0.30689510703086853, "learning_rate": 3.914512273290513e-07, "loss": 0.0003, "reward": 1.7142858505249023, "reward_std": 0.4040610194206238, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1116 }, { "completion_length": 193.7857208251953, "epoch": 1.0719769673704413, "grad_norm": 1.7726455926895142, "kl": 0.2707095742225647, "learning_rate": 3.912283750665202e-07, "loss": 0.0003, "reward": 1.7500001192092896, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 1117 }, { "completion_length": 142.57144165039062, "epoch": 1.072936660268714, "grad_norm": 2.1476807594299316, "kl": 0.36615556478500366, "learning_rate": 3.910053578618579e-07, "loss": 0.0004, "reward": 1.6071429252624512, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 1118 }, { "completion_length": 173.92857360839844, "epoch": 1.0738963531669865, "grad_norm": 1.0581178665161133, "kl": 0.7409386038780212, "learning_rate": 3.907821759755285e-07, "loss": 0.0007, "reward": 1.7142858505249023, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 1119 }, { "completion_length": 174.21429443359375, "epoch": 1.0748560460652592, "grad_norm": 0.7554715275764465, "kl": 0.32895010709762573, "learning_rate": 3.905588296681885e-07, "loss": 0.0003, "reward": 1.821428656578064, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 1120 }, { "completion_length": 193.6428680419922, "epoch": 1.0758157389635317, "grad_norm": 0.821484386920929, "kl": 0.24095328152179718, "learning_rate": 3.903353192006865e-07, "loss": 0.0002, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 1121 }, { "completion_length": 166.21429443359375, "epoch": 1.0767754318618041, "grad_norm": 1.7270039319992065, "kl": 0.3466769754886627, "learning_rate": 3.901116448340629e-07, "loss": 0.0003, "reward": 1.6785714626312256, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1122 }, { "completion_length": 158.0, "epoch": 1.0777351247600768, "grad_norm": 1.8250023126602173, "kl": 0.35978320240974426, "learning_rate": 3.898878068295491e-07, "loss": 0.0004, "reward": 1.7500001192092896, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 1123 }, { "completion_length": 220.57144165039062, "epoch": 1.0786948176583493, "grad_norm": 1.4428375959396362, "kl": 0.2878282070159912, "learning_rate": 3.8966380544856816e-07, "loss": 0.0003, "reward": 1.5357143878936768, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 1124 }, { "completion_length": 172.7857208251953, "epoch": 1.079654510556622, "grad_norm": 1.942779541015625, "kl": 0.3459227383136749, "learning_rate": 3.8943964095273354e-07, "loss": 0.0003, "reward": 1.4285714626312256, "reward_std": 0.4040610194206238, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 1125 }, { "completion_length": 162.5, "epoch": 1.0806142034548945, "grad_norm": 1.1849390268325806, "kl": 0.36685889959335327, "learning_rate": 3.892153136038493e-07, "loss": 0.0004, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1126 }, { "completion_length": 159.92857360839844, "epoch": 1.081573896353167, "grad_norm": 2.774545192718506, "kl": 0.3978706896305084, "learning_rate": 3.8899082366391e-07, "loss": 0.0004, "reward": 1.5000001192092896, "reward_std": 0.7071067690849304, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1127 }, { "completion_length": 167.7857208251953, "epoch": 1.0825335892514396, "grad_norm": 0.7065428495407104, "kl": 0.3554045855998993, "learning_rate": 3.887661713950996e-07, "loss": 0.0004, "reward": 1.8571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1128 }, { "completion_length": 177.42857360839844, "epoch": 1.083493282149712, "grad_norm": 2.3589515686035156, "kl": 0.3786659836769104, "learning_rate": 3.8854135705979196e-07, "loss": 0.0004, "reward": 1.7857143878936768, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1129 }, { "completion_length": 168.71429443359375, "epoch": 1.0844529750479845, "grad_norm": 2.480414867401123, "kl": 0.3762117326259613, "learning_rate": 3.883163809205503e-07, "loss": 0.0004, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1130 }, { "completion_length": 204.57144165039062, "epoch": 1.0854126679462572, "grad_norm": 0.013991447165608406, "kl": 0.2827664613723755, "learning_rate": 3.880912432401264e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1131 }, { "completion_length": 183.92857360839844, "epoch": 1.0863723608445297, "grad_norm": 0.6141138076782227, "kl": 0.3415330648422241, "learning_rate": 3.8786594428146126e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1132 }, { "completion_length": 146.85714721679688, "epoch": 1.0873320537428024, "grad_norm": 1.4142476320266724, "kl": 0.47283610701560974, "learning_rate": 3.876404843076837e-07, "loss": 0.0005, "reward": 1.7142858505249023, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 1133 }, { "completion_length": 159.2857208251953, "epoch": 1.0882917466410749, "grad_norm": 1.6403586864471436, "kl": 0.40482109785079956, "learning_rate": 3.8741486358211086e-07, "loss": 0.0004, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1134 }, { "completion_length": 117.28572082519531, "epoch": 1.0892514395393473, "grad_norm": 1.6191809177398682, "kl": 0.43235448002815247, "learning_rate": 3.871890823682477e-07, "loss": 0.0004, "reward": 1.7142858505249023, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1135 }, { "completion_length": 194.2857208251953, "epoch": 1.09021113243762, "grad_norm": 0.986426830291748, "kl": 0.25767073035240173, "learning_rate": 3.8696314092978656e-07, "loss": 0.0003, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1136 }, { "completion_length": 136.85714721679688, "epoch": 1.0911708253358925, "grad_norm": 1.515556812286377, "kl": 0.4302854835987091, "learning_rate": 3.867370395306068e-07, "loss": 0.0004, "reward": 1.5000001192092896, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.2857142984867096, "step": 1137 }, { "completion_length": 196.42857360839844, "epoch": 1.092130518234165, "grad_norm": 1.5575640201568604, "kl": 0.33694255352020264, "learning_rate": 3.8651077843477455e-07, "loss": 0.0003, "reward": 1.571428656578064, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 1138 }, { "completion_length": 192.6428680419922, "epoch": 1.0930902111324377, "grad_norm": 1.1827517747879028, "kl": 0.2791388928890228, "learning_rate": 3.862843579065427e-07, "loss": 0.0003, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1139 }, { "completion_length": 145.07144165039062, "epoch": 1.0940499040307101, "grad_norm": 0.013225818052887917, "kl": 0.37293606996536255, "learning_rate": 3.8605777821035014e-07, "loss": 0.0004, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 1140 }, { "completion_length": 186.7857208251953, "epoch": 1.0950095969289828, "grad_norm": 1.4239174127578735, "kl": 0.3444064259529114, "learning_rate": 3.8583103961082184e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1141 }, { "completion_length": 182.50001525878906, "epoch": 1.0959692898272553, "grad_norm": 0.8875599503517151, "kl": 0.34647759795188904, "learning_rate": 3.856041423727681e-07, "loss": 0.0003, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1142 }, { "completion_length": 172.57144165039062, "epoch": 1.0969289827255277, "grad_norm": 1.3698856830596924, "kl": 0.3206397593021393, "learning_rate": 3.853770867611847e-07, "loss": 0.0003, "reward": 1.821428656578064, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1143 }, { "completion_length": 208.35714721679688, "epoch": 1.0978886756238004, "grad_norm": 0.014528803527355194, "kl": 0.28912481665611267, "learning_rate": 3.8514987304125235e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1144 }, { "completion_length": 212.07144165039062, "epoch": 1.098848368522073, "grad_norm": 0.9934359192848206, "kl": 0.2869039475917816, "learning_rate": 3.8492250147833634e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1145 }, { "completion_length": 225.57144165039062, "epoch": 1.0998080614203456, "grad_norm": 1.7635587453842163, "kl": 0.31285566091537476, "learning_rate": 3.8469497233798634e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1146 }, { "completion_length": 161.71429443359375, "epoch": 1.100767754318618, "grad_norm": 1.0723339319229126, "kl": 0.42492878437042236, "learning_rate": 3.844672858859361e-07, "loss": 0.0004, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1147 }, { "completion_length": 225.07144165039062, "epoch": 1.1017274472168905, "grad_norm": 1.127921223640442, "kl": 0.2734018862247467, "learning_rate": 3.842394423881031e-07, "loss": 0.0003, "reward": 1.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1148 }, { "completion_length": 160.71429443359375, "epoch": 1.1026871401151632, "grad_norm": 1.4955753087997437, "kl": 0.4183797538280487, "learning_rate": 3.8401144211058804e-07, "loss": 0.0004, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1149 }, { "completion_length": 145.92857360839844, "epoch": 1.1036468330134357, "grad_norm": 2.2043514251708984, "kl": 0.4506584703922272, "learning_rate": 3.8378328531967507e-07, "loss": 0.0005, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1150 }, { "completion_length": 169.21429443359375, "epoch": 1.1046065259117082, "grad_norm": 0.03205137699842453, "kl": 0.43326738476753235, "learning_rate": 3.835549722818309e-07, "loss": 0.0004, "reward": 1.7857143878936768, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1151 }, { "completion_length": 189.57144165039062, "epoch": 1.1055662188099808, "grad_norm": 0.027284737676382065, "kl": 0.34702855348587036, "learning_rate": 3.8332650326370475e-07, "loss": 0.0003, "reward": 1.6428571939468384, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1152 }, { "completion_length": 181.35714721679688, "epoch": 1.1065259117082533, "grad_norm": 2.2777953147888184, "kl": 0.33712342381477356, "learning_rate": 3.83097878532128e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1153 }, { "completion_length": 158.42857360839844, "epoch": 1.107485604606526, "grad_norm": 1.8048875331878662, "kl": 0.4039619266986847, "learning_rate": 3.82869098354114e-07, "loss": 0.0004, "reward": 1.821428656578064, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1154 }, { "completion_length": 208.85714721679688, "epoch": 1.1084452975047985, "grad_norm": 0.014341630972921848, "kl": 0.27807721495628357, "learning_rate": 3.826401629968574e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1155 }, { "completion_length": 205.92857360839844, "epoch": 1.109404990403071, "grad_norm": 1.995648741722107, "kl": 0.3046196699142456, "learning_rate": 3.8241107272773446e-07, "loss": 0.0003, "reward": 1.821428656578064, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1156 }, { "completion_length": 221.4285888671875, "epoch": 1.1103646833013436, "grad_norm": 0.7434324622154236, "kl": 0.3157734274864197, "learning_rate": 3.8218182781430217e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1157 }, { "completion_length": 193.00001525878906, "epoch": 1.111324376199616, "grad_norm": 2.0127768516540527, "kl": 0.34009307622909546, "learning_rate": 3.8195242852429796e-07, "loss": 0.0003, "reward": 1.6785714626312256, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 1158 }, { "completion_length": 161.71429443359375, "epoch": 1.1122840690978886, "grad_norm": 0.9429745078086853, "kl": 0.4028349816799164, "learning_rate": 3.817228751256397e-07, "loss": 0.0004, "reward": 1.6785714626312256, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 1159 }, { "completion_length": 229.57144165039062, "epoch": 1.1132437619961613, "grad_norm": 1.2414830923080444, "kl": 0.30453288555145264, "learning_rate": 3.814931678864254e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1160 }, { "completion_length": 226.00001525878906, "epoch": 1.1142034548944337, "grad_norm": 1.3378427028656006, "kl": 0.30216512084007263, "learning_rate": 3.812633070749326e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1161 }, { "completion_length": 195.21429443359375, "epoch": 1.1151631477927064, "grad_norm": 1.4394062757492065, "kl": 0.3246981203556061, "learning_rate": 3.810332929596181e-07, "loss": 0.0003, "reward": 1.8928571939468384, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1162 }, { "completion_length": 150.6428680419922, "epoch": 1.116122840690979, "grad_norm": 3.6315433979034424, "kl": 0.4843553602695465, "learning_rate": 3.808031258091179e-07, "loss": 0.0005, "reward": 1.7500001192092896, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 1163 }, { "completion_length": 147.42857360839844, "epoch": 1.1170825335892514, "grad_norm": 1.7704585790634155, "kl": 0.47232651710510254, "learning_rate": 3.8057280589224665e-07, "loss": 0.0005, "reward": 1.821428656578064, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1164 }, { "completion_length": 187.92857360839844, "epoch": 1.118042226487524, "grad_norm": 1.135862112045288, "kl": 0.38144174218177795, "learning_rate": 3.803423334779975e-07, "loss": 0.0004, "reward": 1.821428656578064, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 1165 }, { "completion_length": 123.21429443359375, "epoch": 1.1190019193857965, "grad_norm": 0.692273736000061, "kl": 0.4400380253791809, "learning_rate": 3.8011170883554166e-07, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1166 }, { "completion_length": 145.1428680419922, "epoch": 1.1199616122840692, "grad_norm": 0.8855399489402771, "kl": 0.44380515813827515, "learning_rate": 3.7988093223422804e-07, "loss": 0.0004, "reward": 1.8571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1167 }, { "completion_length": 270.8571472167969, "epoch": 1.1209213051823417, "grad_norm": 1.0914463996887207, "kl": 0.25323253870010376, "learning_rate": 3.7965000394358316e-07, "loss": 0.0003, "reward": 1.7857143878936768, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1168 }, { "completion_length": 178.1428680419922, "epoch": 1.1218809980806141, "grad_norm": 1.3325294256210327, "kl": 0.42358633875846863, "learning_rate": 3.794189242333106e-07, "loss": 0.0004, "reward": 1.7500001192092896, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 1169 }, { "completion_length": 168.1428680419922, "epoch": 1.1228406909788868, "grad_norm": 1.8313056230545044, "kl": 0.40265733003616333, "learning_rate": 3.7918769337329085e-07, "loss": 0.0004, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1170 }, { "completion_length": 219.85714721679688, "epoch": 1.1238003838771593, "grad_norm": 1.5835679769515991, "kl": 0.3082060217857361, "learning_rate": 3.78956311633581e-07, "loss": 0.0003, "reward": 1.7857143878936768, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1171 }, { "completion_length": 179.92857360839844, "epoch": 1.1247600767754318, "grad_norm": 2.2067208290100098, "kl": 0.45085740089416504, "learning_rate": 3.787247792844141e-07, "loss": 0.0005, "reward": 1.7500001192092896, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 1172 }, { "completion_length": 203.2857208251953, "epoch": 1.1257197696737045, "grad_norm": 0.013978196308016777, "kl": 0.3152765929698944, "learning_rate": 3.784930965961994e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1173 }, { "completion_length": 186.35714721679688, "epoch": 1.126679462571977, "grad_norm": 1.070854663848877, "kl": 0.40420037508010864, "learning_rate": 3.782612638395215e-07, "loss": 0.0004, "reward": 1.7142858505249023, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.2857142984867096, "step": 1174 }, { "completion_length": 261.2857360839844, "epoch": 1.1276391554702494, "grad_norm": 1.2420620918273926, "kl": 0.2811175584793091, "learning_rate": 3.7802928128514034e-07, "loss": 0.0003, "reward": 1.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1175 }, { "completion_length": 191.07144165039062, "epoch": 1.128598848368522, "grad_norm": 0.6973476409912109, "kl": 0.4557143747806549, "learning_rate": 3.777971492039909e-07, "loss": 0.0005, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 1176 }, { "completion_length": 180.21429443359375, "epoch": 1.1295585412667946, "grad_norm": 2.3768489360809326, "kl": 0.4427265524864197, "learning_rate": 3.7756486786718267e-07, "loss": 0.0004, "reward": 1.821428656578064, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1177 }, { "completion_length": 167.35714721679688, "epoch": 1.1305182341650672, "grad_norm": 2.234508514404297, "kl": 0.4989181458950043, "learning_rate": 3.7733243754599956e-07, "loss": 0.0005, "reward": 1.7500001192092896, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3214285969734192, "step": 1178 }, { "completion_length": 173.07144165039062, "epoch": 1.1314779270633397, "grad_norm": 2.218514919281006, "kl": 0.5259931683540344, "learning_rate": 3.770998585118994e-07, "loss": 0.0005, "reward": 1.821428656578064, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 1179 }, { "completion_length": 165.92857360839844, "epoch": 1.1324376199616122, "grad_norm": 0.932837188243866, "kl": 0.40488532185554504, "learning_rate": 3.768671310365137e-07, "loss": 0.0004, "reward": 1.8928571939468384, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 1180 }, { "completion_length": 167.07144165039062, "epoch": 1.1333973128598849, "grad_norm": 0.871694028377533, "kl": 0.35382845997810364, "learning_rate": 3.7663425539164753e-07, "loss": 0.0004, "reward": 1.821428656578064, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1181 }, { "completion_length": 140.42857360839844, "epoch": 1.1343570057581573, "grad_norm": 1.9135689735412598, "kl": 0.5563753843307495, "learning_rate": 3.7640123184927867e-07, "loss": 0.0006, "reward": 1.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1182 }, { "completion_length": 199.50001525878906, "epoch": 1.13531669865643, "grad_norm": 0.9422529935836792, "kl": 0.37690842151641846, "learning_rate": 3.761680606815578e-07, "loss": 0.0004, "reward": 1.821428656578064, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1183 }, { "completion_length": 188.07144165039062, "epoch": 1.1362763915547025, "grad_norm": 1.7669681310653687, "kl": 0.28527364134788513, "learning_rate": 3.759347421608082e-07, "loss": 0.0003, "reward": 1.821428656578064, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1184 }, { "completion_length": 158.92857360839844, "epoch": 1.137236084452975, "grad_norm": 0.016278883442282677, "kl": 0.339408278465271, "learning_rate": 3.7570127655952496e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1185 }, { "completion_length": 209.50001525878906, "epoch": 1.1381957773512477, "grad_norm": 1.282151699066162, "kl": 0.3380278944969177, "learning_rate": 3.75467664150375e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1186 }, { "completion_length": 152.7857208251953, "epoch": 1.1391554702495201, "grad_norm": 0.03349420800805092, "kl": 0.44588643312454224, "learning_rate": 3.752339052061968e-07, "loss": 0.0004, "reward": 1.7857143878936768, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1187 }, { "completion_length": 180.07144165039062, "epoch": 1.1401151631477928, "grad_norm": 1.6131712198257446, "kl": 0.5104025602340698, "learning_rate": 3.75e-07, "loss": 0.0005, "reward": 1.7500001192092896, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3214285969734192, "step": 1188 }, { "completion_length": 137.0, "epoch": 1.1410748560460653, "grad_norm": 1.5545928478240967, "kl": 0.46694260835647583, "learning_rate": 3.747659488049649e-07, "loss": 0.0005, "reward": 1.8928571939468384, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 1189 }, { "completion_length": 203.00001525878906, "epoch": 1.1420345489443378, "grad_norm": 0.8087463974952698, "kl": 0.32299014925956726, "learning_rate": 3.7453175189444245e-07, "loss": 0.0003, "reward": 1.7142858505249023, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1190 }, { "completion_length": 176.85714721679688, "epoch": 1.1429942418426104, "grad_norm": 1.8449070453643799, "kl": 0.310975044965744, "learning_rate": 3.742974095419538e-07, "loss": 0.0003, "reward": 1.7500001192092896, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 1191 }, { "completion_length": 189.1428680419922, "epoch": 1.143953934740883, "grad_norm": 1.3516579866409302, "kl": 0.30343514680862427, "learning_rate": 3.740629220211896e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1192 }, { "completion_length": 159.7857208251953, "epoch": 1.1449136276391554, "grad_norm": 2.018433094024658, "kl": 0.5771712064743042, "learning_rate": 3.7382828960601067e-07, "loss": 0.0006, "reward": 1.7142858505249023, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1193 }, { "completion_length": 221.1428680419922, "epoch": 1.145873320537428, "grad_norm": 1.640122652053833, "kl": 0.2898763418197632, "learning_rate": 3.735935125704466e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1194 }, { "completion_length": 156.2857208251953, "epoch": 1.1468330134357005, "grad_norm": 2.329439163208008, "kl": 0.400996595621109, "learning_rate": 3.73358591188696e-07, "loss": 0.0004, "reward": 1.6071429252624512, "reward_std": 0.45456868410110474, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 1195 }, { "completion_length": 158.71429443359375, "epoch": 1.147792706333973, "grad_norm": 2.489528179168701, "kl": 0.3032777011394501, "learning_rate": 3.73123525735126e-07, "loss": 0.0003, "reward": 1.6071429252624512, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 1196 }, { "completion_length": 176.7857208251953, "epoch": 1.1487523992322457, "grad_norm": 0.010371050797402859, "kl": 0.30582675337791443, "learning_rate": 3.7288831648427217e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1197 }, { "completion_length": 188.35714721679688, "epoch": 1.1497120921305182, "grad_norm": 1.7805904150009155, "kl": 0.29900118708610535, "learning_rate": 3.726529637108379e-07, "loss": 0.0003, "reward": 1.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1198 }, { "completion_length": 195.92857360839844, "epoch": 1.1506717850287909, "grad_norm": 1.9781526327133179, "kl": 0.26978549361228943, "learning_rate": 3.724174676896941e-07, "loss": 0.0003, "reward": 1.7857143878936768, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 1199 }, { "completion_length": 203.50001525878906, "epoch": 1.1516314779270633, "grad_norm": 0.012347865849733353, "kl": 0.25005945563316345, "learning_rate": 3.721818286958792e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1200 }, { "completion_length": 182.85714721679688, "epoch": 1.1525911708253358, "grad_norm": 1.621994137763977, "kl": 0.26895999908447266, "learning_rate": 3.7194604700459844e-07, "loss": 0.0003, "reward": 1.821428656578064, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1201 }, { "completion_length": 207.35714721679688, "epoch": 1.1535508637236085, "grad_norm": 1.041718602180481, "kl": 0.22139033675193787, "learning_rate": 3.717101228912239e-07, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1202 }, { "completion_length": 130.42857360839844, "epoch": 1.154510556621881, "grad_norm": 1.9425976276397705, "kl": 0.37766996026039124, "learning_rate": 3.714740566312938e-07, "loss": 0.0004, "reward": 1.6785714626312256, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1203 }, { "completion_length": 145.85714721679688, "epoch": 1.1554702495201536, "grad_norm": 1.209244966506958, "kl": 0.31292814016342163, "learning_rate": 3.712378485005123e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1204 }, { "completion_length": 145.1428680419922, "epoch": 1.1564299424184261, "grad_norm": 0.8481686115264893, "kl": 0.3460707366466522, "learning_rate": 3.7100149877474976e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1205 }, { "completion_length": 200.00001525878906, "epoch": 1.1573896353166986, "grad_norm": 0.45723026990890503, "kl": 0.22607751190662384, "learning_rate": 3.7076500773004115e-07, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1206 }, { "completion_length": 185.85714721679688, "epoch": 1.1583493282149713, "grad_norm": 1.420723557472229, "kl": 0.2860151529312134, "learning_rate": 3.705283756425872e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1207 }, { "completion_length": 199.00001525878906, "epoch": 1.1593090211132437, "grad_norm": 1.155799150466919, "kl": 0.2613682746887207, "learning_rate": 3.70291602788753e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1208 }, { "completion_length": 163.42857360839844, "epoch": 1.1602687140115164, "grad_norm": 0.010356533341109753, "kl": 0.29654645919799805, "learning_rate": 3.70054689445068e-07, "loss": 0.0003, "reward": 1.6428571939468384, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1209 }, { "completion_length": 181.92857360839844, "epoch": 1.161228406909789, "grad_norm": 0.015155164524912834, "kl": 0.3019307553768158, "learning_rate": 3.698176358882258e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1210 }, { "completion_length": 150.2857208251953, "epoch": 1.1621880998080614, "grad_norm": 1.4409770965576172, "kl": 0.36410728096961975, "learning_rate": 3.6958044239508393e-07, "loss": 0.0004, "reward": 1.8928571939468384, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 1211 }, { "completion_length": 145.6428680419922, "epoch": 1.163147792706334, "grad_norm": 1.113458275794983, "kl": 0.2708686292171478, "learning_rate": 3.6934310924266313e-07, "loss": 0.0003, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1212 }, { "completion_length": 187.92857360839844, "epoch": 1.1641074856046065, "grad_norm": 0.026935361325740814, "kl": 0.28108668327331543, "learning_rate": 3.691056367081474e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1213 }, { "completion_length": 155.85714721679688, "epoch": 1.165067178502879, "grad_norm": 1.4619797468185425, "kl": 0.2639625668525696, "learning_rate": 3.688680250688833e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1214 }, { "completion_length": 162.6428680419922, "epoch": 1.1660268714011517, "grad_norm": 0.7631334662437439, "kl": 0.2944657802581787, "learning_rate": 3.686302746023802e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1215 }, { "completion_length": 194.7857208251953, "epoch": 1.1669865642994242, "grad_norm": 1.228739857673645, "kl": 0.2116396129131317, "learning_rate": 3.683923855863094e-07, "loss": 0.0002, "reward": 1.6785714626312256, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 1216 }, { "completion_length": 135.42857360839844, "epoch": 1.1679462571976966, "grad_norm": 2.4193801879882812, "kl": 0.34899380803108215, "learning_rate": 3.681543582985038e-07, "loss": 0.0003, "reward": 1.7142858505249023, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1217 }, { "completion_length": 126.00000762939453, "epoch": 1.1689059500959693, "grad_norm": 0.016835758462548256, "kl": 0.3218028247356415, "learning_rate": 3.679161930169582e-07, "loss": 0.0003, "reward": 1.7142858505249023, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1218 }, { "completion_length": 173.00001525878906, "epoch": 1.1698656429942418, "grad_norm": 1.1965428590774536, "kl": 0.255247563123703, "learning_rate": 3.676778900198284e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1219 }, { "completion_length": 167.0, "epoch": 1.1708253358925145, "grad_norm": 1.7117925882339478, "kl": 0.2485419064760208, "learning_rate": 3.67439449585431e-07, "loss": 0.0002, "reward": 1.4285714626312256, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.5, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1220 }, { "completion_length": 219.00001525878906, "epoch": 1.171785028790787, "grad_norm": 0.789849579334259, "kl": 0.24550262093544006, "learning_rate": 3.67200871992243e-07, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1221 }, { "completion_length": 178.50001525878906, "epoch": 1.1727447216890594, "grad_norm": 0.7901924848556519, "kl": 0.2231241911649704, "learning_rate": 3.6696215751890195e-07, "loss": 0.0002, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1222 }, { "completion_length": 176.00001525878906, "epoch": 1.173704414587332, "grad_norm": 0.012768877670168877, "kl": 0.28841957449913025, "learning_rate": 3.667233064442049e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1223 }, { "completion_length": 150.1428680419922, "epoch": 1.1746641074856046, "grad_norm": 0.6090799570083618, "kl": 0.3089803159236908, "learning_rate": 3.6648431904710856e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1224 }, { "completion_length": 176.7857208251953, "epoch": 1.1756238003838773, "grad_norm": 1.871325969696045, "kl": 0.3112119436264038, "learning_rate": 3.66245195606729e-07, "loss": 0.0003, "reward": 1.7857143878936768, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1225 }, { "completion_length": 159.2857208251953, "epoch": 1.1765834932821497, "grad_norm": 1.8122371435165405, "kl": 0.27607160806655884, "learning_rate": 3.660059364023408e-07, "loss": 0.0003, "reward": 1.7857143878936768, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1226 }, { "completion_length": 179.2857208251953, "epoch": 1.1775431861804222, "grad_norm": 1.3862378597259521, "kl": 0.24850980937480927, "learning_rate": 3.6576654171337763e-07, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1227 }, { "completion_length": 184.71429443359375, "epoch": 1.1785028790786949, "grad_norm": 1.1886166334152222, "kl": 0.23666004836559296, "learning_rate": 3.6552701181943103e-07, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1228 }, { "completion_length": 146.5, "epoch": 1.1794625719769674, "grad_norm": 0.8369507193565369, "kl": 0.36449918150901794, "learning_rate": 3.652873470002504e-07, "loss": 0.0004, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1229 }, { "completion_length": 143.92857360839844, "epoch": 1.18042226487524, "grad_norm": 0.5832663178443909, "kl": 0.3844743072986603, "learning_rate": 3.650475475357429e-07, "loss": 0.0004, "reward": 1.8571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1230 }, { "completion_length": 202.85714721679688, "epoch": 1.1813819577735125, "grad_norm": 0.5890225172042847, "kl": 0.24783994257450104, "learning_rate": 3.6480761370597297e-07, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1231 }, { "completion_length": 180.92857360839844, "epoch": 1.182341650671785, "grad_norm": 1.177392840385437, "kl": 0.3045493960380554, "learning_rate": 3.645675457911619e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1232 }, { "completion_length": 209.1428680419922, "epoch": 1.1833013435700577, "grad_norm": 0.010740749537944794, "kl": 0.2211105078458786, "learning_rate": 3.6432734407168745e-07, "loss": 0.0002, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1233 }, { "completion_length": 195.2857208251953, "epoch": 1.1842610364683301, "grad_norm": 0.010899125598371029, "kl": 0.2361993044614792, "learning_rate": 3.640870088280839e-07, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1234 }, { "completion_length": 173.07144165039062, "epoch": 1.1852207293666026, "grad_norm": 1.2372400760650635, "kl": 0.2629808783531189, "learning_rate": 3.6384654034104126e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1235 }, { "completion_length": 156.21429443359375, "epoch": 1.1861804222648753, "grad_norm": 0.8395079970359802, "kl": 0.2804495096206665, "learning_rate": 3.636059388914054e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1236 }, { "completion_length": 174.50001525878906, "epoch": 1.1871401151631478, "grad_norm": 0.9987436532974243, "kl": 0.31973105669021606, "learning_rate": 3.6336520476017715e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1237 }, { "completion_length": 134.42857360839844, "epoch": 1.1880998080614202, "grad_norm": 0.011923996731638908, "kl": 0.3651130497455597, "learning_rate": 3.6312433822851265e-07, "loss": 0.0004, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1238 }, { "completion_length": 209.1428680419922, "epoch": 1.189059500959693, "grad_norm": 0.011791710741817951, "kl": 0.2575838565826416, "learning_rate": 3.6288333957772234e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1239 }, { "completion_length": 181.6428680419922, "epoch": 1.1900191938579654, "grad_norm": 0.8866697549819946, "kl": 0.27397555112838745, "learning_rate": 3.626422090892712e-07, "loss": 0.0003, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1240 }, { "completion_length": 221.6428680419922, "epoch": 1.190978886756238, "grad_norm": 0.7019402980804443, "kl": 0.25726011395454407, "learning_rate": 3.6240094704477814e-07, "loss": 0.0003, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1241 }, { "completion_length": 246.07144165039062, "epoch": 1.1919385796545106, "grad_norm": 0.5852331519126892, "kl": 0.23709610104560852, "learning_rate": 3.6215955372601563e-07, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1242 }, { "completion_length": 191.42857360839844, "epoch": 1.192898272552783, "grad_norm": 0.010184546932578087, "kl": 0.22268694639205933, "learning_rate": 3.6191802941490956e-07, "loss": 0.0002, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1243 }, { "completion_length": 214.6428680419922, "epoch": 1.1938579654510557, "grad_norm": 0.012677947990596294, "kl": 0.2672615647315979, "learning_rate": 3.616763743935387e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1244 }, { "completion_length": 175.42857360839844, "epoch": 1.1948176583493282, "grad_norm": 2.1773428916931152, "kl": 0.32695117592811584, "learning_rate": 3.614345889441346e-07, "loss": 0.0003, "reward": 1.821428656578064, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1245 }, { "completion_length": 205.00001525878906, "epoch": 1.1957773512476009, "grad_norm": 1.1700634956359863, "kl": 0.27416640520095825, "learning_rate": 3.61192673349081e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1246 }, { "completion_length": 163.0, "epoch": 1.1967370441458733, "grad_norm": 0.8116897344589233, "kl": 0.31404703855514526, "learning_rate": 3.609506278909138e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1247 }, { "completion_length": 148.7857208251953, "epoch": 1.1976967370441458, "grad_norm": 1.1405105590820312, "kl": 0.4011298716068268, "learning_rate": 3.6070845285232034e-07, "loss": 0.0004, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 1248 }, { "completion_length": 162.1428680419922, "epoch": 1.1986564299424185, "grad_norm": 1.9256361722946167, "kl": 0.28224125504493713, "learning_rate": 3.6046614851613963e-07, "loss": 0.0003, "reward": 1.7857143878936768, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1249 }, { "completion_length": 178.1428680419922, "epoch": 1.199616122840691, "grad_norm": 1.1297776699066162, "kl": 0.311379075050354, "learning_rate": 3.6022371516536143e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1250 }, { "completion_length": 177.6428680419922, "epoch": 1.2005758157389634, "grad_norm": 0.024438994005322456, "kl": 0.32133960723876953, "learning_rate": 3.5998115308312614e-07, "loss": 0.0003, "reward": 1.7142858505249023, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1251 }, { "completion_length": 183.42857360839844, "epoch": 1.2015355086372361, "grad_norm": 0.7742099165916443, "kl": 0.30763572454452515, "learning_rate": 3.597384625527248e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1252 }, { "completion_length": 125.85714721679688, "epoch": 1.2024952015355086, "grad_norm": 1.0602903366088867, "kl": 0.506099283695221, "learning_rate": 3.594956438575981e-07, "loss": 0.0005, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1253 }, { "completion_length": 163.0, "epoch": 1.2034548944337813, "grad_norm": 0.8403517007827759, "kl": 0.3736266791820526, "learning_rate": 3.592526972813367e-07, "loss": 0.0004, "reward": 1.8571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1254 }, { "completion_length": 189.85714721679688, "epoch": 1.2044145873320538, "grad_norm": 1.1600974798202515, "kl": 0.29996225237846375, "learning_rate": 3.590096231076804e-07, "loss": 0.0003, "reward": 1.821428656578064, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1255 }, { "completion_length": 149.71429443359375, "epoch": 1.2053742802303262, "grad_norm": 0.7393319010734558, "kl": 0.4376939833164215, "learning_rate": 3.587664216205183e-07, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1256 }, { "completion_length": 128.21429443359375, "epoch": 1.206333973128599, "grad_norm": 0.012917404994368553, "kl": 0.37554776668548584, "learning_rate": 3.585230931038879e-07, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1257 }, { "completion_length": 221.1428680419922, "epoch": 1.2072936660268714, "grad_norm": 1.4731305837631226, "kl": 0.28300800919532776, "learning_rate": 3.5827963784197516e-07, "loss": 0.0003, "reward": 1.6428571939468384, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1258 }, { "completion_length": 164.57144165039062, "epoch": 1.2082533589251438, "grad_norm": 1.402929425239563, "kl": 0.2818770408630371, "learning_rate": 3.5803605611911426e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1259 }, { "completion_length": 243.7857208251953, "epoch": 1.2092130518234165, "grad_norm": 0.012666034512221813, "kl": 0.2079341858625412, "learning_rate": 3.5779234821978665e-07, "loss": 0.0002, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1260 }, { "completion_length": 222.21429443359375, "epoch": 1.210172744721689, "grad_norm": 0.013398679904639721, "kl": 0.3993920683860779, "learning_rate": 3.575485144286215e-07, "loss": 0.0004, "reward": 1.7142858505249023, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 1261 }, { "completion_length": 161.42857360839844, "epoch": 1.2111324376199617, "grad_norm": 0.01147587038576603, "kl": 0.34524667263031006, "learning_rate": 3.5730455503039505e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1262 }, { "completion_length": 183.21429443359375, "epoch": 1.2120921305182342, "grad_norm": 0.02665029466152191, "kl": 0.24794569611549377, "learning_rate": 3.570604703100299e-07, "loss": 0.0002, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1263 }, { "completion_length": 142.85714721679688, "epoch": 1.2130518234165066, "grad_norm": 0.014069630764424801, "kl": 0.3548809885978699, "learning_rate": 3.568162605525952e-07, "loss": 0.0004, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1264 }, { "completion_length": 174.71429443359375, "epoch": 1.2140115163147793, "grad_norm": 0.011829344555735588, "kl": 0.28332921862602234, "learning_rate": 3.565719260433063e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1265 }, { "completion_length": 174.07144165039062, "epoch": 1.2149712092130518, "grad_norm": 1.8884607553482056, "kl": 0.29339873790740967, "learning_rate": 3.56327467067524e-07, "loss": 0.0003, "reward": 1.7500001192092896, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1266 }, { "completion_length": 236.50001525878906, "epoch": 1.2159309021113245, "grad_norm": 1.1765875816345215, "kl": 0.21760967373847961, "learning_rate": 3.5608288391075457e-07, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1267 }, { "completion_length": 181.85714721679688, "epoch": 1.216890595009597, "grad_norm": 0.02063695155084133, "kl": 0.301067054271698, "learning_rate": 3.5583817685864925e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1268 }, { "completion_length": 162.1428680419922, "epoch": 1.2178502879078694, "grad_norm": 0.026709681376814842, "kl": 0.4456559121608734, "learning_rate": 3.5559334619700407e-07, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1269 }, { "completion_length": 176.42857360839844, "epoch": 1.2188099808061421, "grad_norm": 1.7262625694274902, "kl": 0.29617971181869507, "learning_rate": 3.553483922117594e-07, "loss": 0.0003, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1270 }, { "completion_length": 151.07144165039062, "epoch": 1.2197696737044146, "grad_norm": 0.6721022129058838, "kl": 0.40901023149490356, "learning_rate": 3.5510331518899963e-07, "loss": 0.0004, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1271 }, { "completion_length": 146.2857208251953, "epoch": 1.220729366602687, "grad_norm": 1.6121766567230225, "kl": 0.33728277683258057, "learning_rate": 3.548581154149527e-07, "loss": 0.0003, "reward": 1.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1272 }, { "completion_length": 160.21429443359375, "epoch": 1.2216890595009597, "grad_norm": 1.1924163103103638, "kl": 0.3449403941631317, "learning_rate": 3.5461279317599025e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1273 }, { "completion_length": 165.1428680419922, "epoch": 1.2226487523992322, "grad_norm": 1.2354252338409424, "kl": 0.3195232152938843, "learning_rate": 3.543673487586266e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1274 }, { "completion_length": 190.85714721679688, "epoch": 1.2236084452975047, "grad_norm": 1.012823462486267, "kl": 0.26624682545661926, "learning_rate": 3.54121782449519e-07, "loss": 0.0003, "reward": 1.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1275 }, { "completion_length": 149.6428680419922, "epoch": 1.2245681381957774, "grad_norm": 2.4685306549072266, "kl": 0.3436487317085266, "learning_rate": 3.53876094535467e-07, "loss": 0.0003, "reward": 1.7500001192092896, "reward_std": 0.3535533845424652, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1276 }, { "completion_length": 188.00001525878906, "epoch": 1.2255278310940498, "grad_norm": 1.4914216995239258, "kl": 0.26662808656692505, "learning_rate": 3.5363028530341197e-07, "loss": 0.0003, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1277 }, { "completion_length": 125.28572082519531, "epoch": 1.2264875239923225, "grad_norm": 1.6565227508544922, "kl": 0.3953820765018463, "learning_rate": 3.533843550404373e-07, "loss": 0.0004, "reward": 1.7500001192092896, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1278 }, { "completion_length": 198.85714721679688, "epoch": 1.227447216890595, "grad_norm": 0.6857388019561768, "kl": 0.2939409613609314, "learning_rate": 3.5313830403376754e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1279 }, { "completion_length": 192.7857208251953, "epoch": 1.2284069097888675, "grad_norm": 1.2754404544830322, "kl": 0.2943551540374756, "learning_rate": 3.528921325707683e-07, "loss": 0.0003, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1280 }, { "completion_length": 174.6428680419922, "epoch": 1.2293666026871402, "grad_norm": 1.2908097505569458, "kl": 0.3094550669193268, "learning_rate": 3.5264584093894584e-07, "loss": 0.0003, "reward": 1.6785714626312256, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 1281 }, { "completion_length": 146.57144165039062, "epoch": 1.2303262955854126, "grad_norm": 0.021228723227977753, "kl": 0.4008694589138031, "learning_rate": 3.523994294259468e-07, "loss": 0.0004, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1282 }, { "completion_length": 154.07144165039062, "epoch": 1.2312859884836853, "grad_norm": 0.8602327108383179, "kl": 0.32800573110580444, "learning_rate": 3.5215289831955786e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1283 }, { "completion_length": 212.35714721679688, "epoch": 1.2322456813819578, "grad_norm": 1.0485217571258545, "kl": 0.24536563456058502, "learning_rate": 3.5190624790770536e-07, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1284 }, { "completion_length": 183.2857208251953, "epoch": 1.2332053742802302, "grad_norm": 0.7769949436187744, "kl": 0.3191581964492798, "learning_rate": 3.5165947847845484e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1285 }, { "completion_length": 160.92857360839844, "epoch": 1.234165067178503, "grad_norm": 0.01946607604622841, "kl": 0.3207699656486511, "learning_rate": 3.51412590320011e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1286 }, { "completion_length": 173.71429443359375, "epoch": 1.2351247600767754, "grad_norm": 1.5857291221618652, "kl": 0.29975995421409607, "learning_rate": 3.5116558372071726e-07, "loss": 0.0003, "reward": 1.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1287 }, { "completion_length": 180.42857360839844, "epoch": 1.236084452975048, "grad_norm": 1.5197030305862427, "kl": 0.3164657950401306, "learning_rate": 3.5091845896905523e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1288 }, { "completion_length": 183.00001525878906, "epoch": 1.2370441458733206, "grad_norm": 0.009333804249763489, "kl": 0.2781030833721161, "learning_rate": 3.506712163536445e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1289 }, { "completion_length": 153.92857360839844, "epoch": 1.238003838771593, "grad_norm": 1.2875480651855469, "kl": 0.3310268521308899, "learning_rate": 3.5042385616324236e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1290 }, { "completion_length": 155.71429443359375, "epoch": 1.2389635316698657, "grad_norm": 1.2703499794006348, "kl": 0.2763974368572235, "learning_rate": 3.501763786867434e-07, "loss": 0.0003, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1291 }, { "completion_length": 169.21429443359375, "epoch": 1.2399232245681382, "grad_norm": 1.7674338817596436, "kl": 0.3062077462673187, "learning_rate": 3.4992878421317945e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1292 }, { "completion_length": 164.1428680419922, "epoch": 1.2408829174664107, "grad_norm": 0.012690943665802479, "kl": 0.30656898021698, "learning_rate": 3.496810730317185e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1293 }, { "completion_length": 154.42857360839844, "epoch": 1.2418426103646834, "grad_norm": 0.008985031396150589, "kl": 0.2866956889629364, "learning_rate": 3.494332454316651e-07, "loss": 0.0003, "reward": 1.7857143878936768, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1294 }, { "completion_length": 169.1428680419922, "epoch": 1.2428023032629558, "grad_norm": 1.606265664100647, "kl": 0.30874162912368774, "learning_rate": 3.4918530170245985e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1295 }, { "completion_length": 160.71429443359375, "epoch": 1.2437619961612283, "grad_norm": 0.020420722663402557, "kl": 0.3619067370891571, "learning_rate": 3.4893724213367885e-07, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1296 }, { "completion_length": 140.85714721679688, "epoch": 1.244721689059501, "grad_norm": 0.6088277101516724, "kl": 0.3787159025669098, "learning_rate": 3.486890670150336e-07, "loss": 0.0004, "reward": 1.7142858505249023, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1297 }, { "completion_length": 195.6428680419922, "epoch": 1.2456813819577734, "grad_norm": 1.1286324262619019, "kl": 0.27424541115760803, "learning_rate": 3.484407766363703e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1298 }, { "completion_length": 140.35714721679688, "epoch": 1.2466410748560461, "grad_norm": 0.6959654688835144, "kl": 0.3403673768043518, "learning_rate": 3.481923712876701e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1299 }, { "completion_length": 189.07144165039062, "epoch": 1.2476007677543186, "grad_norm": 0.7069809436798096, "kl": 0.28981178998947144, "learning_rate": 3.479438512590482e-07, "loss": 0.0003, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1300 }, { "completion_length": 184.6428680419922, "epoch": 1.248560460652591, "grad_norm": 1.7039008140563965, "kl": 0.29209238290786743, "learning_rate": 3.4769521684075387e-07, "loss": 0.0003, "reward": 1.821428656578064, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1301 }, { "completion_length": 172.2857208251953, "epoch": 1.2495201535508638, "grad_norm": 1.0467689037322998, "kl": 0.274996817111969, "learning_rate": 3.474464683231698e-07, "loss": 0.0003, "reward": 1.7142858505249023, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1302 }, { "completion_length": 140.5, "epoch": 1.2504798464491362, "grad_norm": 3.1517624855041504, "kl": 0.43848270177841187, "learning_rate": 3.471976059968121e-07, "loss": 0.0004, "reward": 1.7857143878936768, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1303 }, { "completion_length": 188.50001525878906, "epoch": 1.251439539347409, "grad_norm": 0.006224079057574272, "kl": 0.25632211565971375, "learning_rate": 3.4694863015232985e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1304 }, { "completion_length": 161.07144165039062, "epoch": 1.2523992322456814, "grad_norm": 0.7304441928863525, "kl": 0.37060487270355225, "learning_rate": 3.466995410805045e-07, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1305 }, { "completion_length": 158.71429443359375, "epoch": 1.2533589251439539, "grad_norm": 1.9503915309906006, "kl": 0.35324981808662415, "learning_rate": 3.464503390722497e-07, "loss": 0.0004, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1306 }, { "completion_length": 154.5, "epoch": 1.2543186180422266, "grad_norm": 0.0165083147585392, "kl": 0.35558244585990906, "learning_rate": 3.4620102441861144e-07, "loss": 0.0004, "reward": 1.7857143878936768, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1307 }, { "completion_length": 197.00001525878906, "epoch": 1.255278310940499, "grad_norm": 0.6689320206642151, "kl": 0.26497334241867065, "learning_rate": 3.459515974107667e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1308 }, { "completion_length": 155.7857208251953, "epoch": 1.2562380038387717, "grad_norm": 1.1553090810775757, "kl": 0.36137357354164124, "learning_rate": 3.4570205834002415e-07, "loss": 0.0004, "reward": 1.7142858505249023, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1309 }, { "completion_length": 187.42857360839844, "epoch": 1.2571976967370442, "grad_norm": 1.3106788396835327, "kl": 0.6322866678237915, "learning_rate": 3.4545240749782314e-07, "loss": 0.0006, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1310 }, { "completion_length": 195.50001525878906, "epoch": 1.2581573896353166, "grad_norm": 1.6892032623291016, "kl": 0.28800565004348755, "learning_rate": 3.4520264517573335e-07, "loss": 0.0003, "reward": 1.821428656578064, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1311 }, { "completion_length": 192.57144165039062, "epoch": 1.2591170825335891, "grad_norm": 0.8792878985404968, "kl": 0.2572689950466156, "learning_rate": 3.44952771665455e-07, "loss": 0.0003, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1312 }, { "completion_length": 202.6428680419922, "epoch": 1.2600767754318618, "grad_norm": 0.6303850412368774, "kl": 0.23879854381084442, "learning_rate": 3.447027872588182e-07, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1313 }, { "completion_length": 149.71429443359375, "epoch": 1.2610364683301343, "grad_norm": 0.012096519581973553, "kl": 0.36897537112236023, "learning_rate": 3.444526922477822e-07, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1314 }, { "completion_length": 193.7857208251953, "epoch": 1.261996161228407, "grad_norm": 0.9018742442131042, "kl": 0.3291376829147339, "learning_rate": 3.442024869244356e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1315 }, { "completion_length": 239.21429443359375, "epoch": 1.2629558541266794, "grad_norm": 0.4385557770729065, "kl": 0.2243610918521881, "learning_rate": 3.4395217158099604e-07, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1316 }, { "completion_length": 175.2857208251953, "epoch": 1.263915547024952, "grad_norm": 1.0427578687667847, "kl": 0.27399942278862, "learning_rate": 3.437017465098095e-07, "loss": 0.0003, "reward": 1.6785714626312256, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1317 }, { "completion_length": 148.35714721679688, "epoch": 1.2648752399232246, "grad_norm": 0.017879607155919075, "kl": 0.5060590505599976, "learning_rate": 3.4345121200335004e-07, "loss": 0.0005, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1318 }, { "completion_length": 213.21429443359375, "epoch": 1.265834932821497, "grad_norm": 0.007910572923719883, "kl": 0.24306322634220123, "learning_rate": 3.4320056835421944e-07, "loss": 0.0002, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1319 }, { "completion_length": 125.50000762939453, "epoch": 1.2667946257197698, "grad_norm": 0.8161956667900085, "kl": 0.34918805956840515, "learning_rate": 3.429498158551473e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1320 }, { "completion_length": 141.42857360839844, "epoch": 1.2677543186180422, "grad_norm": 0.014343502931296825, "kl": 0.3734358251094818, "learning_rate": 3.426989547989902e-07, "loss": 0.0004, "reward": 1.7857143878936768, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1321 }, { "completion_length": 141.85714721679688, "epoch": 1.2687140115163147, "grad_norm": 1.2453197240829468, "kl": 0.40005409717559814, "learning_rate": 3.424479854787313e-07, "loss": 0.0004, "reward": 1.821428656578064, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 1322 }, { "completion_length": 234.07144165039062, "epoch": 1.2696737044145874, "grad_norm": 1.1089692115783691, "kl": 0.22789666056632996, "learning_rate": 3.4219690818748035e-07, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1323 }, { "completion_length": 153.07144165039062, "epoch": 1.2706333973128598, "grad_norm": 1.1891034841537476, "kl": 0.3251454830169678, "learning_rate": 3.419457232184733e-07, "loss": 0.0003, "reward": 1.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1324 }, { "completion_length": 140.71429443359375, "epoch": 1.2715930902111325, "grad_norm": 0.011180669069290161, "kl": 0.37172427773475647, "learning_rate": 3.416944308650717e-07, "loss": 0.0004, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1325 }, { "completion_length": 144.21429443359375, "epoch": 1.272552783109405, "grad_norm": 0.054043691605329514, "kl": 0.5101447105407715, "learning_rate": 3.4144303142076267e-07, "loss": 0.0005, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1326 }, { "completion_length": 158.5, "epoch": 1.2735124760076775, "grad_norm": 1.1885954141616821, "kl": 0.30886411666870117, "learning_rate": 3.411915251791582e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1327 }, { "completion_length": 189.2857208251953, "epoch": 1.2744721689059502, "grad_norm": 1.2696490287780762, "kl": 0.31244751811027527, "learning_rate": 3.409399124339951e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1328 }, { "completion_length": 181.42857360839844, "epoch": 1.2754318618042226, "grad_norm": 1.247694492340088, "kl": 0.28996720910072327, "learning_rate": 3.406881934791347e-07, "loss": 0.0003, "reward": 1.821428656578064, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 1329 }, { "completion_length": 167.2857208251953, "epoch": 1.2763915547024953, "grad_norm": 1.060707926750183, "kl": 0.3404920697212219, "learning_rate": 3.4043636860856226e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1330 }, { "completion_length": 178.07144165039062, "epoch": 1.2773512476007678, "grad_norm": 1.1743206977844238, "kl": 0.3682054877281189, "learning_rate": 3.401844381163867e-07, "loss": 0.0004, "reward": 1.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1331 }, { "completion_length": 152.7857208251953, "epoch": 1.2783109404990403, "grad_norm": 2.350600481033325, "kl": 0.3560451567173004, "learning_rate": 3.399324022968403e-07, "loss": 0.0004, "reward": 1.7857143878936768, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1332 }, { "completion_length": 166.2857208251953, "epoch": 1.2792706333973127, "grad_norm": 0.012278239242732525, "kl": 0.32276400923728943, "learning_rate": 3.396802614442784e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1333 }, { "completion_length": 171.71429443359375, "epoch": 1.2802303262955854, "grad_norm": 2.0189366340637207, "kl": 0.2884642779827118, "learning_rate": 3.3942801585317914e-07, "loss": 0.0003, "reward": 1.7142858505249023, "reward_std": 0.4040610194206238, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1334 }, { "completion_length": 208.07144165039062, "epoch": 1.2811900191938579, "grad_norm": 0.027031395584344864, "kl": 0.34776777029037476, "learning_rate": 3.391756658181427e-07, "loss": 0.0003, "reward": 1.7857143878936768, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1335 }, { "completion_length": 156.1428680419922, "epoch": 1.2821497120921306, "grad_norm": 2.1885764598846436, "kl": 0.4358733296394348, "learning_rate": 3.389232116338914e-07, "loss": 0.0004, "reward": 1.7857143878936768, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1336 }, { "completion_length": 176.35714721679688, "epoch": 1.283109404990403, "grad_norm": 0.010456704534590244, "kl": 0.31485453248023987, "learning_rate": 3.3867065359526917e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1337 }, { "completion_length": 162.35714721679688, "epoch": 1.2840690978886755, "grad_norm": 1.924392819404602, "kl": 0.3287600874900818, "learning_rate": 3.3841799199724143e-07, "loss": 0.0003, "reward": 1.7857143878936768, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1338 }, { "completion_length": 180.85714721679688, "epoch": 1.2850287907869482, "grad_norm": 0.706633985042572, "kl": 0.3439154326915741, "learning_rate": 3.3816522713489414e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1339 }, { "completion_length": 197.71429443359375, "epoch": 1.2859884836852207, "grad_norm": 1.6251552104949951, "kl": 0.35981473326683044, "learning_rate": 3.3791235930343417e-07, "loss": 0.0004, "reward": 1.7857143878936768, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1340 }, { "completion_length": 146.35714721679688, "epoch": 1.2869481765834934, "grad_norm": 2.1002869606018066, "kl": 0.3344191908836365, "learning_rate": 3.376593887981886e-07, "loss": 0.0003, "reward": 1.7500001192092896, "reward_std": 0.3535534143447876, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1341 }, { "completion_length": 187.42857360839844, "epoch": 1.2879078694817658, "grad_norm": 0.9757384657859802, "kl": 0.280601441860199, "learning_rate": 3.3740631591460434e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1342 }, { "completion_length": 153.57144165039062, "epoch": 1.2888675623800383, "grad_norm": 1.751451015472412, "kl": 0.3492797613143921, "learning_rate": 3.3715314094824796e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1343 }, { "completion_length": 202.07144165039062, "epoch": 1.289827255278311, "grad_norm": 0.7558333873748779, "kl": 0.3098606467247009, "learning_rate": 3.368998641948051e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1344 }, { "completion_length": 188.2857208251953, "epoch": 1.2907869481765835, "grad_norm": 1.3734210729599, "kl": 0.28724405169487, "learning_rate": 3.3664648595008064e-07, "loss": 0.0003, "reward": 1.8928571939468384, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1345 }, { "completion_length": 171.42857360839844, "epoch": 1.2917466410748562, "grad_norm": 1.208512306213379, "kl": 0.27560991048812866, "learning_rate": 3.3639300650999754e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1346 }, { "completion_length": 204.7857208251953, "epoch": 1.2927063339731286, "grad_norm": 0.0071465675719082355, "kl": 0.23318925499916077, "learning_rate": 3.3613942617059723e-07, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1347 }, { "completion_length": 141.21429443359375, "epoch": 1.293666026871401, "grad_norm": 0.8040897846221924, "kl": 0.4216707944869995, "learning_rate": 3.358857452280388e-07, "loss": 0.0004, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1348 }, { "completion_length": 210.85714721679688, "epoch": 1.2946257197696738, "grad_norm": 0.041228197515010834, "kl": 0.3430657684803009, "learning_rate": 3.3563196397859905e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1349 }, { "completion_length": 158.5, "epoch": 1.2955854126679462, "grad_norm": 0.026563970372080803, "kl": 0.3923739790916443, "learning_rate": 3.3537808271867173e-07, "loss": 0.0004, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1350 }, { "completion_length": 191.21429443359375, "epoch": 1.296545105566219, "grad_norm": 0.569332480430603, "kl": 0.3580017387866974, "learning_rate": 3.351241017447677e-07, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1351 }, { "completion_length": 194.07144165039062, "epoch": 1.2975047984644914, "grad_norm": 0.008725427091121674, "kl": 0.33390170335769653, "learning_rate": 3.3487002135351376e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1352 }, { "completion_length": 137.85714721679688, "epoch": 1.2984644913627639, "grad_norm": 1.024111032485962, "kl": 0.3743915855884552, "learning_rate": 3.3461584184165324e-07, "loss": 0.0004, "reward": 1.7142858505249023, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1353 }, { "completion_length": 227.57144165039062, "epoch": 1.2994241842610363, "grad_norm": 1.0325331687927246, "kl": 0.28935009241104126, "learning_rate": 3.343615635060452e-07, "loss": 0.0003, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1354 }, { "completion_length": 193.85714721679688, "epoch": 1.300383877159309, "grad_norm": 0.009043104015290737, "kl": 0.26103246212005615, "learning_rate": 3.34107186643664e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1355 }, { "completion_length": 136.0, "epoch": 1.3013435700575815, "grad_norm": 1.1509108543395996, "kl": 0.4534020721912384, "learning_rate": 3.33852711551599e-07, "loss": 0.0005, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1356 }, { "completion_length": 183.50001525878906, "epoch": 1.3023032629558542, "grad_norm": 1.056512475013733, "kl": 0.375224769115448, "learning_rate": 3.3359813852705444e-07, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1357 }, { "completion_length": 190.00001525878906, "epoch": 1.3032629558541267, "grad_norm": 1.0743775367736816, "kl": 0.2814732789993286, "learning_rate": 3.333434678673489e-07, "loss": 0.0003, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1358 }, { "completion_length": 197.92857360839844, "epoch": 1.3042226487523991, "grad_norm": 0.012007053010165691, "kl": 0.2925198972225189, "learning_rate": 3.3308869986991487e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1359 }, { "completion_length": 190.1428680419922, "epoch": 1.3051823416506718, "grad_norm": 0.009325566701591015, "kl": 0.28776344656944275, "learning_rate": 3.3283383483229884e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1360 }, { "completion_length": 197.50001525878906, "epoch": 1.3061420345489443, "grad_norm": 0.03827258199453354, "kl": 0.29535719752311707, "learning_rate": 3.325788730521602e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1361 }, { "completion_length": 184.07144165039062, "epoch": 1.307101727447217, "grad_norm": 1.1059060096740723, "kl": 0.3376661539077759, "learning_rate": 3.323238148272717e-07, "loss": 0.0003, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1362 }, { "completion_length": 179.1428680419922, "epoch": 1.3080614203454894, "grad_norm": 0.5828143954277039, "kl": 0.4105755388736725, "learning_rate": 3.320686604555184e-07, "loss": 0.0004, "reward": 1.8571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.4642857313156128, "rewards/check_similarity_func": 0.392857164144516, "step": 1363 }, { "completion_length": 149.92857360839844, "epoch": 1.309021113243762, "grad_norm": 0.06770168989896774, "kl": 0.48728662729263306, "learning_rate": 3.3181341023489795e-07, "loss": 0.0005, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1364 }, { "completion_length": 155.21429443359375, "epoch": 1.3099808061420346, "grad_norm": 1.2204769849777222, "kl": 0.3674211800098419, "learning_rate": 3.315580644635199e-07, "loss": 0.0004, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 1365 }, { "completion_length": 179.07144165039062, "epoch": 1.310940499040307, "grad_norm": 1.8893071413040161, "kl": 0.3882984519004822, "learning_rate": 3.31302623439605e-07, "loss": 0.0004, "reward": 1.6785714626312256, "reward_std": 0.3535534143447876, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 1366 }, { "completion_length": 205.07144165039062, "epoch": 1.3119001919385798, "grad_norm": 0.009958249516785145, "kl": 0.33016437292099, "learning_rate": 3.310470874614858e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1367 }, { "completion_length": 201.07144165039062, "epoch": 1.3128598848368522, "grad_norm": 0.8262827396392822, "kl": 0.3152879476547241, "learning_rate": 3.3079145682760556e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1368 }, { "completion_length": 168.0, "epoch": 1.3138195777351247, "grad_norm": 1.6229236125946045, "kl": 0.4203038513660431, "learning_rate": 3.305357318365179e-07, "loss": 0.0004, "reward": 1.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1369 }, { "completion_length": 160.6428680419922, "epoch": 1.3147792706333974, "grad_norm": 1.684450626373291, "kl": 0.47926896810531616, "learning_rate": 3.3027991278688674e-07, "loss": 0.0005, "reward": 1.7857143878936768, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1370 }, { "completion_length": 195.1428680419922, "epoch": 1.3157389635316699, "grad_norm": 0.6934893131256104, "kl": 0.29341739416122437, "learning_rate": 3.3002399997748596e-07, "loss": 0.0003, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 1371 }, { "completion_length": 188.35714721679688, "epoch": 1.3166986564299425, "grad_norm": 0.011494173668324947, "kl": 0.3193625211715698, "learning_rate": 3.297679937071989e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1372 }, { "completion_length": 157.92857360839844, "epoch": 1.317658349328215, "grad_norm": 0.018887484446167946, "kl": 0.3616563379764557, "learning_rate": 3.29511894275018e-07, "loss": 0.0004, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1373 }, { "completion_length": 169.85714721679688, "epoch": 1.3186180422264875, "grad_norm": 1.654610276222229, "kl": 0.37260839343070984, "learning_rate": 3.292557019800445e-07, "loss": 0.0004, "reward": 1.6071429252624512, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1374 }, { "completion_length": 161.71429443359375, "epoch": 1.31957773512476, "grad_norm": 1.7403349876403809, "kl": 0.3347037732601166, "learning_rate": 3.2899941712148813e-07, "loss": 0.0003, "reward": 1.7142858505249023, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1375 }, { "completion_length": 207.85714721679688, "epoch": 1.3205374280230326, "grad_norm": 0.008668544702231884, "kl": 0.26617059111595154, "learning_rate": 3.287430399986667e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1376 }, { "completion_length": 160.2857208251953, "epoch": 1.3214971209213051, "grad_norm": 0.009160074405372143, "kl": 0.3463951051235199, "learning_rate": 3.284865709110059e-07, "loss": 0.0003, "reward": 1.7142858505249023, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1377 }, { "completion_length": 142.2857208251953, "epoch": 1.3224568138195778, "grad_norm": 1.5084059238433838, "kl": 0.3918446898460388, "learning_rate": 3.2823001015803857e-07, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1378 }, { "completion_length": 197.1428680419922, "epoch": 1.3234165067178503, "grad_norm": 0.009678017348051071, "kl": 0.27274802327156067, "learning_rate": 3.279733580394049e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1379 }, { "completion_length": 162.21429443359375, "epoch": 1.3243761996161227, "grad_norm": 0.9854947924613953, "kl": 0.27702006697654724, "learning_rate": 3.277166148548515e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1380 }, { "completion_length": 204.71429443359375, "epoch": 1.3253358925143954, "grad_norm": 1.2623273134231567, "kl": 0.26821982860565186, "learning_rate": 3.2745978090423164e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1381 }, { "completion_length": 197.07144165039062, "epoch": 1.326295585412668, "grad_norm": 1.655966877937317, "kl": 0.3054332435131073, "learning_rate": 3.2720285648750447e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1382 }, { "completion_length": 194.92857360839844, "epoch": 1.3272552783109406, "grad_norm": 1.387460708618164, "kl": 0.3019988536834717, "learning_rate": 3.269458419047345e-07, "loss": 0.0003, "reward": 1.7500001192092896, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 1383 }, { "completion_length": 112.64286041259766, "epoch": 1.328214971209213, "grad_norm": 0.01193670742213726, "kl": 0.45135775208473206, "learning_rate": 3.26688737456092e-07, "loss": 0.0005, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1384 }, { "completion_length": 147.0, "epoch": 1.3291746641074855, "grad_norm": 0.009983493946492672, "kl": 0.3765932619571686, "learning_rate": 3.26431543441852e-07, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1385 }, { "completion_length": 194.6428680419922, "epoch": 1.3301343570057582, "grad_norm": 0.08309076726436615, "kl": 0.35816147923469543, "learning_rate": 3.261742601623942e-07, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1386 }, { "completion_length": 152.5, "epoch": 1.3310940499040307, "grad_norm": 2.337327241897583, "kl": 2.5746288299560547, "learning_rate": 3.259168879182024e-07, "loss": 0.0026, "reward": 1.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1387 }, { "completion_length": 165.2857208251953, "epoch": 1.3320537428023034, "grad_norm": 1.0494537353515625, "kl": 0.44858914613723755, "learning_rate": 3.256594270098644e-07, "loss": 0.0004, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1388 }, { "completion_length": 140.92857360839844, "epoch": 1.3330134357005758, "grad_norm": 1.9292932748794556, "kl": 0.3786827027797699, "learning_rate": 3.254018777380716e-07, "loss": 0.0004, "reward": 1.7857143878936768, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1389 }, { "completion_length": 213.50001525878906, "epoch": 1.3339731285988483, "grad_norm": 0.01146668940782547, "kl": 0.2873183786869049, "learning_rate": 3.2514424040361854e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1390 }, { "completion_length": 214.7857208251953, "epoch": 1.334932821497121, "grad_norm": 0.9035501480102539, "kl": 0.25699007511138916, "learning_rate": 3.248865153074025e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1391 }, { "completion_length": 187.7857208251953, "epoch": 1.3358925143953935, "grad_norm": 1.1792254447937012, "kl": 0.294695109128952, "learning_rate": 3.2462870275042367e-07, "loss": 0.0003, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1392 }, { "completion_length": 155.5, "epoch": 1.3368522072936662, "grad_norm": 1.6218328475952148, "kl": 0.3899102509021759, "learning_rate": 3.243708030337838e-07, "loss": 0.0004, "reward": 1.8571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1393 }, { "completion_length": 191.2857208251953, "epoch": 1.3378119001919386, "grad_norm": 1.1779870986938477, "kl": 0.2908651530742645, "learning_rate": 3.241128164586869e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1394 }, { "completion_length": 195.6428680419922, "epoch": 1.338771593090211, "grad_norm": 0.8815024495124817, "kl": 0.3095019459724426, "learning_rate": 3.2385474332643816e-07, "loss": 0.0003, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1395 }, { "completion_length": 225.7857208251953, "epoch": 1.3397312859884836, "grad_norm": 0.638983964920044, "kl": 0.22616618871688843, "learning_rate": 3.23596583938444e-07, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1396 }, { "completion_length": 158.57144165039062, "epoch": 1.3406909788867563, "grad_norm": 1.2746766805648804, "kl": 0.3169882297515869, "learning_rate": 3.233383385962115e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1397 }, { "completion_length": 152.57144165039062, "epoch": 1.3416506717850287, "grad_norm": 2.7031192779541016, "kl": 0.4248184561729431, "learning_rate": 3.230800076013482e-07, "loss": 0.0004, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1398 }, { "completion_length": 215.85714721679688, "epoch": 1.3426103646833014, "grad_norm": 0.651456892490387, "kl": 0.24747635424137115, "learning_rate": 3.228215912555617e-07, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1399 }, { "completion_length": 216.85714721679688, "epoch": 1.3435700575815739, "grad_norm": 0.009778940118849277, "kl": 0.25724536180496216, "learning_rate": 3.2256308986065904e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1400 }, { "completion_length": 169.21429443359375, "epoch": 1.3445297504798464, "grad_norm": 0.008826049976050854, "kl": 0.30253300070762634, "learning_rate": 3.223045037185469e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1401 }, { "completion_length": 174.07144165039062, "epoch": 1.345489443378119, "grad_norm": 2.6801650524139404, "kl": 0.3153795003890991, "learning_rate": 3.220458331312308e-07, "loss": 0.0003, "reward": 1.6428571939468384, "reward_std": 0.4040610194206238, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1402 }, { "completion_length": 173.2857208251953, "epoch": 1.3464491362763915, "grad_norm": 1.7219350337982178, "kl": 0.3308963477611542, "learning_rate": 3.217870784008149e-07, "loss": 0.0003, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1403 }, { "completion_length": 184.07144165039062, "epoch": 1.3474088291746642, "grad_norm": 1.7192094326019287, "kl": 0.27958473563194275, "learning_rate": 3.215282398295014e-07, "loss": 0.0003, "reward": 1.821428656578064, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1404 }, { "completion_length": 158.07144165039062, "epoch": 1.3483685220729367, "grad_norm": 2.1195578575134277, "kl": 0.3459629416465759, "learning_rate": 3.2126931771959094e-07, "loss": 0.0003, "reward": 1.821428656578064, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1405 }, { "completion_length": 198.57144165039062, "epoch": 1.3493282149712091, "grad_norm": 0.7420687675476074, "kl": 0.31385910511016846, "learning_rate": 3.210103123734813e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1406 }, { "completion_length": 162.1428680419922, "epoch": 1.3502879078694818, "grad_norm": 2.2419638633728027, "kl": 0.3806152045726776, "learning_rate": 3.2075122409366755e-07, "loss": 0.0004, "reward": 1.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1407 }, { "completion_length": 155.2857208251953, "epoch": 1.3512476007677543, "grad_norm": 0.009812967851758003, "kl": 0.338000625371933, "learning_rate": 3.2049205318274176e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1408 }, { "completion_length": 182.92857360839844, "epoch": 1.352207293666027, "grad_norm": 0.00910151656717062, "kl": 0.3065442144870758, "learning_rate": 3.2023279994339236e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1409 }, { "completion_length": 197.71429443359375, "epoch": 1.3531669865642995, "grad_norm": 0.04169289022684097, "kl": 0.3819942772388458, "learning_rate": 3.199734646784039e-07, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1410 }, { "completion_length": 160.42857360839844, "epoch": 1.354126679462572, "grad_norm": 0.9294042587280273, "kl": 0.36895453929901123, "learning_rate": 3.19714047690657e-07, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1411 }, { "completion_length": 182.2857208251953, "epoch": 1.3550863723608444, "grad_norm": 1.0387628078460693, "kl": 0.4158984124660492, "learning_rate": 3.1945454928312744e-07, "loss": 0.0004, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1412 }, { "completion_length": 161.71429443359375, "epoch": 1.356046065259117, "grad_norm": 1.4398424625396729, "kl": 0.358658105134964, "learning_rate": 3.1919496975888616e-07, "loss": 0.0004, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1413 }, { "completion_length": 225.35714721679688, "epoch": 1.3570057581573896, "grad_norm": 1.3806003332138062, "kl": 0.26682865619659424, "learning_rate": 3.1893530942109883e-07, "loss": 0.0003, "reward": 1.821428656578064, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1414 }, { "completion_length": 172.42857360839844, "epoch": 1.3579654510556622, "grad_norm": 1.27720046043396, "kl": 0.3600917160511017, "learning_rate": 3.186755685730257e-07, "loss": 0.0004, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1415 }, { "completion_length": 126.92857360839844, "epoch": 1.3589251439539347, "grad_norm": 2.2104151248931885, "kl": 0.4541868567466736, "learning_rate": 3.184157475180207e-07, "loss": 0.0005, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1416 }, { "completion_length": 157.6428680419922, "epoch": 1.3598848368522072, "grad_norm": 0.018560893833637238, "kl": 0.37677201628685, "learning_rate": 3.1815584655953176e-07, "loss": 0.0004, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1417 }, { "completion_length": 180.6428680419922, "epoch": 1.3608445297504799, "grad_norm": 1.461193323135376, "kl": 0.3339272439479828, "learning_rate": 3.178958660010999e-07, "loss": 0.0003, "reward": 1.6071429252624512, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1418 }, { "completion_length": 170.6428680419922, "epoch": 1.3618042226487523, "grad_norm": 0.014303894713521004, "kl": 0.36958783864974976, "learning_rate": 3.176358061463593e-07, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1419 }, { "completion_length": 180.7857208251953, "epoch": 1.362763915547025, "grad_norm": 0.01134550478309393, "kl": 0.29360970854759216, "learning_rate": 3.173756672990365e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1420 }, { "completion_length": 167.07144165039062, "epoch": 1.3637236084452975, "grad_norm": 0.00879689585417509, "kl": 0.31877440214157104, "learning_rate": 3.171154497629506e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1421 }, { "completion_length": 161.21429443359375, "epoch": 1.36468330134357, "grad_norm": 0.866333544254303, "kl": 0.3831079602241516, "learning_rate": 3.1685515384201236e-07, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1422 }, { "completion_length": 176.7857208251953, "epoch": 1.3656429942418427, "grad_norm": 1.7243690490722656, "kl": 0.40315401554107666, "learning_rate": 3.165947798402242e-07, "loss": 0.0004, "reward": 1.7142858505249023, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1423 }, { "completion_length": 223.00001525878906, "epoch": 1.3666026871401151, "grad_norm": 2.0472264289855957, "kl": 0.324944406747818, "learning_rate": 3.163343280616797e-07, "loss": 0.0003, "reward": 1.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1424 }, { "completion_length": 129.6428680419922, "epoch": 1.3675623800383878, "grad_norm": 2.0750367641448975, "kl": 0.45777854323387146, "learning_rate": 3.1607379881056327e-07, "loss": 0.0005, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1425 }, { "completion_length": 186.07144165039062, "epoch": 1.3685220729366603, "grad_norm": 0.842551589012146, "kl": 0.2713443636894226, "learning_rate": 3.1581319239114976e-07, "loss": 0.0003, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1426 }, { "completion_length": 161.7857208251953, "epoch": 1.3694817658349328, "grad_norm": 0.8801618218421936, "kl": 0.3074701428413391, "learning_rate": 3.1555250910780436e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1427 }, { "completion_length": 178.2857208251953, "epoch": 1.3704414587332054, "grad_norm": 0.020503729581832886, "kl": 0.34161895513534546, "learning_rate": 3.152917492649817e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1428 }, { "completion_length": 140.6428680419922, "epoch": 1.371401151631478, "grad_norm": 1.4334783554077148, "kl": 0.5143711566925049, "learning_rate": 3.15030913167226e-07, "loss": 0.0005, "reward": 1.7142858505249023, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1429 }, { "completion_length": 113.78572082519531, "epoch": 1.3723608445297506, "grad_norm": 2.2057199478149414, "kl": 0.40671423077583313, "learning_rate": 3.1477000111917066e-07, "loss": 0.0004, "reward": 1.7500001192092896, "reward_std": 0.3535533845424652, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1430 }, { "completion_length": 163.21429443359375, "epoch": 1.373320537428023, "grad_norm": 0.01265462301671505, "kl": 0.3309328556060791, "learning_rate": 3.145090134255376e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1431 }, { "completion_length": 159.0, "epoch": 1.3742802303262955, "grad_norm": 1.499495267868042, "kl": 1.9047292470932007, "learning_rate": 3.142479503911371e-07, "loss": 0.0019, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1432 }, { "completion_length": 201.1428680419922, "epoch": 1.375239923224568, "grad_norm": 0.9854056239128113, "kl": 0.26626524329185486, "learning_rate": 3.139868123208675e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1433 }, { "completion_length": 193.92857360839844, "epoch": 1.3761996161228407, "grad_norm": 2.503533124923706, "kl": 0.3297451436519623, "learning_rate": 3.1372559951971465e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1434 }, { "completion_length": 166.6428680419922, "epoch": 1.3771593090211132, "grad_norm": 1.7516162395477295, "kl": 0.3675219416618347, "learning_rate": 3.134643122927519e-07, "loss": 0.0004, "reward": 1.821428656578064, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1435 }, { "completion_length": 185.50001525878906, "epoch": 1.3781190019193859, "grad_norm": 0.8746750950813293, "kl": 0.4303252696990967, "learning_rate": 3.132029509451395e-07, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1436 }, { "completion_length": 262.8571472167969, "epoch": 1.3790786948176583, "grad_norm": 0.6960357427597046, "kl": 0.2370123267173767, "learning_rate": 3.129415157821239e-07, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1437 }, { "completion_length": 154.2857208251953, "epoch": 1.3800383877159308, "grad_norm": 1.7862883806228638, "kl": 0.3745957612991333, "learning_rate": 3.1268000710903817e-07, "loss": 0.0004, "reward": 1.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1438 }, { "completion_length": 185.71429443359375, "epoch": 1.3809980806142035, "grad_norm": 0.609161376953125, "kl": 0.2884595990180969, "learning_rate": 3.1241842523130113e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1439 }, { "completion_length": 165.07144165039062, "epoch": 1.381957773512476, "grad_norm": 1.397654414176941, "kl": 0.29246068000793457, "learning_rate": 3.12156770454417e-07, "loss": 0.0003, "reward": 1.7142858505249023, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1440 }, { "completion_length": 187.92857360839844, "epoch": 1.3829174664107486, "grad_norm": 0.014372305944561958, "kl": 0.28157421946525574, "learning_rate": 3.1189504308397516e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1441 }, { "completion_length": 147.57144165039062, "epoch": 1.383877159309021, "grad_norm": 1.982388973236084, "kl": 0.40739884972572327, "learning_rate": 3.116332434256499e-07, "loss": 0.0004, "reward": 1.821428656578064, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1442 }, { "completion_length": 183.85714721679688, "epoch": 1.3848368522072936, "grad_norm": 0.8335322141647339, "kl": 0.31182485818862915, "learning_rate": 3.1137137178519977e-07, "loss": 0.0003, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1443 }, { "completion_length": 168.92857360839844, "epoch": 1.3857965451055663, "grad_norm": 0.8928152322769165, "kl": 0.3570810854434967, "learning_rate": 3.111094284684675e-07, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1444 }, { "completion_length": 146.92857360839844, "epoch": 1.3867562380038387, "grad_norm": 2.5018420219421387, "kl": 0.4252552092075348, "learning_rate": 3.1084741378137947e-07, "loss": 0.0004, "reward": 1.821428656578064, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1445 }, { "completion_length": 210.42857360839844, "epoch": 1.3877159309021114, "grad_norm": 0.009670042432844639, "kl": 0.25716105103492737, "learning_rate": 3.105853280299454e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1446 }, { "completion_length": 211.7857208251953, "epoch": 1.388675623800384, "grad_norm": 0.011147195473313332, "kl": 0.28287404775619507, "learning_rate": 3.103231715202582e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1447 }, { "completion_length": 166.5, "epoch": 1.3896353166986564, "grad_norm": 1.521304726600647, "kl": 0.3620108366012573, "learning_rate": 3.10060944558493e-07, "loss": 0.0004, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1448 }, { "completion_length": 197.50001525878906, "epoch": 1.390595009596929, "grad_norm": 1.07317054271698, "kl": 0.2716091275215149, "learning_rate": 3.0979864745090777e-07, "loss": 0.0003, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1449 }, { "completion_length": 177.92857360839844, "epoch": 1.3915547024952015, "grad_norm": 0.01450048666447401, "kl": 0.3263911306858063, "learning_rate": 3.0953628050384194e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1450 }, { "completion_length": 142.42857360839844, "epoch": 1.3925143953934742, "grad_norm": 0.01471895445138216, "kl": 0.3740783929824829, "learning_rate": 3.0927384402371664e-07, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1451 }, { "completion_length": 145.21429443359375, "epoch": 1.3934740882917467, "grad_norm": 0.8893683552742004, "kl": 0.436830997467041, "learning_rate": 3.0901133831703434e-07, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1452 }, { "completion_length": 152.85714721679688, "epoch": 1.3944337811900192, "grad_norm": 1.5237174034118652, "kl": 0.41211849451065063, "learning_rate": 3.0874876369037835e-07, "loss": 0.0004, "reward": 1.821428656578064, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1453 }, { "completion_length": 162.71429443359375, "epoch": 1.3953934740882916, "grad_norm": 1.353291392326355, "kl": 0.3225920796394348, "learning_rate": 3.084861204504122e-07, "loss": 0.0003, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1454 }, { "completion_length": 190.1428680419922, "epoch": 1.3963531669865643, "grad_norm": 1.076984167098999, "kl": 0.40525752305984497, "learning_rate": 3.0822340890387973e-07, "loss": 0.0004, "reward": 1.8928571939468384, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 1455 }, { "completion_length": 183.57144165039062, "epoch": 1.3973128598848368, "grad_norm": 0.01845143362879753, "kl": 0.32650095224380493, "learning_rate": 3.079606293576047e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1456 }, { "completion_length": 202.6428680419922, "epoch": 1.3982725527831095, "grad_norm": 0.00944567285478115, "kl": 0.26404082775115967, "learning_rate": 3.0769778211849026e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1457 }, { "completion_length": 213.00001525878906, "epoch": 1.399232245681382, "grad_norm": 1.4942842721939087, "kl": 0.30972209572792053, "learning_rate": 3.0743486749351816e-07, "loss": 0.0003, "reward": 1.8928571939468384, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1458 }, { "completion_length": 165.07144165039062, "epoch": 1.4001919385796544, "grad_norm": 1.3293251991271973, "kl": 0.38960975408554077, "learning_rate": 3.071718857897495e-07, "loss": 0.0004, "reward": 1.8928571939468384, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 1459 }, { "completion_length": 178.00001525878906, "epoch": 1.401151631477927, "grad_norm": 1.3407949209213257, "kl": 0.33822789788246155, "learning_rate": 3.069088373143234e-07, "loss": 0.0003, "reward": 1.8928571939468384, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1460 }, { "completion_length": 167.6428680419922, "epoch": 1.4021113243761996, "grad_norm": 0.7155161499977112, "kl": 0.381339967250824, "learning_rate": 3.066457223744568e-07, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1461 }, { "completion_length": 170.5, "epoch": 1.4030710172744723, "grad_norm": 2.073965311050415, "kl": 0.374606728553772, "learning_rate": 3.0638254127744466e-07, "loss": 0.0004, "reward": 1.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 1462 }, { "completion_length": 122.21429443359375, "epoch": 1.4040307101727447, "grad_norm": 0.022044943645596504, "kl": 0.4698442816734314, "learning_rate": 3.061192943306589e-07, "loss": 0.0005, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1463 }, { "completion_length": 136.21429443359375, "epoch": 1.4049904030710172, "grad_norm": 0.15681734681129456, "kl": 0.6496915817260742, "learning_rate": 3.058559818415485e-07, "loss": 0.0006, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1464 }, { "completion_length": 189.57144165039062, "epoch": 1.4059500959692899, "grad_norm": 2.0541367530822754, "kl": 0.34640946984291077, "learning_rate": 3.055926041176389e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1465 }, { "completion_length": 191.85714721679688, "epoch": 1.4069097888675623, "grad_norm": 1.0539017915725708, "kl": 0.28965967893600464, "learning_rate": 3.0532916146653174e-07, "loss": 0.0003, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1466 }, { "completion_length": 137.07144165039062, "epoch": 1.407869481765835, "grad_norm": 0.012752341106534004, "kl": 0.4512069523334503, "learning_rate": 3.050656541959046e-07, "loss": 0.0005, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1467 }, { "completion_length": 190.85714721679688, "epoch": 1.4088291746641075, "grad_norm": 0.010148433037102222, "kl": 0.30336982011795044, "learning_rate": 3.0480208261351037e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1468 }, { "completion_length": 169.2857208251953, "epoch": 1.40978886756238, "grad_norm": 0.014157217927277088, "kl": 0.3693729043006897, "learning_rate": 3.0453844702717714e-07, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1469 }, { "completion_length": 179.2857208251953, "epoch": 1.4107485604606527, "grad_norm": 0.012884476222097874, "kl": 0.3525921702384949, "learning_rate": 3.042747477448078e-07, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1470 }, { "completion_length": 204.71429443359375, "epoch": 1.4117082533589251, "grad_norm": 0.012833567336201668, "kl": 0.24605156481266022, "learning_rate": 3.0401098507437935e-07, "loss": 0.0002, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1471 }, { "completion_length": 168.6428680419922, "epoch": 1.4126679462571978, "grad_norm": 0.007733297534286976, "kl": 0.31698665022850037, "learning_rate": 3.037471593239432e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1472 }, { "completion_length": 157.21429443359375, "epoch": 1.4136276391554703, "grad_norm": 1.0292186737060547, "kl": 0.34739306569099426, "learning_rate": 3.034832708016243e-07, "loss": 0.0003, "reward": 1.821428656578064, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3214285969734192, "step": 1473 }, { "completion_length": 138.1428680419922, "epoch": 1.4145873320537428, "grad_norm": 2.4825258255004883, "kl": 0.37489214539527893, "learning_rate": 3.0321931981562094e-07, "loss": 0.0004, "reward": 1.7142858505249023, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 1474 }, { "completion_length": 126.92857360839844, "epoch": 1.4155470249520152, "grad_norm": 1.3556445837020874, "kl": 0.38441869616508484, "learning_rate": 3.029553066742041e-07, "loss": 0.0004, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1475 }, { "completion_length": 177.00001525878906, "epoch": 1.416506717850288, "grad_norm": 1.6964733600616455, "kl": 0.3079621195793152, "learning_rate": 3.0269123168571757e-07, "loss": 0.0003, "reward": 1.7500001192092896, "reward_std": 0.3535534143447876, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1476 }, { "completion_length": 172.6428680419922, "epoch": 1.4174664107485604, "grad_norm": 0.04542498290538788, "kl": 0.3604735732078552, "learning_rate": 3.024270951585776e-07, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1477 }, { "completion_length": 177.00001525878906, "epoch": 1.418426103646833, "grad_norm": 0.009216926991939545, "kl": 0.310576856136322, "learning_rate": 3.0216289740127183e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1478 }, { "completion_length": 190.35714721679688, "epoch": 1.4193857965451055, "grad_norm": 0.008234653621912003, "kl": 0.25262296199798584, "learning_rate": 3.0189863872235966e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1479 }, { "completion_length": 173.21429443359375, "epoch": 1.420345489443378, "grad_norm": 0.015491222031414509, "kl": 0.35027411580085754, "learning_rate": 3.0163431943047176e-07, "loss": 0.0004, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1480 }, { "completion_length": 175.1428680419922, "epoch": 1.4213051823416507, "grad_norm": 1.2134352922439575, "kl": 0.3369816243648529, "learning_rate": 3.013699398343094e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1481 }, { "completion_length": 155.35714721679688, "epoch": 1.4222648752399232, "grad_norm": 1.5574076175689697, "kl": 0.34916070103645325, "learning_rate": 3.011055002426443e-07, "loss": 0.0003, "reward": 1.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1482 }, { "completion_length": 168.6428680419922, "epoch": 1.4232245681381959, "grad_norm": 1.0366215705871582, "kl": 0.42756831645965576, "learning_rate": 3.008410009643184e-07, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1483 }, { "completion_length": 159.85714721679688, "epoch": 1.4241842610364683, "grad_norm": 1.8822400569915771, "kl": 0.3550178110599518, "learning_rate": 3.0057644230824314e-07, "loss": 0.0004, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1484 }, { "completion_length": 185.35714721679688, "epoch": 1.4251439539347408, "grad_norm": 0.6608047485351562, "kl": 0.27016162872314453, "learning_rate": 3.0031182458339936e-07, "loss": 0.0003, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1485 }, { "completion_length": 137.71429443359375, "epoch": 1.4261036468330135, "grad_norm": 2.127793312072754, "kl": 0.39609071612358093, "learning_rate": 3.00047148098837e-07, "loss": 0.0004, "reward": 1.7500001192092896, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1486 }, { "completion_length": 169.7857208251953, "epoch": 1.427063339731286, "grad_norm": 1.28236722946167, "kl": 0.28900471329689026, "learning_rate": 2.997824131636747e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1487 }, { "completion_length": 191.21429443359375, "epoch": 1.4280230326295587, "grad_norm": 0.6095403432846069, "kl": 0.27669215202331543, "learning_rate": 2.9951762008709904e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1488 }, { "completion_length": 119.0714340209961, "epoch": 1.4289827255278311, "grad_norm": 0.01477894838899374, "kl": 0.416464239358902, "learning_rate": 2.9925276917836467e-07, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1489 }, { "completion_length": 173.6428680419922, "epoch": 1.4299424184261036, "grad_norm": 0.8682979941368103, "kl": 0.37530454993247986, "learning_rate": 2.989878607467939e-07, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1490 }, { "completion_length": 176.35714721679688, "epoch": 1.4309021113243763, "grad_norm": 0.15022769570350647, "kl": 0.4476901888847351, "learning_rate": 2.987228951017762e-07, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1491 }, { "completion_length": 179.21429443359375, "epoch": 1.4318618042226487, "grad_norm": 0.008990493603050709, "kl": 0.2907656729221344, "learning_rate": 2.984578725527675e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1492 }, { "completion_length": 162.0, "epoch": 1.4328214971209214, "grad_norm": 2.3251423835754395, "kl": 0.394286572933197, "learning_rate": 2.9819279340929073e-07, "loss": 0.0004, "reward": 1.8571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1493 }, { "completion_length": 188.92857360839844, "epoch": 1.433781190019194, "grad_norm": 0.6390753388404846, "kl": 0.32785457372665405, "learning_rate": 2.979276579809346e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1494 }, { "completion_length": 140.71429443359375, "epoch": 1.4347408829174664, "grad_norm": 0.012667509727180004, "kl": 0.4162547290325165, "learning_rate": 2.976624665773536e-07, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1495 }, { "completion_length": 130.1428680419922, "epoch": 1.4357005758157388, "grad_norm": 2.2107574939727783, "kl": 0.4870419502258301, "learning_rate": 2.9739721950826755e-07, "loss": 0.0005, "reward": 1.571428656578064, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.2142857313156128, "step": 1496 }, { "completion_length": 144.21429443359375, "epoch": 1.4366602687140115, "grad_norm": 1.2217135429382324, "kl": 0.429778516292572, "learning_rate": 2.9713191708346147e-07, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1497 }, { "completion_length": 127.92857360839844, "epoch": 1.437619961612284, "grad_norm": 1.2661714553833008, "kl": 0.4481840431690216, "learning_rate": 2.9686655961278476e-07, "loss": 0.0004, "reward": 1.8928571939468384, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 1498 }, { "completion_length": 187.85714721679688, "epoch": 1.4385796545105567, "grad_norm": 0.007788974326103926, "kl": 0.25776246190071106, "learning_rate": 2.966011474061514e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1499 }, { "completion_length": 166.0, "epoch": 1.4395393474088292, "grad_norm": 1.0619094371795654, "kl": 0.3664996325969696, "learning_rate": 2.9633568077353904e-07, "loss": 0.0004, "reward": 1.8571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1500 }, { "completion_length": 137.6428680419922, "epoch": 1.4404990403071016, "grad_norm": 2.474949359893799, "kl": 0.37904414534568787, "learning_rate": 2.9607016002498904e-07, "loss": 0.0004, "reward": 1.821428656578064, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 1501 }, { "completion_length": 177.21429443359375, "epoch": 1.4414587332053743, "grad_norm": 1.3091986179351807, "kl": 0.32470470666885376, "learning_rate": 2.9580458547060584e-07, "loss": 0.0003, "reward": 1.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1502 }, { "completion_length": 178.00001525878906, "epoch": 1.4424184261036468, "grad_norm": 1.8557960987091064, "kl": 0.3361155688762665, "learning_rate": 2.955389574205569e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1503 }, { "completion_length": 189.21429443359375, "epoch": 1.4433781190019195, "grad_norm": 0.49237409234046936, "kl": 0.41861310601234436, "learning_rate": 2.9527327618507214e-07, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1504 }, { "completion_length": 164.6428680419922, "epoch": 1.444337811900192, "grad_norm": 1.3688380718231201, "kl": 0.32387590408325195, "learning_rate": 2.950075420744434e-07, "loss": 0.0003, "reward": 1.821428656578064, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1505 }, { "completion_length": 175.42857360839844, "epoch": 1.4452975047984644, "grad_norm": 0.7647241950035095, "kl": 0.28737226128578186, "learning_rate": 2.947417553990244e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1506 }, { "completion_length": 181.7857208251953, "epoch": 1.446257197696737, "grad_norm": 0.7684998512268066, "kl": 0.3356458246707916, "learning_rate": 2.9447591646923014e-07, "loss": 0.0003, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1507 }, { "completion_length": 202.92857360839844, "epoch": 1.4472168905950096, "grad_norm": 1.573401927947998, "kl": 0.27025315165519714, "learning_rate": 2.9421002559553703e-07, "loss": 0.0003, "reward": 1.8928571939468384, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 1508 }, { "completion_length": 168.71429443359375, "epoch": 1.4481765834932823, "grad_norm": 1.7106777429580688, "kl": 0.32655370235443115, "learning_rate": 2.9394408308848163e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1509 }, { "completion_length": 186.71429443359375, "epoch": 1.4491362763915547, "grad_norm": 0.026483934372663498, "kl": 0.293690025806427, "learning_rate": 2.936780892586611e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1510 }, { "completion_length": 157.21429443359375, "epoch": 1.4500959692898272, "grad_norm": 1.5719672441482544, "kl": 0.3704513907432556, "learning_rate": 2.934120444167326e-07, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1511 }, { "completion_length": 170.0, "epoch": 1.4510556621880997, "grad_norm": 0.00955125316977501, "kl": 0.289490282535553, "learning_rate": 2.931459488734126e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1512 }, { "completion_length": 192.00001525878906, "epoch": 1.4520153550863724, "grad_norm": 0.02028525248169899, "kl": 0.38387101888656616, "learning_rate": 2.92879802939477e-07, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1513 }, { "completion_length": 132.71429443359375, "epoch": 1.452975047984645, "grad_norm": 2.3485560417175293, "kl": 0.5183812379837036, "learning_rate": 2.926136069257604e-07, "loss": 0.0005, "reward": 1.6428571939468384, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1514 }, { "completion_length": 132.6428680419922, "epoch": 1.4539347408829175, "grad_norm": 0.012627658434212208, "kl": 0.4118000566959381, "learning_rate": 2.923473611431561e-07, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1515 }, { "completion_length": 170.21429443359375, "epoch": 1.45489443378119, "grad_norm": 0.01037768367677927, "kl": 0.34825000166893005, "learning_rate": 2.920810659026154e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1516 }, { "completion_length": 163.92857360839844, "epoch": 1.4558541266794625, "grad_norm": 1.4492896795272827, "kl": 0.4117576777935028, "learning_rate": 2.9181472151514727e-07, "loss": 0.0004, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1517 }, { "completion_length": 175.92857360839844, "epoch": 1.4568138195777351, "grad_norm": 0.9362148642539978, "kl": 0.33571866154670715, "learning_rate": 2.915483282918182e-07, "loss": 0.0003, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1518 }, { "completion_length": 156.07144165039062, "epoch": 1.4577735124760076, "grad_norm": 1.3840886354446411, "kl": 0.3973105549812317, "learning_rate": 2.9128188654375165e-07, "loss": 0.0004, "reward": 1.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1519 }, { "completion_length": 132.2857208251953, "epoch": 1.4587332053742803, "grad_norm": 2.3566370010375977, "kl": 0.480069100856781, "learning_rate": 2.9101539658212794e-07, "loss": 0.0005, "reward": 1.7500001192092896, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3214285969734192, "step": 1520 }, { "completion_length": 131.42857360839844, "epoch": 1.4596928982725528, "grad_norm": 1.0562106370925903, "kl": 0.36467811465263367, "learning_rate": 2.907488587181833e-07, "loss": 0.0004, "reward": 1.821428656578064, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1521 }, { "completion_length": 193.42857360839844, "epoch": 1.4606525911708252, "grad_norm": 0.8108013272285461, "kl": 0.34430983662605286, "learning_rate": 2.9048227326321044e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1522 }, { "completion_length": 212.21429443359375, "epoch": 1.461612284069098, "grad_norm": 0.010624436661601067, "kl": 0.2469228059053421, "learning_rate": 2.9021564052855716e-07, "loss": 0.0002, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1523 }, { "completion_length": 201.00001525878906, "epoch": 1.4625719769673704, "grad_norm": 1.2698132991790771, "kl": 0.3400651812553406, "learning_rate": 2.8994896082562674e-07, "loss": 0.0003, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1524 }, { "completion_length": 143.71429443359375, "epoch": 1.463531669865643, "grad_norm": 0.10093080997467041, "kl": 0.6346778869628906, "learning_rate": 2.896822344658774e-07, "loss": 0.0006, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1525 }, { "completion_length": 191.07144165039062, "epoch": 1.4644913627639156, "grad_norm": 0.8934696316719055, "kl": 0.3389838933944702, "learning_rate": 2.8941546176082146e-07, "loss": 0.0003, "reward": 1.8928571939468384, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 1526 }, { "completion_length": 113.14286041259766, "epoch": 1.465451055662188, "grad_norm": 0.018654901534318924, "kl": 0.5785670280456543, "learning_rate": 2.891486430220258e-07, "loss": 0.0006, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1527 }, { "completion_length": 131.0, "epoch": 1.4664107485604607, "grad_norm": 1.1802031993865967, "kl": 0.4878931939601898, "learning_rate": 2.888817785611108e-07, "loss": 0.0005, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1528 }, { "completion_length": 211.2857208251953, "epoch": 1.4673704414587332, "grad_norm": 1.943509817123413, "kl": 0.3286995589733124, "learning_rate": 2.886148686897504e-07, "loss": 0.0003, "reward": 1.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1529 }, { "completion_length": 161.2857208251953, "epoch": 1.4683301343570059, "grad_norm": 1.51057767868042, "kl": 0.37780922651290894, "learning_rate": 2.883479137196714e-07, "loss": 0.0004, "reward": 1.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1530 }, { "completion_length": 170.85714721679688, "epoch": 1.4692898272552783, "grad_norm": 1.593044400215149, "kl": 0.3303181529045105, "learning_rate": 2.880809139626533e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1531 }, { "completion_length": 163.6428680419922, "epoch": 1.4702495201535508, "grad_norm": 0.7986063361167908, "kl": 0.43691766262054443, "learning_rate": 2.878138697305282e-07, "loss": 0.0004, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1532 }, { "completion_length": 133.85714721679688, "epoch": 1.4712092130518233, "grad_norm": 1.2019466161727905, "kl": 0.4620383381843567, "learning_rate": 2.8754678133517986e-07, "loss": 0.0005, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1533 }, { "completion_length": 140.0, "epoch": 1.472168905950096, "grad_norm": 1.7608203887939453, "kl": 0.4380822479724884, "learning_rate": 2.8727964908854354e-07, "loss": 0.0004, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1534 }, { "completion_length": 183.1428680419922, "epoch": 1.4731285988483684, "grad_norm": 0.9068869352340698, "kl": 0.5348504781723022, "learning_rate": 2.8701247330260596e-07, "loss": 0.0005, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1535 }, { "completion_length": 180.7857208251953, "epoch": 1.4740882917466411, "grad_norm": 1.8888686895370483, "kl": 0.39988020062446594, "learning_rate": 2.867452542894045e-07, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1536 }, { "completion_length": 133.71429443359375, "epoch": 1.4750479846449136, "grad_norm": 1.513077735900879, "kl": 0.43464362621307373, "learning_rate": 2.8647799236102723e-07, "loss": 0.0004, "reward": 1.8928571939468384, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 1537 }, { "completion_length": 182.50001525878906, "epoch": 1.476007677543186, "grad_norm": 1.1531778573989868, "kl": 0.3502123951911926, "learning_rate": 2.8621068782961204e-07, "loss": 0.0004, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1538 }, { "completion_length": 193.42857360839844, "epoch": 1.4769673704414588, "grad_norm": 1.0449150800704956, "kl": 0.2859962582588196, "learning_rate": 2.85943341007347e-07, "loss": 0.0003, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1539 }, { "completion_length": 158.57144165039062, "epoch": 1.4779270633397312, "grad_norm": 1.4487708806991577, "kl": 0.37207168340682983, "learning_rate": 2.856759522064691e-07, "loss": 0.0004, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1540 }, { "completion_length": 132.5, "epoch": 1.478886756238004, "grad_norm": 0.01666552759706974, "kl": 0.4739375114440918, "learning_rate": 2.854085217392647e-07, "loss": 0.0005, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1541 }, { "completion_length": 169.85714721679688, "epoch": 1.4798464491362764, "grad_norm": 0.6113253235816956, "kl": 0.4515319764614105, "learning_rate": 2.8514104991806864e-07, "loss": 0.0005, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 1542 }, { "completion_length": 185.85714721679688, "epoch": 1.4808061420345489, "grad_norm": 1.8037513494491577, "kl": 0.33501580357551575, "learning_rate": 2.8487353705526414e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1543 }, { "completion_length": 202.00001525878906, "epoch": 1.4817658349328215, "grad_norm": 0.00981786847114563, "kl": 0.3065761923789978, "learning_rate": 2.8460598346328233e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1544 }, { "completion_length": 187.07144165039062, "epoch": 1.482725527831094, "grad_norm": 0.011768471449613571, "kl": 0.3311178386211395, "learning_rate": 2.8433838945460205e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1545 }, { "completion_length": 222.07144165039062, "epoch": 1.4836852207293667, "grad_norm": 1.2723348140716553, "kl": 0.25671133399009705, "learning_rate": 2.8407075534174906e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1546 }, { "completion_length": 184.35714721679688, "epoch": 1.4846449136276392, "grad_norm": 0.01246793381869793, "kl": 0.3216184377670288, "learning_rate": 2.838030814372961e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1547 }, { "completion_length": 219.50001525878906, "epoch": 1.4856046065259116, "grad_norm": 1.1666831970214844, "kl": 0.2766213119029999, "learning_rate": 2.835353680538624e-07, "loss": 0.0003, "reward": 1.7857143878936768, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1548 }, { "completion_length": 150.7857208251953, "epoch": 1.4865642994241843, "grad_norm": 1.5302189588546753, "kl": 0.4213651716709137, "learning_rate": 2.8326761550411346e-07, "loss": 0.0004, "reward": 1.6071429252624512, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1549 }, { "completion_length": 159.7857208251953, "epoch": 1.4875239923224568, "grad_norm": 0.02019377052783966, "kl": 0.4517909586429596, "learning_rate": 2.8299982410076005e-07, "loss": 0.0005, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1550 }, { "completion_length": 184.57144165039062, "epoch": 1.4884836852207295, "grad_norm": 0.009138748049736023, "kl": 0.32618361711502075, "learning_rate": 2.8273199415655887e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1551 }, { "completion_length": 192.50001525878906, "epoch": 1.489443378119002, "grad_norm": 1.699334740638733, "kl": 0.3560790419578552, "learning_rate": 2.8246412598431124e-07, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1552 }, { "completion_length": 174.07144165039062, "epoch": 1.4904030710172744, "grad_norm": 1.3614369630813599, "kl": 0.3378537893295288, "learning_rate": 2.8219621989686333e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1553 }, { "completion_length": 180.2857208251953, "epoch": 1.491362763915547, "grad_norm": 1.2655084133148193, "kl": 0.31722700595855713, "learning_rate": 2.819282762071055e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1554 }, { "completion_length": 169.5, "epoch": 1.4923224568138196, "grad_norm": 1.780190348625183, "kl": 0.4402361214160919, "learning_rate": 2.8166029522797204e-07, "loss": 0.0004, "reward": 1.821428656578064, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1555 }, { "completion_length": 150.21429443359375, "epoch": 1.493282149712092, "grad_norm": 0.013280869461596012, "kl": 0.3995670676231384, "learning_rate": 2.8139227727244085e-07, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1556 }, { "completion_length": 146.5, "epoch": 1.4942418426103647, "grad_norm": 0.9046369791030884, "kl": 0.43829473853111267, "learning_rate": 2.811242226535329e-07, "loss": 0.0004, "reward": 1.8928571939468384, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 1557 }, { "completion_length": 176.35714721679688, "epoch": 1.4952015355086372, "grad_norm": 1.424525260925293, "kl": 0.38319385051727295, "learning_rate": 2.808561316843122e-07, "loss": 0.0004, "reward": 1.8928571939468384, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 1558 }, { "completion_length": 149.57144165039062, "epoch": 1.4961612284069097, "grad_norm": 0.0800769031047821, "kl": 0.5500780344009399, "learning_rate": 2.8058800467788493e-07, "loss": 0.0006, "reward": 1.7857143878936768, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1559 }, { "completion_length": 132.92857360839844, "epoch": 1.4971209213051824, "grad_norm": 1.7151708602905273, "kl": 0.4549247622489929, "learning_rate": 2.803198419473994e-07, "loss": 0.0005, "reward": 1.821428656578064, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1560 }, { "completion_length": 220.1428680419922, "epoch": 1.4980806142034548, "grad_norm": 1.6303045749664307, "kl": 0.29785701632499695, "learning_rate": 2.800516438060459e-07, "loss": 0.0003, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1561 }, { "completion_length": 183.7857208251953, "epoch": 1.4990403071017275, "grad_norm": 0.041981324553489685, "kl": 0.4118167459964752, "learning_rate": 2.797834105670559e-07, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1562 }, { "completion_length": 150.21429443359375, "epoch": 1.5, "grad_norm": 1.7753727436065674, "kl": 0.43165677785873413, "learning_rate": 2.795151425437019e-07, "loss": 0.0004, "reward": 1.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1563 }, { "completion_length": 171.6428680419922, "epoch": 1.5009596928982725, "grad_norm": 1.0534688234329224, "kl": 0.3668169379234314, "learning_rate": 2.7924684004929683e-07, "loss": 0.0004, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1564 }, { "completion_length": 171.57144165039062, "epoch": 1.5019193857965452, "grad_norm": 1.1163944005966187, "kl": 0.3657660186290741, "learning_rate": 2.7897850339719413e-07, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1565 }, { "completion_length": 147.6428680419922, "epoch": 1.5028790786948176, "grad_norm": 1.8444877862930298, "kl": 0.41112372279167175, "learning_rate": 2.7871013290078713e-07, "loss": 0.0004, "reward": 1.7500001192092896, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 1566 }, { "completion_length": 134.85714721679688, "epoch": 1.5038387715930903, "grad_norm": 1.5912834405899048, "kl": 0.5232388377189636, "learning_rate": 2.7844172887350857e-07, "loss": 0.0005, "reward": 1.571428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 1567 }, { "completion_length": 155.92857360839844, "epoch": 1.5047984644913628, "grad_norm": 0.6920548677444458, "kl": 0.3287070691585541, "learning_rate": 2.781732916288303e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1568 }, { "completion_length": 189.42857360839844, "epoch": 1.5057581573896353, "grad_norm": 1.1758370399475098, "kl": 0.3354122042655945, "learning_rate": 2.779048214802631e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1569 }, { "completion_length": 167.21429443359375, "epoch": 1.5067178502879077, "grad_norm": 0.009435453452169895, "kl": 0.3119502663612366, "learning_rate": 2.776363187413562e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1570 }, { "completion_length": 190.07144165039062, "epoch": 1.5076775431861804, "grad_norm": 0.013909616507589817, "kl": 0.28379735350608826, "learning_rate": 2.773677837256967e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1571 }, { "completion_length": 134.71429443359375, "epoch": 1.508637236084453, "grad_norm": 1.8567227125167847, "kl": 0.3832535147666931, "learning_rate": 2.770992167469096e-07, "loss": 0.0004, "reward": 1.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1572 }, { "completion_length": 149.57144165039062, "epoch": 1.5095969289827256, "grad_norm": 1.9739238023757935, "kl": 0.4331956207752228, "learning_rate": 2.7683061811865713e-07, "loss": 0.0004, "reward": 1.7857143878936768, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1573 }, { "completion_length": 195.42857360839844, "epoch": 1.510556621880998, "grad_norm": 0.014058453030884266, "kl": 0.3257710039615631, "learning_rate": 2.7656198815463856e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1574 }, { "completion_length": 185.57144165039062, "epoch": 1.5115163147792705, "grad_norm": 0.01057068258523941, "kl": 0.31904858350753784, "learning_rate": 2.7629332716858967e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1575 }, { "completion_length": 178.21429443359375, "epoch": 1.5124760076775432, "grad_norm": 0.016726583242416382, "kl": 0.33214083313941956, "learning_rate": 2.7602463547428237e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1576 }, { "completion_length": 152.7857208251953, "epoch": 1.513435700575816, "grad_norm": 1.3677748441696167, "kl": 0.3949216306209564, "learning_rate": 2.7575591338552474e-07, "loss": 0.0004, "reward": 1.8928571939468384, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 1577 }, { "completion_length": 135.1428680419922, "epoch": 1.5143953934740884, "grad_norm": 0.9974108338356018, "kl": 0.40390273928642273, "learning_rate": 2.754871612161601e-07, "loss": 0.0004, "reward": 1.7500001192092896, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 1578 }, { "completion_length": 173.35714721679688, "epoch": 1.5153550863723608, "grad_norm": 2.721909761428833, "kl": 4.892144680023193, "learning_rate": 2.752183792800671e-07, "loss": 0.0049, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1579 }, { "completion_length": 195.1428680419922, "epoch": 1.5163147792706333, "grad_norm": 1.6223275661468506, "kl": 0.3932650685310364, "learning_rate": 2.7494956789115884e-07, "loss": 0.0004, "reward": 1.7500001192092896, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1580 }, { "completion_length": 193.07144165039062, "epoch": 1.517274472168906, "grad_norm": 0.016424749046564102, "kl": 0.31628772616386414, "learning_rate": 2.746807273633832e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1581 }, { "completion_length": 269.3571472167969, "epoch": 1.5182341650671785, "grad_norm": 0.015392513014376163, "kl": 0.23916558921337128, "learning_rate": 2.744118580107217e-07, "loss": 0.0002, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1582 }, { "completion_length": 179.35714721679688, "epoch": 1.5191938579654511, "grad_norm": 2.11025071144104, "kl": 0.2939330041408539, "learning_rate": 2.7414296014719e-07, "loss": 0.0003, "reward": 1.821428656578064, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1583 }, { "completion_length": 168.1428680419922, "epoch": 1.5201535508637236, "grad_norm": 1.5475530624389648, "kl": 0.4709278345108032, "learning_rate": 2.738740340868367e-07, "loss": 0.0005, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1584 }, { "completion_length": 141.7857208251953, "epoch": 1.521113243761996, "grad_norm": 1.275960087776184, "kl": 0.41895750164985657, "learning_rate": 2.7360508014374326e-07, "loss": 0.0004, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1585 }, { "completion_length": 167.92857360839844, "epoch": 1.5220729366602685, "grad_norm": 2.0048294067382812, "kl": 0.34650641679763794, "learning_rate": 2.7333609863202413e-07, "loss": 0.0003, "reward": 1.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1586 }, { "completion_length": 182.1428680419922, "epoch": 1.5230326295585412, "grad_norm": 1.4047273397445679, "kl": 0.31911489367485046, "learning_rate": 2.730670898658255e-07, "loss": 0.0003, "reward": 1.821428656578064, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 1587 }, { "completion_length": 165.0, "epoch": 1.523992322456814, "grad_norm": 0.010700098238885403, "kl": 0.34306368231773376, "learning_rate": 2.7279805415932567e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1588 }, { "completion_length": 180.07144165039062, "epoch": 1.5249520153550864, "grad_norm": 0.0097983842715621, "kl": 0.3387846350669861, "learning_rate": 2.725289918267343e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1589 }, { "completion_length": 146.92857360839844, "epoch": 1.5259117082533589, "grad_norm": 0.018517905846238136, "kl": 0.5639585256576538, "learning_rate": 2.722599031822922e-07, "loss": 0.0006, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1590 }, { "completion_length": 165.85714721679688, "epoch": 1.5268714011516313, "grad_norm": 0.009963112883269787, "kl": 0.3467957079410553, "learning_rate": 2.7199078854027085e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1591 }, { "completion_length": 176.71429443359375, "epoch": 1.527831094049904, "grad_norm": 1.2224124670028687, "kl": 0.3201090395450592, "learning_rate": 2.7172164821497214e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1592 }, { "completion_length": 202.7857208251953, "epoch": 1.5287907869481767, "grad_norm": 0.05951768159866333, "kl": 0.446550190448761, "learning_rate": 2.714524825207279e-07, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1593 }, { "completion_length": 197.42857360839844, "epoch": 1.5297504798464492, "grad_norm": 1.6707817316055298, "kl": 0.32619795203208923, "learning_rate": 2.711832917718997e-07, "loss": 0.0003, "reward": 1.821428656578064, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1594 }, { "completion_length": 197.85714721679688, "epoch": 1.5307101727447217, "grad_norm": 0.8130376935005188, "kl": 0.31720638275146484, "learning_rate": 2.7091407628287814e-07, "loss": 0.0003, "reward": 1.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1595 }, { "completion_length": 196.92857360839844, "epoch": 1.5316698656429941, "grad_norm": 1.0010795593261719, "kl": 0.33296939730644226, "learning_rate": 2.706448363680831e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1596 }, { "completion_length": 168.21429443359375, "epoch": 1.5326295585412668, "grad_norm": 0.017739197239279747, "kl": 0.35995861887931824, "learning_rate": 2.703755723419626e-07, "loss": 0.0004, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1597 }, { "completion_length": 155.0, "epoch": 1.5335892514395395, "grad_norm": 0.011559990234673023, "kl": 0.324632465839386, "learning_rate": 2.7010628451899296e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1598 }, { "completion_length": 151.92857360839844, "epoch": 1.534548944337812, "grad_norm": 1.2350507974624634, "kl": 0.4279475808143616, "learning_rate": 2.698369732136784e-07, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1599 }, { "completion_length": 115.50000762939453, "epoch": 1.5355086372360844, "grad_norm": 1.3507115840911865, "kl": 0.5102164149284363, "learning_rate": 2.695676387405505e-07, "loss": 0.0005, "reward": 1.8571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1600 }, { "completion_length": 170.0, "epoch": 1.536468330134357, "grad_norm": 1.0037530660629272, "kl": 0.40428197383880615, "learning_rate": 2.6929828141416784e-07, "loss": 0.0004, "reward": 1.821428656578064, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1601 }, { "completion_length": 168.2857208251953, "epoch": 1.5374280230326296, "grad_norm": 1.205862045288086, "kl": 0.3832032084465027, "learning_rate": 2.6902890154911564e-07, "loss": 0.0004, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1602 }, { "completion_length": 230.35714721679688, "epoch": 1.538387715930902, "grad_norm": 0.012492435984313488, "kl": 0.28173384070396423, "learning_rate": 2.6875949946000566e-07, "loss": 0.0003, "reward": 1.7857143878936768, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1603 }, { "completion_length": 166.92857360839844, "epoch": 1.5393474088291748, "grad_norm": 0.017871282994747162, "kl": 0.3521769642829895, "learning_rate": 2.684900754614753e-07, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1604 }, { "completion_length": 163.7857208251953, "epoch": 1.5403071017274472, "grad_norm": 1.011390209197998, "kl": 0.3466937243938446, "learning_rate": 2.682206298681879e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1605 }, { "completion_length": 151.7857208251953, "epoch": 1.5412667946257197, "grad_norm": 0.015580259263515472, "kl": 0.4137659966945648, "learning_rate": 2.679511629948319e-07, "loss": 0.0004, "reward": 1.7857143878936768, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1606 }, { "completion_length": 169.42857360839844, "epoch": 1.5422264875239922, "grad_norm": 0.7886037230491638, "kl": 0.3538943827152252, "learning_rate": 2.676816751561204e-07, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1607 }, { "completion_length": 164.2857208251953, "epoch": 1.5431861804222649, "grad_norm": 1.1006876230239868, "kl": 0.34457749128341675, "learning_rate": 2.674121666667911e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1608 }, { "completion_length": 189.2857208251953, "epoch": 1.5441458733205375, "grad_norm": 0.012303364463150501, "kl": 0.29565221071243286, "learning_rate": 2.671426378416061e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1609 }, { "completion_length": 161.21429443359375, "epoch": 1.54510556621881, "grad_norm": 0.014142689295113087, "kl": 0.35457122325897217, "learning_rate": 2.6687308899535073e-07, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1610 }, { "completion_length": 148.21429443359375, "epoch": 1.5460652591170825, "grad_norm": 2.246736526489258, "kl": 0.5135409832000732, "learning_rate": 2.6660352044283404e-07, "loss": 0.0005, "reward": 1.7142858505249023, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 1611 }, { "completion_length": 157.57144165039062, "epoch": 1.547024952015355, "grad_norm": 0.015191853977739811, "kl": 0.48690715432167053, "learning_rate": 2.6633393249888806e-07, "loss": 0.0005, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1612 }, { "completion_length": 146.07144165039062, "epoch": 1.5479846449136276, "grad_norm": 1.698182225227356, "kl": 0.4109371602535248, "learning_rate": 2.6606432547836753e-07, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1613 }, { "completion_length": 143.21429443359375, "epoch": 1.5489443378119003, "grad_norm": 0.024149464443325996, "kl": 0.4656309187412262, "learning_rate": 2.6579469969614927e-07, "loss": 0.0005, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1614 }, { "completion_length": 132.2857208251953, "epoch": 1.5499040307101728, "grad_norm": 0.9590649008750916, "kl": 0.5061752200126648, "learning_rate": 2.6552505546713203e-07, "loss": 0.0005, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 1615 }, { "completion_length": 238.4285888671875, "epoch": 1.5508637236084453, "grad_norm": 0.8536316752433777, "kl": 0.30829253792762756, "learning_rate": 2.652553931062364e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1616 }, { "completion_length": 137.7857208251953, "epoch": 1.5518234165067177, "grad_norm": 3.085942029953003, "kl": 0.509817898273468, "learning_rate": 2.649857129284038e-07, "loss": 0.0005, "reward": 1.6785714626312256, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 1617 }, { "completion_length": 187.07144165039062, "epoch": 1.5527831094049904, "grad_norm": 1.2463700771331787, "kl": 0.31918275356292725, "learning_rate": 2.647160152485967e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1618 }, { "completion_length": 139.42857360839844, "epoch": 1.5537428023032631, "grad_norm": 1.6516529321670532, "kl": 0.43964606523513794, "learning_rate": 2.6444630038179776e-07, "loss": 0.0004, "reward": 1.8571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1619 }, { "completion_length": 202.50001525878906, "epoch": 1.5547024952015356, "grad_norm": 0.33029991388320923, "kl": 0.37287506461143494, "learning_rate": 2.6417656864301005e-07, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1620 }, { "completion_length": 177.85714721679688, "epoch": 1.555662188099808, "grad_norm": 1.1691120862960815, "kl": 0.36182060837745667, "learning_rate": 2.639068203472559e-07, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1621 }, { "completion_length": 157.85714721679688, "epoch": 1.5566218809980805, "grad_norm": 0.02233251929283142, "kl": 0.4331248998641968, "learning_rate": 2.636370558095775e-07, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1622 }, { "completion_length": 148.07144165039062, "epoch": 1.5575815738963532, "grad_norm": 1.6434417963027954, "kl": 0.40002796053886414, "learning_rate": 2.633672753450355e-07, "loss": 0.0004, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1623 }, { "completion_length": 124.0714340209961, "epoch": 1.5585412667946257, "grad_norm": 2.2558846473693848, "kl": 0.8116053938865662, "learning_rate": 2.6309747926870964e-07, "loss": 0.0008, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1624 }, { "completion_length": 243.35714721679688, "epoch": 1.5595009596928984, "grad_norm": 1.443615436553955, "kl": 0.27116408944129944, "learning_rate": 2.6282766789569736e-07, "loss": 0.0003, "reward": 1.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1625 }, { "completion_length": 205.57144165039062, "epoch": 1.5604606525911708, "grad_norm": 0.7000653147697449, "kl": 0.2948339879512787, "learning_rate": 2.625578415411146e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1626 }, { "completion_length": 161.71429443359375, "epoch": 1.5614203454894433, "grad_norm": 0.024212870746850967, "kl": 0.44130605459213257, "learning_rate": 2.6228800052009416e-07, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1627 }, { "completion_length": 178.85714721679688, "epoch": 1.5623800383877158, "grad_norm": 0.02127791941165924, "kl": 0.40208742022514343, "learning_rate": 2.6201814514778635e-07, "loss": 0.0004, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1628 }, { "completion_length": 165.1428680419922, "epoch": 1.5633397312859885, "grad_norm": 0.009617951698601246, "kl": 0.32005372643470764, "learning_rate": 2.6174827573935813e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1629 }, { "completion_length": 182.35714721679688, "epoch": 1.5642994241842612, "grad_norm": 1.34490168094635, "kl": 0.34011223912239075, "learning_rate": 2.614783926099929e-07, "loss": 0.0003, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1630 }, { "completion_length": 224.57144165039062, "epoch": 1.5652591170825336, "grad_norm": 0.02808435820043087, "kl": 0.34648820757865906, "learning_rate": 2.6120849607489014e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1631 }, { "completion_length": 245.85714721679688, "epoch": 1.566218809980806, "grad_norm": 0.00941222533583641, "kl": 0.25272759795188904, "learning_rate": 2.6093858644926475e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1632 }, { "completion_length": 166.35714721679688, "epoch": 1.5671785028790786, "grad_norm": 0.013444889336824417, "kl": 0.3937775492668152, "learning_rate": 2.6066866404834715e-07, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1633 }, { "completion_length": 201.1428680419922, "epoch": 1.5681381957773513, "grad_norm": 0.01483410969376564, "kl": 0.2573339343070984, "learning_rate": 2.603987291873826e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1634 }, { "completion_length": 153.7857208251953, "epoch": 1.569097888675624, "grad_norm": 2.024343252182007, "kl": 0.4475471079349518, "learning_rate": 2.6012878218163093e-07, "loss": 0.0004, "reward": 1.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1635 }, { "completion_length": 123.00000762939453, "epoch": 1.5700575815738964, "grad_norm": 2.210322141647339, "kl": 0.5469667315483093, "learning_rate": 2.59858823346366e-07, "loss": 0.0005, "reward": 1.7500001192092896, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1636 }, { "completion_length": 173.6428680419922, "epoch": 1.5710172744721689, "grad_norm": 0.695276141166687, "kl": 0.36979445815086365, "learning_rate": 2.595888529968758e-07, "loss": 0.0004, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1637 }, { "completion_length": 162.7857208251953, "epoch": 1.5719769673704413, "grad_norm": 0.8495848774909973, "kl": 0.369882732629776, "learning_rate": 2.5931887144846154e-07, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1638 }, { "completion_length": 121.92857360839844, "epoch": 1.572936660268714, "grad_norm": 0.012062497437000275, "kl": 0.44488656520843506, "learning_rate": 2.5904887901643756e-07, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1639 }, { "completion_length": 163.42857360839844, "epoch": 1.5738963531669867, "grad_norm": 0.013249269686639309, "kl": 0.36287155747413635, "learning_rate": 2.5877887601613083e-07, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1640 }, { "completion_length": 197.2857208251953, "epoch": 1.5748560460652592, "grad_norm": 0.009725936688482761, "kl": 0.3290664851665497, "learning_rate": 2.5850886276288086e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1641 }, { "completion_length": 202.57144165039062, "epoch": 1.5758157389635317, "grad_norm": 1.395603060722351, "kl": 0.32060882449150085, "learning_rate": 2.58238839572039e-07, "loss": 0.0003, "reward": 1.7500001192092896, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1642 }, { "completion_length": 133.42857360839844, "epoch": 1.5767754318618041, "grad_norm": 0.015995802357792854, "kl": 0.4492207467556, "learning_rate": 2.5796880675896813e-07, "loss": 0.0004, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 1643 }, { "completion_length": 158.7857208251953, "epoch": 1.5777351247600768, "grad_norm": 1.1035051345825195, "kl": 0.40428775548934937, "learning_rate": 2.5769876463904263e-07, "loss": 0.0004, "reward": 1.8571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1644 }, { "completion_length": 207.7857208251953, "epoch": 1.5786948176583493, "grad_norm": 1.3982362747192383, "kl": 0.28289172053337097, "learning_rate": 2.574287135276474e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1645 }, { "completion_length": 168.5, "epoch": 1.579654510556622, "grad_norm": 0.01847156137228012, "kl": 0.3915877044200897, "learning_rate": 2.571586537401781e-07, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1646 }, { "completion_length": 202.42857360839844, "epoch": 1.5806142034548945, "grad_norm": 1.7389581203460693, "kl": 0.4190170466899872, "learning_rate": 2.5688858559204053e-07, "loss": 0.0004, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 1647 }, { "completion_length": 193.2857208251953, "epoch": 1.581573896353167, "grad_norm": 1.7922883033752441, "kl": 0.37612682580947876, "learning_rate": 2.5661850939865004e-07, "loss": 0.0004, "reward": 1.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1648 }, { "completion_length": 167.0, "epoch": 1.5825335892514394, "grad_norm": 3.6263575553894043, "kl": 0.6360294818878174, "learning_rate": 2.5634842547543154e-07, "loss": 0.0006, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 1649 }, { "completion_length": 151.6428680419922, "epoch": 1.583493282149712, "grad_norm": 1.5542055368423462, "kl": 0.38712331652641296, "learning_rate": 2.56078334137819e-07, "loss": 0.0004, "reward": 1.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1650 }, { "completion_length": 163.85714721679688, "epoch": 1.5844529750479848, "grad_norm": 0.01700812578201294, "kl": 0.41110798716545105, "learning_rate": 2.55808235701255e-07, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1651 }, { "completion_length": 219.21429443359375, "epoch": 1.5854126679462572, "grad_norm": 0.011232857592403889, "kl": 0.27733761072158813, "learning_rate": 2.555381304811903e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1652 }, { "completion_length": 223.7857208251953, "epoch": 1.5863723608445297, "grad_norm": 0.011253518983721733, "kl": 0.27805468440055847, "learning_rate": 2.552680187930836e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1653 }, { "completion_length": 158.71429443359375, "epoch": 1.5873320537428022, "grad_norm": 0.013840853236615658, "kl": 0.4501377046108246, "learning_rate": 2.549979009524015e-07, "loss": 0.0005, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1654 }, { "completion_length": 177.07144165039062, "epoch": 1.5882917466410749, "grad_norm": 1.2039047479629517, "kl": 0.3307506740093231, "learning_rate": 2.547277772746173e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1655 }, { "completion_length": 167.2857208251953, "epoch": 1.5892514395393476, "grad_norm": 0.8824782967567444, "kl": 0.37739333510398865, "learning_rate": 2.5445764807521145e-07, "loss": 0.0004, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1656 }, { "completion_length": 171.35714721679688, "epoch": 1.59021113243762, "grad_norm": 1.3188049793243408, "kl": 0.4310920834541321, "learning_rate": 2.5418751366967066e-07, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1657 }, { "completion_length": 162.21429443359375, "epoch": 1.5911708253358925, "grad_norm": 0.032097525894641876, "kl": 0.3662807047367096, "learning_rate": 2.539173743734879e-07, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1658 }, { "completion_length": 193.21429443359375, "epoch": 1.592130518234165, "grad_norm": 0.012685731053352356, "kl": 0.3795715570449829, "learning_rate": 2.536472305021616e-07, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1659 }, { "completion_length": 204.7857208251953, "epoch": 1.5930902111324377, "grad_norm": 1.1823344230651855, "kl": 0.2966923117637634, "learning_rate": 2.533770823711957e-07, "loss": 0.0003, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1660 }, { "completion_length": 155.2857208251953, "epoch": 1.5940499040307101, "grad_norm": 0.03018760494887829, "kl": 0.4296683669090271, "learning_rate": 2.5310693029609927e-07, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1661 }, { "completion_length": 195.57144165039062, "epoch": 1.5950095969289828, "grad_norm": 0.6679431796073914, "kl": 0.3494417369365692, "learning_rate": 2.5283677459238554e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1662 }, { "completion_length": 160.57144165039062, "epoch": 1.5959692898272553, "grad_norm": 0.020356310531497, "kl": 0.3755722939968109, "learning_rate": 2.525666155755725e-07, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1663 }, { "completion_length": 176.1428680419922, "epoch": 1.5969289827255277, "grad_norm": 0.01607379876077175, "kl": 0.41890576481819153, "learning_rate": 2.522964535611816e-07, "loss": 0.0004, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 1664 }, { "completion_length": 197.92857360839844, "epoch": 1.5978886756238004, "grad_norm": 1.4493671655654907, "kl": 0.31870633363723755, "learning_rate": 2.5202628886473805e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1665 }, { "completion_length": 168.2857208251953, "epoch": 1.598848368522073, "grad_norm": 1.0640743970870972, "kl": 0.3160770535469055, "learning_rate": 2.5175612180177e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1666 }, { "completion_length": 155.71429443359375, "epoch": 1.5998080614203456, "grad_norm": 0.014527358114719391, "kl": 0.44427600502967834, "learning_rate": 2.514859526878084e-07, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1667 }, { "completion_length": 214.07144165039062, "epoch": 1.600767754318618, "grad_norm": 1.4034186601638794, "kl": 0.32028502225875854, "learning_rate": 2.5121578183838685e-07, "loss": 0.0003, "reward": 1.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1668 }, { "completion_length": 163.57144165039062, "epoch": 1.6017274472168905, "grad_norm": 1.456252932548523, "kl": 0.37825530767440796, "learning_rate": 2.5094560956904077e-07, "loss": 0.0004, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1669 }, { "completion_length": 188.50001525878906, "epoch": 1.602687140115163, "grad_norm": 0.012750155292451382, "kl": 0.33409371972084045, "learning_rate": 2.5067543619530704e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1670 }, { "completion_length": 181.92857360839844, "epoch": 1.6036468330134357, "grad_norm": 1.15876305103302, "kl": 0.4618554413318634, "learning_rate": 2.5040526203272416e-07, "loss": 0.0005, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1671 }, { "completion_length": 141.5, "epoch": 1.6046065259117084, "grad_norm": 1.8564833402633667, "kl": 0.5173818469047546, "learning_rate": 2.5013508739683134e-07, "loss": 0.0005, "reward": 1.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1672 }, { "completion_length": 236.6428680419922, "epoch": 1.6055662188099808, "grad_norm": 0.029660869389772415, "kl": 0.29197779297828674, "learning_rate": 2.4986491260316863e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1673 }, { "completion_length": 140.42857360839844, "epoch": 1.6065259117082533, "grad_norm": 0.01917838864028454, "kl": 0.4808562397956848, "learning_rate": 2.495947379672759e-07, "loss": 0.0005, "reward": 1.7857143878936768, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1674 }, { "completion_length": 137.07144165039062, "epoch": 1.6074856046065258, "grad_norm": 0.014658104628324509, "kl": 0.44776493310928345, "learning_rate": 2.49324563804693e-07, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1675 }, { "completion_length": 162.5, "epoch": 1.6084452975047985, "grad_norm": 0.019165286794304848, "kl": 0.3906031548976898, "learning_rate": 2.4905439043095926e-07, "loss": 0.0004, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1676 }, { "completion_length": 173.50001525878906, "epoch": 1.6094049904030712, "grad_norm": 1.2711364030838013, "kl": 0.3758338987827301, "learning_rate": 2.4878421816161313e-07, "loss": 0.0004, "reward": 1.8928571939468384, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 1677 }, { "completion_length": 146.35714721679688, "epoch": 1.6103646833013436, "grad_norm": 1.4594204425811768, "kl": 0.3784467279911041, "learning_rate": 2.485140473121915e-07, "loss": 0.0004, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1678 }, { "completion_length": 199.21429443359375, "epoch": 1.611324376199616, "grad_norm": 1.7688404321670532, "kl": 0.39487534761428833, "learning_rate": 2.4824387819823e-07, "loss": 0.0004, "reward": 1.8928571939468384, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 1679 }, { "completion_length": 165.21429443359375, "epoch": 1.6122840690978886, "grad_norm": 2.903879404067993, "kl": 0.4555986225605011, "learning_rate": 2.4797371113526203e-07, "loss": 0.0005, "reward": 1.821428656578064, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1680 }, { "completion_length": 199.2857208251953, "epoch": 1.6132437619961613, "grad_norm": 0.9959331154823303, "kl": 0.3280127942562103, "learning_rate": 2.477035464388184e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1681 }, { "completion_length": 160.0, "epoch": 1.6142034548944337, "grad_norm": 1.2000210285186768, "kl": 0.37829920649528503, "learning_rate": 2.4743338442442754e-07, "loss": 0.0004, "reward": 1.8928571939468384, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 1682 }, { "completion_length": 136.2857208251953, "epoch": 1.6151631477927064, "grad_norm": 0.015523555688560009, "kl": 0.44988688826560974, "learning_rate": 2.4716322540761443e-07, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1683 }, { "completion_length": 164.92857360839844, "epoch": 1.616122840690979, "grad_norm": 1.7024590969085693, "kl": 0.425823837518692, "learning_rate": 2.4689306970390076e-07, "loss": 0.0004, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1684 }, { "completion_length": 161.0, "epoch": 1.6170825335892514, "grad_norm": 0.012950199656188488, "kl": 0.44096580147743225, "learning_rate": 2.466229176288043e-07, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1685 }, { "completion_length": 206.57144165039062, "epoch": 1.6180422264875238, "grad_norm": 0.015359696932137012, "kl": 0.3766743838787079, "learning_rate": 2.463527694978384e-07, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1686 }, { "completion_length": 166.21429443359375, "epoch": 1.6190019193857965, "grad_norm": 0.012063710950314999, "kl": 0.40058714151382446, "learning_rate": 2.460826256265122e-07, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1687 }, { "completion_length": 131.2857208251953, "epoch": 1.6199616122840692, "grad_norm": 1.4529035091400146, "kl": 0.48732438683509827, "learning_rate": 2.458124863303293e-07, "loss": 0.0005, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1688 }, { "completion_length": 170.1428680419922, "epoch": 1.6209213051823417, "grad_norm": 1.5854740142822266, "kl": 0.36890146136283875, "learning_rate": 2.455423519247885e-07, "loss": 0.0004, "reward": 1.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1689 }, { "completion_length": 210.21429443359375, "epoch": 1.6218809980806141, "grad_norm": 0.02715381234884262, "kl": 0.29176396131515503, "learning_rate": 2.452722227253827e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1690 }, { "completion_length": 195.1428680419922, "epoch": 1.6228406909788866, "grad_norm": 0.019075211137533188, "kl": 0.34321463108062744, "learning_rate": 2.4500209904759846e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1691 }, { "completion_length": 144.42857360839844, "epoch": 1.6238003838771593, "grad_norm": 1.0345686674118042, "kl": 0.40607601404190063, "learning_rate": 2.447319812069163e-07, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1692 }, { "completion_length": 216.71429443359375, "epoch": 1.624760076775432, "grad_norm": 1.0938445329666138, "kl": 0.2980417013168335, "learning_rate": 2.444618695188098e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1693 }, { "completion_length": 174.50001525878906, "epoch": 1.6257197696737045, "grad_norm": 0.01808006688952446, "kl": 0.3582616150379181, "learning_rate": 2.4419176429874504e-07, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1694 }, { "completion_length": 186.92857360839844, "epoch": 1.626679462571977, "grad_norm": 1.0175223350524902, "kl": 0.33827871084213257, "learning_rate": 2.43921665862181e-07, "loss": 0.0003, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1695 }, { "completion_length": 202.85714721679688, "epoch": 1.6276391554702494, "grad_norm": 0.01258647721260786, "kl": 0.36063721776008606, "learning_rate": 2.4365157452456844e-07, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1696 }, { "completion_length": 228.9285888671875, "epoch": 1.628598848368522, "grad_norm": 0.018686512485146523, "kl": 0.33416926860809326, "learning_rate": 2.4338149060135e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1697 }, { "completion_length": 181.42857360839844, "epoch": 1.6295585412667948, "grad_norm": 0.029012564569711685, "kl": 0.369423508644104, "learning_rate": 2.431114144079595e-07, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1698 }, { "completion_length": 135.57144165039062, "epoch": 1.6305182341650672, "grad_norm": 0.009962151758372784, "kl": 0.4277733564376831, "learning_rate": 2.428413462598219e-07, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1699 }, { "completion_length": 182.92857360839844, "epoch": 1.6314779270633397, "grad_norm": 1.8580855131149292, "kl": 0.3469715416431427, "learning_rate": 2.425712864723527e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1700 }, { "completion_length": 143.5, "epoch": 1.6324376199616122, "grad_norm": 0.010354436002671719, "kl": 0.38799750804901123, "learning_rate": 2.4230123536095745e-07, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1701 }, { "completion_length": 166.5, "epoch": 1.6333973128598849, "grad_norm": 0.06551414728164673, "kl": 0.4671778082847595, "learning_rate": 2.4203119324103184e-07, "loss": 0.0005, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1702 }, { "completion_length": 200.7857208251953, "epoch": 1.6343570057581573, "grad_norm": 0.8755646347999573, "kl": 0.30589374899864197, "learning_rate": 2.4176116042796106e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1703 }, { "completion_length": 164.35714721679688, "epoch": 1.63531669865643, "grad_norm": 1.50246262550354, "kl": 0.3263620138168335, "learning_rate": 2.414911372371191e-07, "loss": 0.0003, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1704 }, { "completion_length": 199.6428680419922, "epoch": 1.6362763915547025, "grad_norm": 0.013614840805530548, "kl": 0.3443022072315216, "learning_rate": 2.4122112398386914e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1705 }, { "completion_length": 140.1428680419922, "epoch": 1.637236084452975, "grad_norm": 2.4343435764312744, "kl": 0.5190836787223816, "learning_rate": 2.4095112098356247e-07, "loss": 0.0005, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1706 }, { "completion_length": 195.35714721679688, "epoch": 1.6381957773512474, "grad_norm": 1.027313470840454, "kl": 0.3325769603252411, "learning_rate": 2.406811285515385e-07, "loss": 0.0003, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1707 }, { "completion_length": 186.07144165039062, "epoch": 1.6391554702495201, "grad_norm": 1.8535834550857544, "kl": 0.3480032682418823, "learning_rate": 2.4041114700312423e-07, "loss": 0.0003, "reward": 1.821428656578064, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1708 }, { "completion_length": 125.42857360839844, "epoch": 1.6401151631477928, "grad_norm": 1.2985104322433472, "kl": 0.5000232458114624, "learning_rate": 2.40141176653634e-07, "loss": 0.0005, "reward": 1.8928571939468384, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 1709 }, { "completion_length": 231.07144165039062, "epoch": 1.6410748560460653, "grad_norm": 0.6867560744285583, "kl": 0.2845916748046875, "learning_rate": 2.398712178183691e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1710 }, { "completion_length": 123.42857360839844, "epoch": 1.6420345489443378, "grad_norm": 2.552736520767212, "kl": 0.5087679028511047, "learning_rate": 2.396012708126174e-07, "loss": 0.0005, "reward": 1.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1711 }, { "completion_length": 216.85714721679688, "epoch": 1.6429942418426102, "grad_norm": 0.010276276618242264, "kl": 0.3582089841365814, "learning_rate": 2.3933133595165283e-07, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1712 }, { "completion_length": 205.00001525878906, "epoch": 1.643953934740883, "grad_norm": 1.6707020998001099, "kl": 0.3259736895561218, "learning_rate": 2.3906141355073517e-07, "loss": 0.0003, "reward": 1.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1713 }, { "completion_length": 126.35714721679688, "epoch": 1.6449136276391556, "grad_norm": 2.7851712703704834, "kl": 0.5620639324188232, "learning_rate": 2.387915039251099e-07, "loss": 0.0006, "reward": 1.821428656578064, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1714 }, { "completion_length": 200.85714721679688, "epoch": 1.645873320537428, "grad_norm": 0.855387806892395, "kl": 0.34341609477996826, "learning_rate": 2.3852160739000706e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1715 }, { "completion_length": 182.1428680419922, "epoch": 1.6468330134357005, "grad_norm": 1.4409455060958862, "kl": 0.3438756763935089, "learning_rate": 2.382517242606419e-07, "loss": 0.0003, "reward": 1.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1716 }, { "completion_length": 221.2857208251953, "epoch": 1.647792706333973, "grad_norm": 1.2046254873275757, "kl": 0.2721804976463318, "learning_rate": 2.3798185485221366e-07, "loss": 0.0003, "reward": 1.821428656578064, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1717 }, { "completion_length": 205.1428680419922, "epoch": 1.6487523992322457, "grad_norm": 0.008015393279492855, "kl": 0.2947402894496918, "learning_rate": 2.3771199947990584e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1718 }, { "completion_length": 183.50001525878906, "epoch": 1.6497120921305184, "grad_norm": 0.011709214188158512, "kl": 0.3543841242790222, "learning_rate": 2.3744215845888543e-07, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1719 }, { "completion_length": 203.42857360839844, "epoch": 1.6506717850287909, "grad_norm": 1.3640674352645874, "kl": 0.34650537371635437, "learning_rate": 2.3717233210430254e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1720 }, { "completion_length": 179.42857360839844, "epoch": 1.6516314779270633, "grad_norm": 0.013728320598602295, "kl": 0.35919511318206787, "learning_rate": 2.3690252073129047e-07, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1721 }, { "completion_length": 204.21429443359375, "epoch": 1.6525911708253358, "grad_norm": 0.024243813008069992, "kl": 0.36087682843208313, "learning_rate": 2.366327246549645e-07, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1722 }, { "completion_length": 166.5, "epoch": 1.6535508637236085, "grad_norm": 1.790827989578247, "kl": 0.4160477817058563, "learning_rate": 2.3636294419042255e-07, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1723 }, { "completion_length": 190.57144165039062, "epoch": 1.654510556621881, "grad_norm": 2.2452197074890137, "kl": 0.41719427704811096, "learning_rate": 2.360931796527441e-07, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1724 }, { "completion_length": 182.50001525878906, "epoch": 1.6554702495201536, "grad_norm": 0.010118342004716396, "kl": 0.31601905822753906, "learning_rate": 2.3582343135698999e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1725 }, { "completion_length": 157.92857360839844, "epoch": 1.6564299424184261, "grad_norm": 0.8884227871894836, "kl": 0.4343673288822174, "learning_rate": 2.3555369961820217e-07, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1726 }, { "completion_length": 193.07144165039062, "epoch": 1.6573896353166986, "grad_norm": 0.013247927650809288, "kl": 0.37173110246658325, "learning_rate": 2.3528398475140335e-07, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1727 }, { "completion_length": 189.7857208251953, "epoch": 1.658349328214971, "grad_norm": 0.00988641194999218, "kl": 0.3274925947189331, "learning_rate": 2.3501428707159615e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1728 }, { "completion_length": 202.00001525878906, "epoch": 1.6593090211132437, "grad_norm": 0.012626759707927704, "kl": 0.3441075384616852, "learning_rate": 2.347446068937636e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1729 }, { "completion_length": 160.6428680419922, "epoch": 1.6602687140115164, "grad_norm": 1.1664559841156006, "kl": 0.4397423565387726, "learning_rate": 2.3447494453286792e-07, "loss": 0.0004, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1730 }, { "completion_length": 202.2857208251953, "epoch": 1.661228406909789, "grad_norm": 0.012722408398985863, "kl": 0.3745518624782562, "learning_rate": 2.3420530030385076e-07, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1731 }, { "completion_length": 170.92857360839844, "epoch": 1.6621880998080614, "grad_norm": 2.062929391860962, "kl": 0.3944624662399292, "learning_rate": 2.3393567452163248e-07, "loss": 0.0004, "reward": 1.7500001192092896, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 1732 }, { "completion_length": 173.6428680419922, "epoch": 1.6631477927063338, "grad_norm": 0.011524648405611515, "kl": 0.3166196346282959, "learning_rate": 2.336660675011119e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1733 }, { "completion_length": 220.07144165039062, "epoch": 1.6641074856046065, "grad_norm": 0.685336709022522, "kl": 0.30894166231155396, "learning_rate": 2.3339647955716604e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1734 }, { "completion_length": 167.7857208251953, "epoch": 1.6650671785028792, "grad_norm": 0.6987410187721252, "kl": 0.45844802260398865, "learning_rate": 2.3312691100464938e-07, "loss": 0.0005, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1735 }, { "completion_length": 161.57144165039062, "epoch": 1.6660268714011517, "grad_norm": 0.8834701776504517, "kl": 0.38559195399284363, "learning_rate": 2.3285736215839396e-07, "loss": 0.0004, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1736 }, { "completion_length": 146.71429443359375, "epoch": 1.6669865642994242, "grad_norm": 1.842816948890686, "kl": 0.4578896462917328, "learning_rate": 2.3258783333320889e-07, "loss": 0.0005, "reward": 1.821428656578064, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1737 }, { "completion_length": 134.42857360839844, "epoch": 1.6679462571976966, "grad_norm": 1.026280164718628, "kl": 0.5672340393066406, "learning_rate": 2.3231832484387964e-07, "loss": 0.0006, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1738 }, { "completion_length": 158.7857208251953, "epoch": 1.6689059500959693, "grad_norm": 0.026061737909913063, "kl": 0.4712987244129181, "learning_rate": 2.320488370051681e-07, "loss": 0.0005, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1739 }, { "completion_length": 159.21429443359375, "epoch": 1.669865642994242, "grad_norm": 0.8735110759735107, "kl": 0.45290854573249817, "learning_rate": 2.3177937013181203e-07, "loss": 0.0005, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1740 }, { "completion_length": 112.42857360839844, "epoch": 1.6708253358925145, "grad_norm": 1.5950676202774048, "kl": 0.5351567268371582, "learning_rate": 2.3150992453852476e-07, "loss": 0.0005, "reward": 1.8928571939468384, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 1741 }, { "completion_length": 135.0, "epoch": 1.671785028790787, "grad_norm": 1.6599332094192505, "kl": 0.4239357113838196, "learning_rate": 2.3124050053999445e-07, "loss": 0.0004, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1742 }, { "completion_length": 176.35714721679688, "epoch": 1.6727447216890594, "grad_norm": 0.025238126516342163, "kl": 0.37346312403678894, "learning_rate": 2.309710984508844e-07, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1743 }, { "completion_length": 166.07144165039062, "epoch": 1.673704414587332, "grad_norm": 0.2546391785144806, "kl": 0.9110156893730164, "learning_rate": 2.3070171858583219e-07, "loss": 0.0009, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1744 }, { "completion_length": 149.07144165039062, "epoch": 1.6746641074856046, "grad_norm": 1.5201789140701294, "kl": 0.47386178374290466, "learning_rate": 2.3043236125944947e-07, "loss": 0.0005, "reward": 1.821428656578064, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 1745 }, { "completion_length": 162.42857360839844, "epoch": 1.6756238003838773, "grad_norm": 1.2735830545425415, "kl": 0.43392378091812134, "learning_rate": 2.3016302678632155e-07, "loss": 0.0004, "reward": 1.8571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1746 }, { "completion_length": 189.42857360839844, "epoch": 1.6765834932821497, "grad_norm": 1.1592366695404053, "kl": 0.4336989223957062, "learning_rate": 2.2989371548100696e-07, "loss": 0.0004, "reward": 1.7142858505249023, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1747 }, { "completion_length": 175.2857208251953, "epoch": 1.6775431861804222, "grad_norm": 0.012137102894484997, "kl": 0.3585071861743927, "learning_rate": 2.2962442765803746e-07, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1748 }, { "completion_length": 141.5, "epoch": 1.6785028790786947, "grad_norm": 0.020895391702651978, "kl": 0.5049291253089905, "learning_rate": 2.2935516363191693e-07, "loss": 0.0005, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1749 }, { "completion_length": 171.57144165039062, "epoch": 1.6794625719769674, "grad_norm": 0.8383393883705139, "kl": 0.38026419281959534, "learning_rate": 2.2908592371712187e-07, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1750 }, { "completion_length": 123.78572082519531, "epoch": 1.68042226487524, "grad_norm": 1.983709454536438, "kl": 0.42898404598236084, "learning_rate": 2.2881670822810034e-07, "loss": 0.0004, "reward": 1.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1751 }, { "completion_length": 169.1428680419922, "epoch": 1.6813819577735125, "grad_norm": 0.014563693664968014, "kl": 0.4177154302597046, "learning_rate": 2.2854751747927208e-07, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1752 }, { "completion_length": 128.2857208251953, "epoch": 1.682341650671785, "grad_norm": 1.2057726383209229, "kl": 0.5451697111129761, "learning_rate": 2.282783517850279e-07, "loss": 0.0005, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 1753 }, { "completion_length": 186.7857208251953, "epoch": 1.6833013435700575, "grad_norm": 0.017162444069981575, "kl": 0.33698776364326477, "learning_rate": 2.2800921145972913e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1754 }, { "completion_length": 160.92857360839844, "epoch": 1.6842610364683301, "grad_norm": 0.020318420603871346, "kl": 0.5458692312240601, "learning_rate": 2.2774009681770785e-07, "loss": 0.0005, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1755 }, { "completion_length": 195.42857360839844, "epoch": 1.6852207293666028, "grad_norm": 0.018151307478547096, "kl": 0.36596402525901794, "learning_rate": 2.2747100817326569e-07, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1756 }, { "completion_length": 210.21429443359375, "epoch": 1.6861804222648753, "grad_norm": 0.02257165126502514, "kl": 0.38633137941360474, "learning_rate": 2.272019458406743e-07, "loss": 0.0004, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 1757 }, { "completion_length": 163.5, "epoch": 1.6871401151631478, "grad_norm": 0.013708218932151794, "kl": 0.4350506663322449, "learning_rate": 2.2693291013417452e-07, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1758 }, { "completion_length": 170.92857360839844, "epoch": 1.6880998080614202, "grad_norm": 1.1126099824905396, "kl": 0.3245472013950348, "learning_rate": 2.2666390136797587e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1759 }, { "completion_length": 199.6428680419922, "epoch": 1.689059500959693, "grad_norm": 1.5012515783309937, "kl": 0.34817925095558167, "learning_rate": 2.2639491985625667e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1760 }, { "completion_length": 139.71429443359375, "epoch": 1.6900191938579654, "grad_norm": 2.0410046577453613, "kl": 0.3802946209907532, "learning_rate": 2.2612596591316333e-07, "loss": 0.0004, "reward": 1.7500001192092896, "reward_std": 0.3535533845424652, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1761 }, { "completion_length": 161.6428680419922, "epoch": 1.690978886756238, "grad_norm": 0.04163544252514839, "kl": 0.4378994107246399, "learning_rate": 2.2585703985281e-07, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1762 }, { "completion_length": 164.92857360839844, "epoch": 1.6919385796545106, "grad_norm": 1.6032174825668335, "kl": 0.3477104902267456, "learning_rate": 2.255881419892783e-07, "loss": 0.0003, "reward": 1.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1763 }, { "completion_length": 164.42857360839844, "epoch": 1.692898272552783, "grad_norm": 1.304498314857483, "kl": 0.40049558877944946, "learning_rate": 2.2531927263661685e-07, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1764 }, { "completion_length": 162.42857360839844, "epoch": 1.6938579654510557, "grad_norm": 0.7737807035446167, "kl": 0.3752124011516571, "learning_rate": 2.2505043210884114e-07, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1765 }, { "completion_length": 166.0, "epoch": 1.6948176583493282, "grad_norm": 0.011245260946452618, "kl": 0.3899635970592499, "learning_rate": 2.2478162071993296e-07, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1766 }, { "completion_length": 176.00001525878906, "epoch": 1.6957773512476009, "grad_norm": 0.7633875012397766, "kl": 0.39074864983558655, "learning_rate": 2.2451283878383983e-07, "loss": 0.0004, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1767 }, { "completion_length": 191.00001525878906, "epoch": 1.6967370441458733, "grad_norm": 0.029465503990650177, "kl": 0.37162166833877563, "learning_rate": 2.2424408661447531e-07, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1768 }, { "completion_length": 159.5, "epoch": 1.6976967370441458, "grad_norm": 0.052361179143190384, "kl": 0.5538885593414307, "learning_rate": 2.2397536452571763e-07, "loss": 0.0006, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 1769 }, { "completion_length": 173.50001525878906, "epoch": 1.6986564299424183, "grad_norm": 0.0599614717066288, "kl": 0.40602990984916687, "learning_rate": 2.2370667283141036e-07, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1770 }, { "completion_length": 150.07144165039062, "epoch": 1.699616122840691, "grad_norm": 1.4405889511108398, "kl": 0.39160433411598206, "learning_rate": 2.2343801184536147e-07, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1771 }, { "completion_length": 182.92857360839844, "epoch": 1.7005758157389637, "grad_norm": 0.015121269971132278, "kl": 0.33358585834503174, "learning_rate": 2.2316938188134282e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1772 }, { "completion_length": 136.1428680419922, "epoch": 1.7015355086372361, "grad_norm": 2.405205488204956, "kl": 0.41771572828292847, "learning_rate": 2.2290078325309035e-07, "loss": 0.0004, "reward": 1.7500001192092896, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1773 }, { "completion_length": 209.2857208251953, "epoch": 1.7024952015355086, "grad_norm": 0.023010600358247757, "kl": 0.34057560563087463, "learning_rate": 2.2263221627430328e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1774 }, { "completion_length": 177.07144165039062, "epoch": 1.703454894433781, "grad_norm": 0.013163642957806587, "kl": 0.3017384111881256, "learning_rate": 2.2236368125864392e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1775 }, { "completion_length": 172.2857208251953, "epoch": 1.7044145873320538, "grad_norm": 0.010918336920440197, "kl": 0.30376118421554565, "learning_rate": 2.2209517851973694e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1776 }, { "completion_length": 139.6428680419922, "epoch": 1.7053742802303264, "grad_norm": 1.0875576734542847, "kl": 0.4140048921108246, "learning_rate": 2.2182670837116972e-07, "loss": 0.0004, "reward": 1.7142858505249023, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.2857142984867096, "step": 1777 }, { "completion_length": 135.6428680419922, "epoch": 1.706333973128599, "grad_norm": 1.272343635559082, "kl": 0.4404941499233246, "learning_rate": 2.2155827112649144e-07, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1778 }, { "completion_length": 176.21429443359375, "epoch": 1.7072936660268714, "grad_norm": 1.3886293172836304, "kl": 0.32963651418685913, "learning_rate": 2.2128986709921288e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1779 }, { "completion_length": 191.7857208251953, "epoch": 1.7082533589251438, "grad_norm": 0.9380366802215576, "kl": 0.30057492852211, "learning_rate": 2.2102149660280582e-07, "loss": 0.0003, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1780 }, { "completion_length": 223.21429443359375, "epoch": 1.7092130518234165, "grad_norm": 0.008791402913630009, "kl": 0.2415468841791153, "learning_rate": 2.2075315995070315e-07, "loss": 0.0002, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1781 }, { "completion_length": 192.7857208251953, "epoch": 1.710172744721689, "grad_norm": 0.01387123204767704, "kl": 0.33863601088523865, "learning_rate": 2.204848574562982e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 1782 }, { "completion_length": 157.92857360839844, "epoch": 1.7111324376199617, "grad_norm": 0.009133369661867619, "kl": 0.3280183672904968, "learning_rate": 2.2021658943294407e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1783 }, { "completion_length": 174.1428680419922, "epoch": 1.7120921305182342, "grad_norm": 1.2676564455032349, "kl": 0.35774070024490356, "learning_rate": 2.199483561939541e-07, "loss": 0.0004, "reward": 1.7857143878936768, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1784 }, { "completion_length": 150.92857360839844, "epoch": 1.7130518234165066, "grad_norm": 1.6044024229049683, "kl": 0.39401546120643616, "learning_rate": 2.196801580526006e-07, "loss": 0.0004, "reward": 1.7857143878936768, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1785 }, { "completion_length": 247.2857208251953, "epoch": 1.714011516314779, "grad_norm": 1.1198292970657349, "kl": 0.22281888127326965, "learning_rate": 2.1941199532211513e-07, "loss": 0.0002, "reward": 1.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1786 }, { "completion_length": 201.85714721679688, "epoch": 1.7149712092130518, "grad_norm": 0.015626737847924232, "kl": 0.3363315165042877, "learning_rate": 2.1914386831568785e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1787 }, { "completion_length": 166.57144165039062, "epoch": 1.7159309021113245, "grad_norm": 0.7555420398712158, "kl": 0.35348886251449585, "learning_rate": 2.1887577734646704e-07, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1788 }, { "completion_length": 170.35714721679688, "epoch": 1.716890595009597, "grad_norm": 0.020864086225628853, "kl": 0.36996909976005554, "learning_rate": 2.186077227275592e-07, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1789 }, { "completion_length": 171.07144165039062, "epoch": 1.7178502879078694, "grad_norm": 0.011103760451078415, "kl": 0.365914911031723, "learning_rate": 2.18339704772028e-07, "loss": 0.0004, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1790 }, { "completion_length": 169.7857208251953, "epoch": 1.718809980806142, "grad_norm": 1.1647675037384033, "kl": 0.3219261169433594, "learning_rate": 2.1807172379289452e-07, "loss": 0.0003, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1791 }, { "completion_length": 214.00001525878906, "epoch": 1.7197696737044146, "grad_norm": 1.1535106897354126, "kl": 0.32449573278427124, "learning_rate": 2.1780378010313672e-07, "loss": 0.0003, "reward": 1.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1792 }, { "completion_length": 145.07144165039062, "epoch": 1.7207293666026873, "grad_norm": 0.023853037506341934, "kl": 0.38897469639778137, "learning_rate": 2.1753587401568873e-07, "loss": 0.0004, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1793 }, { "completion_length": 215.85714721679688, "epoch": 1.7216890595009597, "grad_norm": 0.011848253197968006, "kl": 0.3061572015285492, "learning_rate": 2.172680058434411e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1794 }, { "completion_length": 174.57144165039062, "epoch": 1.7226487523992322, "grad_norm": 1.3072028160095215, "kl": 0.3766343295574188, "learning_rate": 2.170001758992399e-07, "loss": 0.0004, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1795 }, { "completion_length": 221.9285888671875, "epoch": 1.7236084452975047, "grad_norm": 1.3127832412719727, "kl": 0.26475489139556885, "learning_rate": 2.1673238449588665e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1796 }, { "completion_length": 130.92857360839844, "epoch": 1.7245681381957774, "grad_norm": 1.2396917343139648, "kl": 0.49932482838630676, "learning_rate": 2.1646463194613759e-07, "loss": 0.0005, "reward": 1.8571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1797 }, { "completion_length": 138.07144165039062, "epoch": 1.72552783109405, "grad_norm": 0.9085126519203186, "kl": 0.3938709795475006, "learning_rate": 2.1619691856270393e-07, "loss": 0.0004, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1798 }, { "completion_length": 144.07144165039062, "epoch": 1.7264875239923225, "grad_norm": 1.1039012670516968, "kl": 0.3769840896129608, "learning_rate": 2.1592924465825094e-07, "loss": 0.0004, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1799 }, { "completion_length": 197.92857360839844, "epoch": 1.727447216890595, "grad_norm": 0.012824938632547855, "kl": 0.33952996134757996, "learning_rate": 2.1566161054539795e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1800 }, { "completion_length": 241.6428680419922, "epoch": 1.7284069097888675, "grad_norm": 0.01143456157296896, "kl": 0.24324601888656616, "learning_rate": 2.153940165367176e-07, "loss": 0.0002, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1801 }, { "completion_length": 178.92857360839844, "epoch": 1.7293666026871402, "grad_norm": 0.013019613921642303, "kl": 0.3631245791912079, "learning_rate": 2.1512646294473592e-07, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1802 }, { "completion_length": 150.35714721679688, "epoch": 1.7303262955854126, "grad_norm": 0.010415567085146904, "kl": 0.39817559719085693, "learning_rate": 2.1485895008193141e-07, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1803 }, { "completion_length": 198.57144165039062, "epoch": 1.7312859884836853, "grad_norm": 1.1541141271591187, "kl": 0.27871471643447876, "learning_rate": 2.1459147826073536e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1804 }, { "completion_length": 174.00001525878906, "epoch": 1.7322456813819578, "grad_norm": 0.04900674894452095, "kl": 0.3824811577796936, "learning_rate": 2.1432404779353095e-07, "loss": 0.0004, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1805 }, { "completion_length": 160.21429443359375, "epoch": 1.7332053742802302, "grad_norm": 1.4079821109771729, "kl": 0.3473271429538727, "learning_rate": 2.1405665899265302e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1806 }, { "completion_length": 160.71429443359375, "epoch": 1.7341650671785027, "grad_norm": 0.010170682333409786, "kl": 0.3281956911087036, "learning_rate": 2.1378931217038788e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1807 }, { "completion_length": 183.71429443359375, "epoch": 1.7351247600767754, "grad_norm": 0.025168605148792267, "kl": 0.34397175908088684, "learning_rate": 2.135220076389728e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1808 }, { "completion_length": 142.2857208251953, "epoch": 1.736084452975048, "grad_norm": 0.008595116436481476, "kl": 0.4059540331363678, "learning_rate": 2.1325474571059557e-07, "loss": 0.0004, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 1809 }, { "completion_length": 171.1428680419922, "epoch": 1.7370441458733206, "grad_norm": 1.1770309209823608, "kl": 0.31159600615501404, "learning_rate": 2.1298752669739412e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1810 }, { "completion_length": 175.7857208251953, "epoch": 1.738003838771593, "grad_norm": 1.3376190662384033, "kl": 0.29266858100891113, "learning_rate": 2.127203509114565e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1811 }, { "completion_length": 195.57144165039062, "epoch": 1.7389635316698655, "grad_norm": 0.008689062669873238, "kl": 0.2940903306007385, "learning_rate": 2.1245321866482015e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1812 }, { "completion_length": 150.07144165039062, "epoch": 1.7399232245681382, "grad_norm": 0.8401545286178589, "kl": 0.41317832469940186, "learning_rate": 2.1218613026947177e-07, "loss": 0.0004, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1813 }, { "completion_length": 176.71429443359375, "epoch": 1.7408829174664109, "grad_norm": 1.0161076784133911, "kl": 0.400284081697464, "learning_rate": 2.119190860373466e-07, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1814 }, { "completion_length": 188.85714721679688, "epoch": 1.7418426103646834, "grad_norm": 0.01698787324130535, "kl": 0.27849432826042175, "learning_rate": 2.1165208628032861e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1815 }, { "completion_length": 169.7857208251953, "epoch": 1.7428023032629558, "grad_norm": 0.008821507915854454, "kl": 0.36160141229629517, "learning_rate": 2.1138513131024967e-07, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1816 }, { "completion_length": 133.6428680419922, "epoch": 1.7437619961612283, "grad_norm": 0.0479695089161396, "kl": 0.42075055837631226, "learning_rate": 2.1111822143888928e-07, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1817 }, { "completion_length": 194.42857360839844, "epoch": 1.744721689059501, "grad_norm": 1.3717389106750488, "kl": 0.3524995744228363, "learning_rate": 2.1085135697797424e-07, "loss": 0.0004, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1818 }, { "completion_length": 141.07144165039062, "epoch": 1.7456813819577737, "grad_norm": 0.02784375660121441, "kl": 0.5088683366775513, "learning_rate": 2.1058453823917854e-07, "loss": 0.0005, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1819 }, { "completion_length": 151.7857208251953, "epoch": 1.7466410748560461, "grad_norm": 1.0845286846160889, "kl": 0.4051264226436615, "learning_rate": 2.103177655341226e-07, "loss": 0.0004, "reward": 1.8571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1820 }, { "completion_length": 174.00001525878906, "epoch": 1.7476007677543186, "grad_norm": 1.2856926918029785, "kl": 0.4137974977493286, "learning_rate": 2.100510391743732e-07, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1821 }, { "completion_length": 150.6428680419922, "epoch": 1.748560460652591, "grad_norm": 0.930648684501648, "kl": 0.4135783016681671, "learning_rate": 2.097843594714428e-07, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1822 }, { "completion_length": 163.2857208251953, "epoch": 1.7495201535508638, "grad_norm": 0.008449151180684566, "kl": 0.3436569571495056, "learning_rate": 2.0951772673678962e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1823 }, { "completion_length": 195.21429443359375, "epoch": 1.7504798464491362, "grad_norm": 0.9711849689483643, "kl": 0.27262169122695923, "learning_rate": 2.0925114128181668e-07, "loss": 0.0003, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1824 }, { "completion_length": 220.00001525878906, "epoch": 1.751439539347409, "grad_norm": 1.5678126811981201, "kl": 0.25449618697166443, "learning_rate": 2.0898460341787211e-07, "loss": 0.0003, "reward": 1.7500001192092896, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1825 }, { "completion_length": 211.2857208251953, "epoch": 1.7523992322456814, "grad_norm": 1.0674076080322266, "kl": 0.274914413690567, "learning_rate": 2.0871811345624836e-07, "loss": 0.0003, "reward": 1.8928571939468384, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 1826 }, { "completion_length": 203.92857360839844, "epoch": 1.7533589251439539, "grad_norm": 0.008762500248849392, "kl": 0.2822112739086151, "learning_rate": 2.0845167170818182e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1827 }, { "completion_length": 140.1428680419922, "epoch": 1.7543186180422263, "grad_norm": 0.009465295821428299, "kl": 0.4278246760368347, "learning_rate": 2.0818527848485273e-07, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1828 }, { "completion_length": 154.1428680419922, "epoch": 1.755278310940499, "grad_norm": 0.009670273400843143, "kl": 0.3549700677394867, "learning_rate": 2.079189340973846e-07, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1829 }, { "completion_length": 206.35714721679688, "epoch": 1.7562380038387717, "grad_norm": 1.625991702079773, "kl": 0.4398762285709381, "learning_rate": 2.0765263885684392e-07, "loss": 0.0004, "reward": 1.8928571939468384, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 1830 }, { "completion_length": 185.57144165039062, "epoch": 1.7571976967370442, "grad_norm": 0.013299784623086452, "kl": 0.3519715964794159, "learning_rate": 2.073863930742396e-07, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1831 }, { "completion_length": 208.50001525878906, "epoch": 1.7581573896353166, "grad_norm": 0.008310815319418907, "kl": 0.2756984531879425, "learning_rate": 2.0712019706052304e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1832 }, { "completion_length": 174.07144165039062, "epoch": 1.7591170825335891, "grad_norm": 0.942729651927948, "kl": 0.35188567638397217, "learning_rate": 2.068540511265874e-07, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1833 }, { "completion_length": 162.6428680419922, "epoch": 1.7600767754318618, "grad_norm": 0.8663691282272339, "kl": 0.3848177492618561, "learning_rate": 2.065879555832674e-07, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1834 }, { "completion_length": 157.92857360839844, "epoch": 1.7610364683301345, "grad_norm": 0.017080102115869522, "kl": 0.5251061320304871, "learning_rate": 2.0632191074133883e-07, "loss": 0.0005, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1835 }, { "completion_length": 200.07144165039062, "epoch": 1.761996161228407, "grad_norm": 0.7776902318000793, "kl": 0.30595093965530396, "learning_rate": 2.060559169115184e-07, "loss": 0.0003, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1836 }, { "completion_length": 163.6428680419922, "epoch": 1.7629558541266794, "grad_norm": 0.01804971508681774, "kl": 0.3707953691482544, "learning_rate": 2.0578997440446303e-07, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1837 }, { "completion_length": 210.1428680419922, "epoch": 1.763915547024952, "grad_norm": 0.008239888586103916, "kl": 0.2830318808555603, "learning_rate": 2.055240835307699e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1838 }, { "completion_length": 192.42857360839844, "epoch": 1.7648752399232246, "grad_norm": 0.009844167158007622, "kl": 0.29087111353874207, "learning_rate": 2.0525824460097568e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1839 }, { "completion_length": 153.5, "epoch": 1.7658349328214973, "grad_norm": 1.260759949684143, "kl": 0.3865780532360077, "learning_rate": 2.0499245792555664e-07, "loss": 0.0004, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1840 }, { "completion_length": 150.0, "epoch": 1.7667946257197698, "grad_norm": 0.057729825377464294, "kl": 0.5110629796981812, "learning_rate": 2.0472672381492784e-07, "loss": 0.0005, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1841 }, { "completion_length": 142.1428680419922, "epoch": 1.7677543186180422, "grad_norm": 2.1962716579437256, "kl": 0.4925171732902527, "learning_rate": 2.0446104257944302e-07, "loss": 0.0005, "reward": 1.821428656578064, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 1842 }, { "completion_length": 156.42857360839844, "epoch": 1.7687140115163147, "grad_norm": 0.011761359870433807, "kl": 0.3585590124130249, "learning_rate": 2.0419541452939422e-07, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1843 }, { "completion_length": 225.71429443359375, "epoch": 1.7696737044145874, "grad_norm": 1.6216622591018677, "kl": 0.2937774360179901, "learning_rate": 2.0392983997501105e-07, "loss": 0.0003, "reward": 1.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1844 }, { "completion_length": 188.50001525878906, "epoch": 1.7706333973128598, "grad_norm": 0.7763956189155579, "kl": 0.3842843472957611, "learning_rate": 2.03664319226461e-07, "loss": 0.0004, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1845 }, { "completion_length": 179.7857208251953, "epoch": 1.7715930902111325, "grad_norm": 0.7300623059272766, "kl": 0.3143831789493561, "learning_rate": 2.0339885259384858e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1846 }, { "completion_length": 201.6428680419922, "epoch": 1.772552783109405, "grad_norm": 1.04839289188385, "kl": 0.28709426522254944, "learning_rate": 2.031334403872152e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1847 }, { "completion_length": 150.5, "epoch": 1.7735124760076775, "grad_norm": 2.299222230911255, "kl": 0.39210328459739685, "learning_rate": 2.028680829165385e-07, "loss": 0.0004, "reward": 1.821428656578064, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1848 }, { "completion_length": 218.21429443359375, "epoch": 1.77447216890595, "grad_norm": 0.6055476069450378, "kl": 0.2728532552719116, "learning_rate": 2.0260278049173235e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1849 }, { "completion_length": 177.1428680419922, "epoch": 1.7754318618042226, "grad_norm": 0.01251512672752142, "kl": 0.3831009268760681, "learning_rate": 2.0233753342264643e-07, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1850 }, { "completion_length": 155.5, "epoch": 1.7763915547024953, "grad_norm": 1.4363501071929932, "kl": 0.37457120418548584, "learning_rate": 2.0207234201906545e-07, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1851 }, { "completion_length": 142.35714721679688, "epoch": 1.7773512476007678, "grad_norm": 1.3467196226119995, "kl": 0.4677678644657135, "learning_rate": 2.0180720659070927e-07, "loss": 0.0005, "reward": 1.8928571939468384, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 1852 }, { "completion_length": 217.21429443359375, "epoch": 1.7783109404990403, "grad_norm": 0.012585796415805817, "kl": 0.2836991250514984, "learning_rate": 2.0154212744723247e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1853 }, { "completion_length": 167.6428680419922, "epoch": 1.7792706333973127, "grad_norm": 0.7482296824455261, "kl": 0.35015857219696045, "learning_rate": 2.0127710489822385e-07, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1854 }, { "completion_length": 126.85714721679688, "epoch": 1.7802303262955854, "grad_norm": 0.025864453986287117, "kl": 0.5136538743972778, "learning_rate": 2.010121392532061e-07, "loss": 0.0005, "reward": 1.7857143878936768, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1855 }, { "completion_length": 183.71429443359375, "epoch": 1.781190019193858, "grad_norm": 0.6854145526885986, "kl": 0.33354073762893677, "learning_rate": 2.007472308216353e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1856 }, { "completion_length": 202.85714721679688, "epoch": 1.7821497120921306, "grad_norm": 0.05068637803196907, "kl": 0.39658457040786743, "learning_rate": 2.0048237991290107e-07, "loss": 0.0004, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1857 }, { "completion_length": 197.85714721679688, "epoch": 1.783109404990403, "grad_norm": 0.010712569579482079, "kl": 0.3150337338447571, "learning_rate": 2.0021758683632535e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1858 }, { "completion_length": 167.71429443359375, "epoch": 1.7840690978886755, "grad_norm": 1.391777515411377, "kl": 0.3107951581478119, "learning_rate": 1.99952851901163e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1859 }, { "completion_length": 237.71429443359375, "epoch": 1.7850287907869482, "grad_norm": 0.006541922688484192, "kl": 0.2284018099308014, "learning_rate": 1.9968817541660067e-07, "loss": 0.0002, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1860 }, { "completion_length": 204.7857208251953, "epoch": 1.785988483685221, "grad_norm": 0.9331498742103577, "kl": 0.3164394497871399, "learning_rate": 1.9942355769175689e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1861 }, { "completion_length": 167.6428680419922, "epoch": 1.7869481765834934, "grad_norm": 0.012757424265146255, "kl": 0.3517015874385834, "learning_rate": 1.9915899903568157e-07, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1862 }, { "completion_length": 174.57144165039062, "epoch": 1.7879078694817658, "grad_norm": 1.0673339366912842, "kl": 0.3253585398197174, "learning_rate": 1.9889449975735568e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1863 }, { "completion_length": 217.07144165039062, "epoch": 1.7888675623800383, "grad_norm": 0.05526060611009598, "kl": 0.3057302236557007, "learning_rate": 1.9863006016569066e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1864 }, { "completion_length": 189.71429443359375, "epoch": 1.789827255278311, "grad_norm": 0.014511408284306526, "kl": 0.32986265420913696, "learning_rate": 1.9836568056952827e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1865 }, { "completion_length": 185.00001525878906, "epoch": 1.7907869481765835, "grad_norm": 0.010506662540137768, "kl": 0.3015686869621277, "learning_rate": 1.9810136127764032e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1866 }, { "completion_length": 168.0, "epoch": 1.7917466410748562, "grad_norm": 1.2148420810699463, "kl": 0.28269365429878235, "learning_rate": 1.9783710259872818e-07, "loss": 0.0003, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1867 }, { "completion_length": 142.5, "epoch": 1.7927063339731286, "grad_norm": 0.031118107959628105, "kl": 0.448001891374588, "learning_rate": 1.9757290484142243e-07, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1868 }, { "completion_length": 142.92857360839844, "epoch": 1.793666026871401, "grad_norm": 0.997117280960083, "kl": 0.3831076920032501, "learning_rate": 1.9730876831428233e-07, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1869 }, { "completion_length": 257.7857360839844, "epoch": 1.7946257197696736, "grad_norm": 0.00892902072519064, "kl": 0.2401788830757141, "learning_rate": 1.9704469332579598e-07, "loss": 0.0002, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1870 }, { "completion_length": 197.00001525878906, "epoch": 1.7955854126679462, "grad_norm": 0.01253671757876873, "kl": 0.3195840120315552, "learning_rate": 1.9678068018437914e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1871 }, { "completion_length": 169.7857208251953, "epoch": 1.796545105566219, "grad_norm": 1.0811400413513184, "kl": 0.2964545786380768, "learning_rate": 1.965167291983757e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1872 }, { "completion_length": 201.57144165039062, "epoch": 1.7975047984644914, "grad_norm": 0.009812836535274982, "kl": 0.2494146078824997, "learning_rate": 1.9625284067605676e-07, "loss": 0.0002, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1873 }, { "completion_length": 129.71429443359375, "epoch": 1.7984644913627639, "grad_norm": 1.4570451974868774, "kl": 0.6910393834114075, "learning_rate": 1.9598901492562063e-07, "loss": 0.0007, "reward": 1.7142858505249023, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1874 }, { "completion_length": 214.21429443359375, "epoch": 1.7994241842610363, "grad_norm": 0.018889635801315308, "kl": 0.3432825207710266, "learning_rate": 1.9572525225519224e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1875 }, { "completion_length": 136.1428680419922, "epoch": 1.800383877159309, "grad_norm": 1.7672898769378662, "kl": 0.41957491636276245, "learning_rate": 1.9546155297282284e-07, "loss": 0.0004, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1876 }, { "completion_length": 183.1428680419922, "epoch": 1.8013435700575817, "grad_norm": 1.9620165824890137, "kl": 0.330174058675766, "learning_rate": 1.951979173864897e-07, "loss": 0.0003, "reward": 1.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1877 }, { "completion_length": 108.71428680419922, "epoch": 1.8023032629558542, "grad_norm": 2.2216391563415527, "kl": 0.5115470290184021, "learning_rate": 1.9493434580409544e-07, "loss": 0.0005, "reward": 1.6428571939468384, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1878 }, { "completion_length": 191.92857360839844, "epoch": 1.8032629558541267, "grad_norm": 0.8165927529335022, "kl": 0.3134758472442627, "learning_rate": 1.946708385334682e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1879 }, { "completion_length": 159.35714721679688, "epoch": 1.8042226487523991, "grad_norm": 1.2981735467910767, "kl": 0.3514205515384674, "learning_rate": 1.9440739588236113e-07, "loss": 0.0004, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1880 }, { "completion_length": 168.5, "epoch": 1.8051823416506718, "grad_norm": 1.739724040031433, "kl": 0.35815873742103577, "learning_rate": 1.941440181584515e-07, "loss": 0.0004, "reward": 1.8571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1881 }, { "completion_length": 203.35714721679688, "epoch": 1.8061420345489443, "grad_norm": 0.023193545639514923, "kl": 0.36888301372528076, "learning_rate": 1.9388070566934105e-07, "loss": 0.0004, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 1882 }, { "completion_length": 161.7857208251953, "epoch": 1.807101727447217, "grad_norm": 1.511566400527954, "kl": 0.35440242290496826, "learning_rate": 1.936174587225553e-07, "loss": 0.0004, "reward": 1.8571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1883 }, { "completion_length": 185.6428680419922, "epoch": 1.8080614203454894, "grad_norm": 0.017290471121668816, "kl": 0.3661518692970276, "learning_rate": 1.933542776255432e-07, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1884 }, { "completion_length": 178.85714721679688, "epoch": 1.809021113243762, "grad_norm": 1.3949555158615112, "kl": 0.43904387950897217, "learning_rate": 1.9309116268567671e-07, "loss": 0.0004, "reward": 1.8571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1885 }, { "completion_length": 177.85714721679688, "epoch": 1.8099808061420346, "grad_norm": 0.022308118641376495, "kl": 0.3239156901836395, "learning_rate": 1.9282811421025052e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1886 }, { "completion_length": 161.85714721679688, "epoch": 1.810940499040307, "grad_norm": 2.2261366844177246, "kl": 0.3088754415512085, "learning_rate": 1.9256513250648182e-07, "loss": 0.0003, "reward": 1.7857143878936768, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1887 }, { "completion_length": 139.5, "epoch": 1.8119001919385798, "grad_norm": 0.05003291741013527, "kl": 0.46628811955451965, "learning_rate": 1.923022178815098e-07, "loss": 0.0005, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1888 }, { "completion_length": 180.35714721679688, "epoch": 1.8128598848368522, "grad_norm": 0.9347400665283203, "kl": 0.31284844875335693, "learning_rate": 1.9203937064239525e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1889 }, { "completion_length": 173.2857208251953, "epoch": 1.8138195777351247, "grad_norm": 0.8577231168746948, "kl": 0.37134698033332825, "learning_rate": 1.9177659109612025e-07, "loss": 0.0004, "reward": 1.8571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1890 }, { "completion_length": 150.85714721679688, "epoch": 1.8147792706333972, "grad_norm": 1.7927157878875732, "kl": 0.3172193467617035, "learning_rate": 1.9151387954958792e-07, "loss": 0.0003, "reward": 1.7142858505249023, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1891 }, { "completion_length": 186.85714721679688, "epoch": 1.8157389635316699, "grad_norm": 0.017083659768104553, "kl": 0.32684364914894104, "learning_rate": 1.9125123630962173e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1892 }, { "completion_length": 188.57144165039062, "epoch": 1.8166986564299425, "grad_norm": 0.020705347880721092, "kl": 0.35029342770576477, "learning_rate": 1.909886616829657e-07, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1893 }, { "completion_length": 149.57144165039062, "epoch": 1.817658349328215, "grad_norm": 2.2820754051208496, "kl": 0.4049258530139923, "learning_rate": 1.9072615597628336e-07, "loss": 0.0004, "reward": 1.7857143878936768, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1894 }, { "completion_length": 166.85714721679688, "epoch": 1.8186180422264875, "grad_norm": 0.8076321482658386, "kl": 0.27668026089668274, "learning_rate": 1.904637194961581e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1895 }, { "completion_length": 135.1428680419922, "epoch": 1.81957773512476, "grad_norm": 0.008904382586479187, "kl": 0.3628988564014435, "learning_rate": 1.902013525490922e-07, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1896 }, { "completion_length": 157.2857208251953, "epoch": 1.8205374280230326, "grad_norm": 0.05270260572433472, "kl": 0.36746320128440857, "learning_rate": 1.8993905544150696e-07, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1897 }, { "completion_length": 229.00001525878906, "epoch": 1.8214971209213053, "grad_norm": 0.8113738298416138, "kl": 0.2327675223350525, "learning_rate": 1.8967682847974193e-07, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1898 }, { "completion_length": 181.2857208251953, "epoch": 1.8224568138195778, "grad_norm": 0.01045924797654152, "kl": 0.3180352449417114, "learning_rate": 1.894146719700546e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1899 }, { "completion_length": 171.85714721679688, "epoch": 1.8234165067178503, "grad_norm": 0.021138200536370277, "kl": 0.3543447256088257, "learning_rate": 1.8915258621862053e-07, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1900 }, { "completion_length": 169.85714721679688, "epoch": 1.8243761996161227, "grad_norm": 0.010435312055051327, "kl": 0.373360276222229, "learning_rate": 1.8889057153153255e-07, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1901 }, { "completion_length": 179.35714721679688, "epoch": 1.8253358925143954, "grad_norm": 1.6297779083251953, "kl": 0.3529931604862213, "learning_rate": 1.886286282148002e-07, "loss": 0.0004, "reward": 1.7857143878936768, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1902 }, { "completion_length": 246.6428680419922, "epoch": 1.826295585412668, "grad_norm": 0.008448568172752857, "kl": 0.24915975332260132, "learning_rate": 1.8836675657435006e-07, "loss": 0.0002, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1903 }, { "completion_length": 139.07144165039062, "epoch": 1.8272552783109406, "grad_norm": 1.559455156326294, "kl": 0.370390921831131, "learning_rate": 1.8810495691602484e-07, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1904 }, { "completion_length": 163.71429443359375, "epoch": 1.828214971209213, "grad_norm": 1.2662568092346191, "kl": 0.4726087152957916, "learning_rate": 1.87843229545583e-07, "loss": 0.0005, "reward": 1.821428656578064, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 1905 }, { "completion_length": 167.5, "epoch": 1.8291746641074855, "grad_norm": 0.9727115035057068, "kl": 0.2909177541732788, "learning_rate": 1.875815747686989e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1906 }, { "completion_length": 167.85714721679688, "epoch": 1.830134357005758, "grad_norm": 0.008976382203400135, "kl": 0.27940815687179565, "learning_rate": 1.8731999289096178e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1907 }, { "completion_length": 213.7857208251953, "epoch": 1.8310940499040307, "grad_norm": 0.010867893695831299, "kl": 0.2800949513912201, "learning_rate": 1.8705848421787608e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1908 }, { "completion_length": 193.1428680419922, "epoch": 1.8320537428023034, "grad_norm": 0.013171780854463577, "kl": 0.3269929587841034, "learning_rate": 1.867970490548605e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1909 }, { "completion_length": 172.35714721679688, "epoch": 1.8330134357005758, "grad_norm": 0.008797976188361645, "kl": 0.2703763246536255, "learning_rate": 1.8653568770724803e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1910 }, { "completion_length": 235.9285888671875, "epoch": 1.8339731285988483, "grad_norm": 1.1883255243301392, "kl": 0.26123595237731934, "learning_rate": 1.8627440048028538e-07, "loss": 0.0003, "reward": 1.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1911 }, { "completion_length": 265.2857360839844, "epoch": 1.8349328214971208, "grad_norm": 0.5791863799095154, "kl": 0.19016896188259125, "learning_rate": 1.8601318767913257e-07, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1912 }, { "completion_length": 147.92857360839844, "epoch": 1.8358925143953935, "grad_norm": 1.3606555461883545, "kl": 0.3356018364429474, "learning_rate": 1.8575204960886293e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1913 }, { "completion_length": 145.2857208251953, "epoch": 1.8368522072936662, "grad_norm": 1.3709949254989624, "kl": 0.3719371557235718, "learning_rate": 1.8549098657446244e-07, "loss": 0.0004, "reward": 1.8928571939468384, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 1914 }, { "completion_length": 134.5, "epoch": 1.8378119001919386, "grad_norm": 2.171954393386841, "kl": 0.39872676134109497, "learning_rate": 1.852299988808293e-07, "loss": 0.0004, "reward": 1.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1915 }, { "completion_length": 184.35714721679688, "epoch": 1.838771593090211, "grad_norm": 1.226800799369812, "kl": 0.3412037789821625, "learning_rate": 1.8496908683277393e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1916 }, { "completion_length": 198.85714721679688, "epoch": 1.8397312859884836, "grad_norm": 0.015235371887683868, "kl": 0.33398500084877014, "learning_rate": 1.8470825073501826e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1917 }, { "completion_length": 230.2857208251953, "epoch": 1.8406909788867563, "grad_norm": 0.9961339235305786, "kl": 0.2490963488817215, "learning_rate": 1.844474908921957e-07, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1918 }, { "completion_length": 157.21429443359375, "epoch": 1.841650671785029, "grad_norm": 0.009566586464643478, "kl": 0.33078014850616455, "learning_rate": 1.8418680760885024e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1919 }, { "completion_length": 206.92857360839844, "epoch": 1.8426103646833014, "grad_norm": 0.007939925417304039, "kl": 0.2808731496334076, "learning_rate": 1.8392620118943674e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1920 }, { "completion_length": 173.1428680419922, "epoch": 1.8435700575815739, "grad_norm": 1.815669298171997, "kl": 0.35683658719062805, "learning_rate": 1.8366567193832028e-07, "loss": 0.0004, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1921 }, { "completion_length": 147.07144165039062, "epoch": 1.8445297504798464, "grad_norm": 1.1887341737747192, "kl": 0.44294536113739014, "learning_rate": 1.834052201597758e-07, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1922 }, { "completion_length": 140.21429443359375, "epoch": 1.845489443378119, "grad_norm": 0.011751845479011536, "kl": 0.38209253549575806, "learning_rate": 1.831448461579876e-07, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1923 }, { "completion_length": 193.1428680419922, "epoch": 1.8464491362763915, "grad_norm": 1.5173895359039307, "kl": 0.30341067910194397, "learning_rate": 1.8288455023704934e-07, "loss": 0.0003, "reward": 1.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1924 }, { "completion_length": 175.07144165039062, "epoch": 1.8474088291746642, "grad_norm": 0.009885312989354134, "kl": 0.2890132665634155, "learning_rate": 1.826243327009635e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1925 }, { "completion_length": 180.57144165039062, "epoch": 1.8483685220729367, "grad_norm": 0.9786601066589355, "kl": 0.3394984304904938, "learning_rate": 1.8236419385364072e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1926 }, { "completion_length": 155.5, "epoch": 1.8493282149712091, "grad_norm": 1.6958726644515991, "kl": 0.3985558748245239, "learning_rate": 1.821041339989001e-07, "loss": 0.0004, "reward": 1.8571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1927 }, { "completion_length": 181.1428680419922, "epoch": 1.8502879078694816, "grad_norm": 0.00700026098638773, "kl": 0.28587403893470764, "learning_rate": 1.8184415344046825e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1928 }, { "completion_length": 145.57144165039062, "epoch": 1.8512476007677543, "grad_norm": 0.023506905883550644, "kl": 0.37414029240608215, "learning_rate": 1.8158425248197928e-07, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1929 }, { "completion_length": 160.35714721679688, "epoch": 1.852207293666027, "grad_norm": 2.592574119567871, "kl": 0.46542462706565857, "learning_rate": 1.813244314269743e-07, "loss": 0.0005, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1930 }, { "completion_length": 215.7857208251953, "epoch": 1.8531669865642995, "grad_norm": 1.5825695991516113, "kl": 0.2887088656425476, "learning_rate": 1.8106469057890112e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1931 }, { "completion_length": 192.00001525878906, "epoch": 1.854126679462572, "grad_norm": 1.5903030633926392, "kl": 0.2834071218967438, "learning_rate": 1.8080503024111392e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1932 }, { "completion_length": 171.35714721679688, "epoch": 1.8550863723608444, "grad_norm": 0.8892157673835754, "kl": 0.33015045523643494, "learning_rate": 1.805454507168726e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1933 }, { "completion_length": 170.6428680419922, "epoch": 1.856046065259117, "grad_norm": 1.283483862876892, "kl": 0.41137269139289856, "learning_rate": 1.8028595230934299e-07, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1934 }, { "completion_length": 151.92857360839844, "epoch": 1.8570057581573898, "grad_norm": 1.89406418800354, "kl": 0.3373829424381256, "learning_rate": 1.8002653532159605e-07, "loss": 0.0003, "reward": 1.7500001192092896, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3214285969734192, "step": 1935 }, { "completion_length": 150.57144165039062, "epoch": 1.8579654510556622, "grad_norm": 2.3552584648132324, "kl": 0.4371712803840637, "learning_rate": 1.7976720005660767e-07, "loss": 0.0004, "reward": 1.7500001192092896, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 1936 }, { "completion_length": 145.6428680419922, "epoch": 1.8589251439539347, "grad_norm": 1.8996237516403198, "kl": 0.3886772692203522, "learning_rate": 1.7950794681725821e-07, "loss": 0.0004, "reward": 1.7857143878936768, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1937 }, { "completion_length": 187.57144165039062, "epoch": 1.8598848368522072, "grad_norm": 0.008188692852854729, "kl": 0.2824779450893402, "learning_rate": 1.7924877590633237e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1938 }, { "completion_length": 161.0, "epoch": 1.8608445297504799, "grad_norm": 0.010119221173226833, "kl": 0.37784114480018616, "learning_rate": 1.7898968762651874e-07, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1939 }, { "completion_length": 151.1428680419922, "epoch": 1.8618042226487526, "grad_norm": 1.1740511655807495, "kl": 0.6031515002250671, "learning_rate": 1.7873068228040912e-07, "loss": 0.0006, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1940 }, { "completion_length": 162.71429443359375, "epoch": 1.862763915547025, "grad_norm": 0.8717838525772095, "kl": 0.31619855761528015, "learning_rate": 1.784717601704986e-07, "loss": 0.0003, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1941 }, { "completion_length": 162.07144165039062, "epoch": 1.8637236084452975, "grad_norm": 0.7092552781105042, "kl": 0.2838698625564575, "learning_rate": 1.7821292159918516e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1942 }, { "completion_length": 166.35714721679688, "epoch": 1.86468330134357, "grad_norm": 1.4489967823028564, "kl": 0.5334750413894653, "learning_rate": 1.7795416686876922e-07, "loss": 0.0005, "reward": 1.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1943 }, { "completion_length": 174.57144165039062, "epoch": 1.8656429942418427, "grad_norm": 0.016640866175293922, "kl": 0.3756978213787079, "learning_rate": 1.7769549628145306e-07, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1944 }, { "completion_length": 140.85714721679688, "epoch": 1.8666026871401151, "grad_norm": 0.016462670639157295, "kl": 0.32723701000213623, "learning_rate": 1.7743691013934102e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1945 }, { "completion_length": 149.6428680419922, "epoch": 1.8675623800383878, "grad_norm": 1.8824131488800049, "kl": 0.3326510787010193, "learning_rate": 1.7717840874443836e-07, "loss": 0.0003, "reward": 1.7857143878936768, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1946 }, { "completion_length": 185.71429443359375, "epoch": 1.8685220729366603, "grad_norm": 3.2123382091522217, "kl": 0.430915504693985, "learning_rate": 1.7691999239865176e-07, "loss": 0.0004, "reward": 1.7857143878936768, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1947 }, { "completion_length": 171.1428680419922, "epoch": 1.8694817658349328, "grad_norm": 1.5744131803512573, "kl": 0.3193838596343994, "learning_rate": 1.7666166140378853e-07, "loss": 0.0003, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1948 }, { "completion_length": 176.21429443359375, "epoch": 1.8704414587332052, "grad_norm": 0.931290328502655, "kl": 0.4100518524646759, "learning_rate": 1.76403416061556e-07, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1949 }, { "completion_length": 152.5, "epoch": 1.871401151631478, "grad_norm": 1.0797972679138184, "kl": 0.38546136021614075, "learning_rate": 1.7614525667356184e-07, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1950 }, { "completion_length": 182.85714721679688, "epoch": 1.8723608445297506, "grad_norm": 0.011893948540091515, "kl": 0.3100864887237549, "learning_rate": 1.758871835413131e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1951 }, { "completion_length": 132.5, "epoch": 1.873320537428023, "grad_norm": 0.85755455493927, "kl": 0.38546842336654663, "learning_rate": 1.7562919696621624e-07, "loss": 0.0004, "reward": 1.7500001192092896, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 1952 }, { "completion_length": 186.85714721679688, "epoch": 1.8742802303262955, "grad_norm": 1.619438648223877, "kl": 0.3423078656196594, "learning_rate": 1.753712972495764e-07, "loss": 0.0003, "reward": 1.6785714626312256, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 1953 }, { "completion_length": 178.35714721679688, "epoch": 1.875239923224568, "grad_norm": 1.541591763496399, "kl": 0.33740589022636414, "learning_rate": 1.7511348469259745e-07, "loss": 0.0003, "reward": 1.7857143878936768, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1954 }, { "completion_length": 171.21429443359375, "epoch": 1.8761996161228407, "grad_norm": 0.011291819624602795, "kl": 0.3147250711917877, "learning_rate": 1.7485575959638149e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1955 }, { "completion_length": 179.71429443359375, "epoch": 1.8771593090211134, "grad_norm": 1.6299210786819458, "kl": 0.39758914709091187, "learning_rate": 1.7459812226192843e-07, "loss": 0.0004, "reward": 1.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1956 }, { "completion_length": 209.71429443359375, "epoch": 1.8781190019193859, "grad_norm": 2.0998826026916504, "kl": 0.30655282735824585, "learning_rate": 1.743405729901356e-07, "loss": 0.0003, "reward": 1.7142858505249023, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.2857142984867096, "step": 1957 }, { "completion_length": 145.42857360839844, "epoch": 1.8790786948176583, "grad_norm": 1.6770509481430054, "kl": 0.4093273878097534, "learning_rate": 1.740831120817976e-07, "loss": 0.0004, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1958 }, { "completion_length": 212.07144165039062, "epoch": 1.8800383877159308, "grad_norm": 0.9799344539642334, "kl": 0.28077900409698486, "learning_rate": 1.7382573983760583e-07, "loss": 0.0003, "reward": 1.7500001192092896, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 1959 }, { "completion_length": 158.57144165039062, "epoch": 1.8809980806142035, "grad_norm": 0.008978622034192085, "kl": 0.3699891269207001, "learning_rate": 1.7356845655814796e-07, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1960 }, { "completion_length": 164.1428680419922, "epoch": 1.8819577735124762, "grad_norm": 1.5636346340179443, "kl": 0.3519386053085327, "learning_rate": 1.7331126254390803e-07, "loss": 0.0004, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1961 }, { "completion_length": 189.71429443359375, "epoch": 1.8829174664107486, "grad_norm": 1.144923210144043, "kl": 0.3858349919319153, "learning_rate": 1.7305415809526553e-07, "loss": 0.0004, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1962 }, { "completion_length": 198.21429443359375, "epoch": 1.883877159309021, "grad_norm": 1.425498127937317, "kl": 0.33455690741539, "learning_rate": 1.727971435124956e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1963 }, { "completion_length": 144.57144165039062, "epoch": 1.8848368522072936, "grad_norm": 1.721240758895874, "kl": 0.3698238730430603, "learning_rate": 1.7254021909576833e-07, "loss": 0.0004, "reward": 1.821428656578064, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 1964 }, { "completion_length": 165.85714721679688, "epoch": 1.8857965451055663, "grad_norm": 1.1778347492218018, "kl": 0.4697466790676117, "learning_rate": 1.7228338514514841e-07, "loss": 0.0005, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1965 }, { "completion_length": 161.07144165039062, "epoch": 1.8867562380038387, "grad_norm": 1.4128236770629883, "kl": 0.4352630078792572, "learning_rate": 1.7202664196059516e-07, "loss": 0.0004, "reward": 1.821428656578064, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1966 }, { "completion_length": 183.35714721679688, "epoch": 1.8877159309021114, "grad_norm": 0.009160565212368965, "kl": 0.30039429664611816, "learning_rate": 1.7176998984196144e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1967 }, { "completion_length": 184.57144165039062, "epoch": 1.888675623800384, "grad_norm": 0.7680032849311829, "kl": 0.2938414514064789, "learning_rate": 1.7151342908899413e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1968 }, { "completion_length": 185.1428680419922, "epoch": 1.8896353166986564, "grad_norm": 1.2562705278396606, "kl": 0.3603867292404175, "learning_rate": 1.712569600013333e-07, "loss": 0.0004, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1969 }, { "completion_length": 178.21429443359375, "epoch": 1.8905950095969288, "grad_norm": 1.3123679161071777, "kl": 0.31860288977622986, "learning_rate": 1.7100058287851187e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1970 }, { "completion_length": 265.8571472167969, "epoch": 1.8915547024952015, "grad_norm": 0.7623867392539978, "kl": 0.2601465582847595, "learning_rate": 1.707442980199555e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1971 }, { "completion_length": 216.92857360839844, "epoch": 1.8925143953934742, "grad_norm": 0.00888078473508358, "kl": 0.23970438539981842, "learning_rate": 1.7048810572498193e-07, "loss": 0.0002, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1972 }, { "completion_length": 179.1428680419922, "epoch": 1.8934740882917467, "grad_norm": 2.003784418106079, "kl": 0.37862861156463623, "learning_rate": 1.702320062928011e-07, "loss": 0.0004, "reward": 1.821428656578064, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 1973 }, { "completion_length": 160.07144165039062, "epoch": 1.8944337811900192, "grad_norm": 0.01261183898895979, "kl": 0.40252357721328735, "learning_rate": 1.6997600002251404e-07, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1974 }, { "completion_length": 200.71429443359375, "epoch": 1.8953934740882916, "grad_norm": 1.5364468097686768, "kl": 0.3263211250305176, "learning_rate": 1.6972008721311326e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1975 }, { "completion_length": 168.42857360839844, "epoch": 1.8963531669865643, "grad_norm": 0.012072536163032055, "kl": 0.35699865221977234, "learning_rate": 1.6946426816348213e-07, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1976 }, { "completion_length": 176.71429443359375, "epoch": 1.897312859884837, "grad_norm": 0.007845624350011349, "kl": 0.3951348662376404, "learning_rate": 1.6920854317239447e-07, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1977 }, { "completion_length": 156.1428680419922, "epoch": 1.8982725527831095, "grad_norm": 0.011883349157869816, "kl": 0.37490779161453247, "learning_rate": 1.6895291253851413e-07, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1978 }, { "completion_length": 185.57144165039062, "epoch": 1.899232245681382, "grad_norm": 0.6992540955543518, "kl": 0.3388931453227997, "learning_rate": 1.6869737656039507e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1979 }, { "completion_length": 158.85714721679688, "epoch": 1.9001919385796544, "grad_norm": 0.022769002243876457, "kl": 0.38256147503852844, "learning_rate": 1.6844193553648022e-07, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1980 }, { "completion_length": 160.0, "epoch": 1.901151631477927, "grad_norm": 1.5068466663360596, "kl": 0.32458463311195374, "learning_rate": 1.6818658976510203e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1981 }, { "completion_length": 169.6428680419922, "epoch": 1.9021113243761996, "grad_norm": 0.8860602378845215, "kl": 0.3778272867202759, "learning_rate": 1.6793133954448163e-07, "loss": 0.0004, "reward": 1.8928571939468384, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 1982 }, { "completion_length": 164.1428680419922, "epoch": 1.9030710172744723, "grad_norm": 1.5072095394134521, "kl": 0.38499072194099426, "learning_rate": 1.6767618517272837e-07, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1983 }, { "completion_length": 222.21429443359375, "epoch": 1.9040307101727447, "grad_norm": 1.0171705484390259, "kl": 0.2711343467235565, "learning_rate": 1.6742112694783978e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1984 }, { "completion_length": 142.71429443359375, "epoch": 1.9049904030710172, "grad_norm": 1.29362154006958, "kl": 0.3788983225822449, "learning_rate": 1.6716616516770116e-07, "loss": 0.0004, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1985 }, { "completion_length": 167.5, "epoch": 1.9059500959692899, "grad_norm": 0.010531735606491566, "kl": 0.3563062250614166, "learning_rate": 1.669113001300851e-07, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1986 }, { "completion_length": 163.5, "epoch": 1.9069097888675623, "grad_norm": 1.455471158027649, "kl": 0.3731844127178192, "learning_rate": 1.666565321326512e-07, "loss": 0.0004, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1987 }, { "completion_length": 181.7857208251953, "epoch": 1.907869481765835, "grad_norm": 1.5951939821243286, "kl": 0.3583778440952301, "learning_rate": 1.664018614729456e-07, "loss": 0.0004, "reward": 1.8928571939468384, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 1988 }, { "completion_length": 142.1428680419922, "epoch": 1.9088291746641075, "grad_norm": 0.012394011951982975, "kl": 0.4522750973701477, "learning_rate": 1.6614728844840103e-07, "loss": 0.0005, "reward": 1.7857143878936768, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.2857142984867096, "step": 1989 }, { "completion_length": 194.50001525878906, "epoch": 1.90978886756238, "grad_norm": 0.02899080142378807, "kl": 0.3198295831680298, "learning_rate": 1.6589281335633604e-07, "loss": 0.0003, "reward": 1.7142858505249023, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1990 }, { "completion_length": 195.85714721679688, "epoch": 1.9107485604606524, "grad_norm": 0.0118798753246665, "kl": 0.380973219871521, "learning_rate": 1.6563843649395476e-07, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 1991 }, { "completion_length": 228.1428680419922, "epoch": 1.9117082533589251, "grad_norm": 0.023082805797457695, "kl": 0.349768728017807, "learning_rate": 1.6538415815834665e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1992 }, { "completion_length": 166.2857208251953, "epoch": 1.9126679462571978, "grad_norm": 1.0367299318313599, "kl": 0.391125351190567, "learning_rate": 1.6512997864648625e-07, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1993 }, { "completion_length": 165.2857208251953, "epoch": 1.9136276391554703, "grad_norm": 1.2704554796218872, "kl": 0.3432885706424713, "learning_rate": 1.6487589825523234e-07, "loss": 0.0003, "reward": 1.7500001192092896, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1994 }, { "completion_length": 188.2857208251953, "epoch": 1.9145873320537428, "grad_norm": 0.010816401802003384, "kl": 0.32315921783447266, "learning_rate": 1.6462191728132825e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1995 }, { "completion_length": 170.71429443359375, "epoch": 1.9155470249520152, "grad_norm": 0.8576636910438538, "kl": 0.3023669123649597, "learning_rate": 1.6436803602140095e-07, "loss": 0.0003, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 1996 }, { "completion_length": 153.21429443359375, "epoch": 1.916506717850288, "grad_norm": 0.008117595687508583, "kl": 0.3511749804019928, "learning_rate": 1.6411425477196116e-07, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1997 }, { "completion_length": 176.1428680419922, "epoch": 1.9174664107485606, "grad_norm": 0.07326306402683258, "kl": 0.458021342754364, "learning_rate": 1.6386057382940283e-07, "loss": 0.0005, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1998 }, { "completion_length": 207.92857360839844, "epoch": 1.918426103646833, "grad_norm": 1.5790928602218628, "kl": 0.3247391879558563, "learning_rate": 1.6360699349000246e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 1999 }, { "completion_length": 153.1428680419922, "epoch": 1.9193857965451055, "grad_norm": 0.02794785052537918, "kl": 0.38632211089134216, "learning_rate": 1.6335351404991942e-07, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2000 }, { "completion_length": 183.71429443359375, "epoch": 1.920345489443378, "grad_norm": 0.009280946105718613, "kl": 0.30543404817581177, "learning_rate": 1.6310013580519485e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2001 }, { "completion_length": 158.35714721679688, "epoch": 1.9213051823416507, "grad_norm": 2.220155954360962, "kl": 0.35431572794914246, "learning_rate": 1.6284685905175204e-07, "loss": 0.0004, "reward": 1.7500001192092896, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2002 }, { "completion_length": 172.50001525878906, "epoch": 1.9222648752399232, "grad_norm": 1.4862374067306519, "kl": 0.3486287295818329, "learning_rate": 1.625936840853957e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2003 }, { "completion_length": 179.00001525878906, "epoch": 1.9232245681381959, "grad_norm": 0.010594288818538189, "kl": 0.30425429344177246, "learning_rate": 1.6234061120181143e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2004 }, { "completion_length": 133.6428680419922, "epoch": 1.9241842610364683, "grad_norm": 1.4406238794326782, "kl": 0.41387832164764404, "learning_rate": 1.6208764069656578e-07, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2005 }, { "completion_length": 188.1428680419922, "epoch": 1.9251439539347408, "grad_norm": 0.014161624945700169, "kl": 0.31635648012161255, "learning_rate": 1.6183477286510586e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2006 }, { "completion_length": 134.0, "epoch": 1.9261036468330133, "grad_norm": 0.02344086952507496, "kl": 0.4702874720096588, "learning_rate": 1.615820080027586e-07, "loss": 0.0005, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2007 }, { "completion_length": 199.00001525878906, "epoch": 1.927063339731286, "grad_norm": 0.023382142186164856, "kl": 0.3414263427257538, "learning_rate": 1.6132934640473078e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2008 }, { "completion_length": 180.35714721679688, "epoch": 1.9280230326295587, "grad_norm": 0.010912965051829815, "kl": 0.31634843349456787, "learning_rate": 1.610767883661086e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2009 }, { "completion_length": 156.21429443359375, "epoch": 1.9289827255278311, "grad_norm": 1.6501529216766357, "kl": 0.42608916759490967, "learning_rate": 1.608243341818573e-07, "loss": 0.0004, "reward": 1.8928571939468384, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2010 }, { "completion_length": 143.2857208251953, "epoch": 1.9299424184261036, "grad_norm": 1.5467026233673096, "kl": 0.36711934208869934, "learning_rate": 1.6057198414682086e-07, "loss": 0.0004, "reward": 1.8928571939468384, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2011 }, { "completion_length": 169.92857360839844, "epoch": 1.930902111324376, "grad_norm": 0.006167185492813587, "kl": 0.37335988879203796, "learning_rate": 1.603197385557215e-07, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2012 }, { "completion_length": 168.6428680419922, "epoch": 1.9318618042226487, "grad_norm": 0.8617865443229675, "kl": 0.3078854978084564, "learning_rate": 1.6006759770315976e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2013 }, { "completion_length": 171.2857208251953, "epoch": 1.9328214971209214, "grad_norm": 0.04290061444044113, "kl": 0.4400251507759094, "learning_rate": 1.5981556188361334e-07, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2014 }, { "completion_length": 164.85714721679688, "epoch": 1.933781190019194, "grad_norm": 0.01119558047503233, "kl": 0.35357552766799927, "learning_rate": 1.5956363139143774e-07, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2015 }, { "completion_length": 173.85714721679688, "epoch": 1.9347408829174664, "grad_norm": 0.01392086036503315, "kl": 0.3729488253593445, "learning_rate": 1.5931180652086528e-07, "loss": 0.0004, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2016 }, { "completion_length": 245.57144165039062, "epoch": 1.9357005758157388, "grad_norm": 1.1205893754959106, "kl": 0.2925494611263275, "learning_rate": 1.5906008756600488e-07, "loss": 0.0003, "reward": 1.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2017 }, { "completion_length": 160.42857360839844, "epoch": 1.9366602687140115, "grad_norm": 0.01326189935207367, "kl": 0.4142484962940216, "learning_rate": 1.5880847482084182e-07, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2018 }, { "completion_length": 160.35714721679688, "epoch": 1.9376199616122842, "grad_norm": 0.011254110373556614, "kl": 0.3594672381877899, "learning_rate": 1.5855696857923736e-07, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2019 }, { "completion_length": 186.7857208251953, "epoch": 1.9385796545105567, "grad_norm": 1.535846471786499, "kl": 0.36704951524734497, "learning_rate": 1.583055691349283e-07, "loss": 0.0004, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 2020 }, { "completion_length": 175.35714721679688, "epoch": 1.9395393474088292, "grad_norm": 0.009774011559784412, "kl": 0.33024725317955017, "learning_rate": 1.5805427678152674e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2021 }, { "completion_length": 154.57144165039062, "epoch": 1.9404990403071016, "grad_norm": 0.9087936282157898, "kl": 0.43296703696250916, "learning_rate": 1.5780309181251965e-07, "loss": 0.0004, "reward": 1.8571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2022 }, { "completion_length": 191.35714721679688, "epoch": 1.9414587332053743, "grad_norm": 0.014832066372036934, "kl": 0.35193201899528503, "learning_rate": 1.575520145212687e-07, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2023 }, { "completion_length": 222.71429443359375, "epoch": 1.9424184261036468, "grad_norm": 0.6546249389648438, "kl": 0.33302417397499084, "learning_rate": 1.573010452010098e-07, "loss": 0.0003, "reward": 1.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2024 }, { "completion_length": 207.2857208251953, "epoch": 1.9433781190019195, "grad_norm": 1.3195998668670654, "kl": 0.31304067373275757, "learning_rate": 1.570501841448526e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2025 }, { "completion_length": 166.1428680419922, "epoch": 1.944337811900192, "grad_norm": 0.011959760449826717, "kl": 0.37135738134384155, "learning_rate": 1.5679943164578046e-07, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2026 }, { "completion_length": 218.92857360839844, "epoch": 1.9452975047984644, "grad_norm": 0.015206662006676197, "kl": 0.26863667368888855, "learning_rate": 1.5654878799665004e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2027 }, { "completion_length": 182.07144165039062, "epoch": 1.9462571976967369, "grad_norm": 1.1900506019592285, "kl": 0.34265896677970886, "learning_rate": 1.5629825349019052e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2028 }, { "completion_length": 138.35714721679688, "epoch": 1.9472168905950096, "grad_norm": 1.3519591093063354, "kl": 0.3613649308681488, "learning_rate": 1.5604782841900394e-07, "loss": 0.0004, "reward": 1.571428656578064, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2029 }, { "completion_length": 131.57144165039062, "epoch": 1.9481765834932823, "grad_norm": 0.021612638607621193, "kl": 0.3898244798183441, "learning_rate": 1.5579751307556438e-07, "loss": 0.0004, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2030 }, { "completion_length": 187.42857360839844, "epoch": 1.9491362763915547, "grad_norm": 0.03385988995432854, "kl": 0.3703863322734833, "learning_rate": 1.5554730775221786e-07, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2031 }, { "completion_length": 155.92857360839844, "epoch": 1.9500959692898272, "grad_norm": 1.8355895280838013, "kl": 0.37626275420188904, "learning_rate": 1.5529721274118185e-07, "loss": 0.0004, "reward": 1.7857143878936768, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2032 }, { "completion_length": 191.00001525878906, "epoch": 1.9510556621880997, "grad_norm": 0.009655044414103031, "kl": 0.2886083722114563, "learning_rate": 1.5504722833454493e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2033 }, { "completion_length": 150.21429443359375, "epoch": 1.9520153550863724, "grad_norm": 0.924811840057373, "kl": 0.4297758638858795, "learning_rate": 1.5479735482426673e-07, "loss": 0.0004, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 2034 }, { "completion_length": 181.21429443359375, "epoch": 1.952975047984645, "grad_norm": 0.014885883778333664, "kl": 0.3386618196964264, "learning_rate": 1.5454759250217697e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2035 }, { "completion_length": 198.07144165039062, "epoch": 1.9539347408829175, "grad_norm": 0.018241995945572853, "kl": 0.3018340468406677, "learning_rate": 1.5429794165997583e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2036 }, { "completion_length": 160.21429443359375, "epoch": 1.95489443378119, "grad_norm": 0.9790917634963989, "kl": 0.3683483898639679, "learning_rate": 1.540484025892333e-07, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2037 }, { "completion_length": 168.2857208251953, "epoch": 1.9558541266794625, "grad_norm": 0.011446705088019371, "kl": 0.3359403908252716, "learning_rate": 1.537989755813886e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2038 }, { "completion_length": 203.35714721679688, "epoch": 1.9568138195777351, "grad_norm": 1.0232470035552979, "kl": 0.3051636815071106, "learning_rate": 1.5354966092775024e-07, "loss": 0.0003, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2039 }, { "completion_length": 157.57144165039062, "epoch": 1.9577735124760078, "grad_norm": 0.955339252948761, "kl": 0.4054111838340759, "learning_rate": 1.5330045891949555e-07, "loss": 0.0004, "reward": 1.8571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2040 }, { "completion_length": 191.35714721679688, "epoch": 1.9587332053742803, "grad_norm": 1.200582504272461, "kl": 0.33067527413368225, "learning_rate": 1.5305136984767018e-07, "loss": 0.0003, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2041 }, { "completion_length": 146.2857208251953, "epoch": 1.9596928982725528, "grad_norm": 2.010958671569824, "kl": 0.42395779490470886, "learning_rate": 1.5280239400318786e-07, "loss": 0.0004, "reward": 1.6428571939468384, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2042 }, { "completion_length": 186.35714721679688, "epoch": 1.9606525911708252, "grad_norm": 1.1368249654769897, "kl": 0.3337269127368927, "learning_rate": 1.5255353167683017e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2043 }, { "completion_length": 202.42857360839844, "epoch": 1.961612284069098, "grad_norm": 0.009827221743762493, "kl": 0.29964253306388855, "learning_rate": 1.523047831592461e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2044 }, { "completion_length": 217.35714721679688, "epoch": 1.9625719769673704, "grad_norm": 0.6976305842399597, "kl": 0.25965091586112976, "learning_rate": 1.5205614874095176e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2045 }, { "completion_length": 157.92857360839844, "epoch": 1.963531669865643, "grad_norm": 1.3418989181518555, "kl": 0.4073353111743927, "learning_rate": 1.5180762871232986e-07, "loss": 0.0004, "reward": 1.8571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2046 }, { "completion_length": 134.85714721679688, "epoch": 1.9644913627639156, "grad_norm": 2.114549398422241, "kl": 0.5092458724975586, "learning_rate": 1.5155922336362976e-07, "loss": 0.0005, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2047 }, { "completion_length": 162.85714721679688, "epoch": 1.965451055662188, "grad_norm": 1.361716628074646, "kl": 0.4338891804218292, "learning_rate": 1.5131093298496645e-07, "loss": 0.0004, "reward": 1.8571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2048 }, { "completion_length": 220.35714721679688, "epoch": 1.9664107485604605, "grad_norm": 0.011171914637088776, "kl": 0.23637230694293976, "learning_rate": 1.510627578663211e-07, "loss": 0.0002, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2049 }, { "completion_length": 169.21429443359375, "epoch": 1.9673704414587332, "grad_norm": 1.1802127361297607, "kl": 0.3974737226963043, "learning_rate": 1.5081469829754018e-07, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2050 }, { "completion_length": 188.35714721679688, "epoch": 1.9683301343570059, "grad_norm": 0.9808129072189331, "kl": 0.29325419664382935, "learning_rate": 1.505667545683349e-07, "loss": 0.0003, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2051 }, { "completion_length": 172.6428680419922, "epoch": 1.9692898272552783, "grad_norm": 0.01445225439965725, "kl": 0.3461993932723999, "learning_rate": 1.503189269682815e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2052 }, { "completion_length": 209.21429443359375, "epoch": 1.9702495201535508, "grad_norm": 0.8063792586326599, "kl": 0.24461404979228973, "learning_rate": 1.5007121578682056e-07, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2053 }, { "completion_length": 261.4285888671875, "epoch": 1.9712092130518233, "grad_norm": 0.012657571583986282, "kl": 0.2428378164768219, "learning_rate": 1.4982362131325653e-07, "loss": 0.0002, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2054 }, { "completion_length": 191.2857208251953, "epoch": 1.972168905950096, "grad_norm": 0.01759399101138115, "kl": 0.3082490861415863, "learning_rate": 1.4957614383675767e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2055 }, { "completion_length": 171.71429443359375, "epoch": 1.9731285988483687, "grad_norm": 0.7553858160972595, "kl": 0.3326759338378906, "learning_rate": 1.4932878364635555e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2056 }, { "completion_length": 167.57144165039062, "epoch": 1.9740882917466411, "grad_norm": 0.008583560585975647, "kl": 0.4030052125453949, "learning_rate": 1.4908154103094477e-07, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2057 }, { "completion_length": 201.85714721679688, "epoch": 1.9750479846449136, "grad_norm": 0.01708386465907097, "kl": 0.27616938948631287, "learning_rate": 1.4883441627928272e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2058 }, { "completion_length": 164.71429443359375, "epoch": 1.976007677543186, "grad_norm": 1.7796300649642944, "kl": 0.3821627199649811, "learning_rate": 1.4858740967998895e-07, "loss": 0.0004, "reward": 1.7500001192092896, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3214285969734192, "step": 2059 }, { "completion_length": 165.2857208251953, "epoch": 1.9769673704414588, "grad_norm": 0.010119158774614334, "kl": 0.33980146050453186, "learning_rate": 1.4834052152154513e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2060 }, { "completion_length": 137.35714721679688, "epoch": 1.9779270633397315, "grad_norm": 1.4646477699279785, "kl": 0.3973689675331116, "learning_rate": 1.480937520922947e-07, "loss": 0.0004, "reward": 1.6785714626312256, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3214285969734192, "step": 2061 }, { "completion_length": 151.07144165039062, "epoch": 1.978886756238004, "grad_norm": 0.018644263967871666, "kl": 0.37880757451057434, "learning_rate": 1.4784710168044212e-07, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2062 }, { "completion_length": 192.07144165039062, "epoch": 1.9798464491362764, "grad_norm": 0.008864806964993477, "kl": 0.27276307344436646, "learning_rate": 1.476005705740532e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2063 }, { "completion_length": 192.7857208251953, "epoch": 1.9808061420345489, "grad_norm": 0.8032196760177612, "kl": 0.27232515811920166, "learning_rate": 1.4735415906105417e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2064 }, { "completion_length": 190.42857360839844, "epoch": 1.9817658349328215, "grad_norm": 1.6236575841903687, "kl": 0.3219146132469177, "learning_rate": 1.471078674292317e-07, "loss": 0.0003, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 2065 }, { "completion_length": 161.5, "epoch": 1.982725527831094, "grad_norm": 0.025129202753305435, "kl": 0.3905807435512543, "learning_rate": 1.4686169596623247e-07, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2066 }, { "completion_length": 221.57144165039062, "epoch": 1.9836852207293667, "grad_norm": 1.033141851425171, "kl": 0.2785645127296448, "learning_rate": 1.4661564495956268e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2067 }, { "completion_length": 162.35714721679688, "epoch": 1.9846449136276392, "grad_norm": 0.8015051484107971, "kl": 0.39090049266815186, "learning_rate": 1.463697146965881e-07, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2068 }, { "completion_length": 181.2857208251953, "epoch": 1.9856046065259116, "grad_norm": 2.070223808288574, "kl": 0.3575485944747925, "learning_rate": 1.4612390546453308e-07, "loss": 0.0004, "reward": 1.821428656578064, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2069 }, { "completion_length": 211.50001525878906, "epoch": 1.986564299424184, "grad_norm": 1.1228636503219604, "kl": 0.3322354257106781, "learning_rate": 1.4587821755048097e-07, "loss": 0.0003, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2070 }, { "completion_length": 148.21429443359375, "epoch": 1.9875239923224568, "grad_norm": 1.0765597820281982, "kl": 0.35189881920814514, "learning_rate": 1.456326512413734e-07, "loss": 0.0004, "reward": 1.8571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2071 }, { "completion_length": 176.7857208251953, "epoch": 1.9884836852207295, "grad_norm": 0.032467070966959, "kl": 0.35254254937171936, "learning_rate": 1.4538720682400967e-07, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2072 }, { "completion_length": 185.1428680419922, "epoch": 1.989443378119002, "grad_norm": 1.6520260572433472, "kl": 0.3045274317264557, "learning_rate": 1.4514188458504724e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2073 }, { "completion_length": 164.6428680419922, "epoch": 1.9904030710172744, "grad_norm": 2.433107852935791, "kl": 0.46404320001602173, "learning_rate": 1.4489668481100037e-07, "loss": 0.0005, "reward": 1.7500001192092896, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 2074 }, { "completion_length": 149.1428680419922, "epoch": 1.991362763915547, "grad_norm": 0.9082654118537903, "kl": 0.3397182524204254, "learning_rate": 1.446516077882406e-07, "loss": 0.0003, "reward": 1.7142858505249023, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2075 }, { "completion_length": 156.71429443359375, "epoch": 1.9923224568138196, "grad_norm": 1.8558710813522339, "kl": 0.3581148087978363, "learning_rate": 1.4440665380299593e-07, "loss": 0.0004, "reward": 1.7142858505249023, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2076 }, { "completion_length": 200.21429443359375, "epoch": 1.9932821497120923, "grad_norm": 1.0984302759170532, "kl": 0.5499500632286072, "learning_rate": 1.4416182314135078e-07, "loss": 0.0005, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2077 }, { "completion_length": 136.57144165039062, "epoch": 1.9942418426103647, "grad_norm": 1.1932830810546875, "kl": 0.40976792573928833, "learning_rate": 1.4391711608924543e-07, "loss": 0.0004, "reward": 1.821428656578064, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 2078 }, { "completion_length": 194.71429443359375, "epoch": 1.9952015355086372, "grad_norm": 0.014872231520712376, "kl": 0.31477776169776917, "learning_rate": 1.43672532932476e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2079 }, { "completion_length": 169.1428680419922, "epoch": 1.9961612284069097, "grad_norm": 0.013258764520287514, "kl": 0.3978249728679657, "learning_rate": 1.434280739566936e-07, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2080 }, { "completion_length": 171.85714721679688, "epoch": 1.9971209213051824, "grad_norm": 0.007795254234224558, "kl": 0.3347671627998352, "learning_rate": 1.4318373944740484e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2081 }, { "completion_length": 158.71429443359375, "epoch": 1.9980806142034548, "grad_norm": 1.250637173652649, "kl": 0.34738054871559143, "learning_rate": 1.4293952968997022e-07, "loss": 0.0003, "reward": 1.7142858505249023, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2082 }, { "completion_length": 138.6428680419922, "epoch": 1.9990403071017275, "grad_norm": 1.8610702753067017, "kl": 0.4089615046977997, "learning_rate": 1.42695444969605e-07, "loss": 0.0004, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2083 }, { "completion_length": 224.25, "epoch": 2.0, "grad_norm": 0.019078398123383522, "kl": 0.3776806592941284, "learning_rate": 1.4245148557137848e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2084 }, { "completion_length": 149.21429443359375, "epoch": 2.0009596928982725, "grad_norm": 0.013989738188683987, "kl": 0.34300240874290466, "learning_rate": 1.4220765178021343e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2085 }, { "completion_length": 209.92857360839844, "epoch": 2.001919385796545, "grad_norm": 1.2471243143081665, "kl": 0.2907347083091736, "learning_rate": 1.4196394388088578e-07, "loss": 0.0003, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2086 }, { "completion_length": 159.71429443359375, "epoch": 2.002879078694818, "grad_norm": 1.469150185585022, "kl": 0.384341835975647, "learning_rate": 1.417203621580248e-07, "loss": 0.0004, "reward": 1.8928571939468384, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 2087 }, { "completion_length": 200.85714721679688, "epoch": 2.0038387715930903, "grad_norm": 0.9164652228355408, "kl": 0.3266609311103821, "learning_rate": 1.4147690689611215e-07, "loss": 0.0003, "reward": 1.7142858505249023, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2088 }, { "completion_length": 134.5, "epoch": 2.004798464491363, "grad_norm": 0.03468313813209534, "kl": 0.4861953854560852, "learning_rate": 1.4123357837948176e-07, "loss": 0.0005, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2089 }, { "completion_length": 193.6428680419922, "epoch": 2.0057581573896353, "grad_norm": 0.009075839072465897, "kl": 0.36757609248161316, "learning_rate": 1.4099037689231962e-07, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2090 }, { "completion_length": 182.35714721679688, "epoch": 2.0067178502879077, "grad_norm": 0.5554079413414001, "kl": 0.2950313687324524, "learning_rate": 1.407473027186633e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2091 }, { "completion_length": 177.6428680419922, "epoch": 2.0076775431861806, "grad_norm": 1.1737858057022095, "kl": 0.261620432138443, "learning_rate": 1.405043561424019e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2092 }, { "completion_length": 235.35714721679688, "epoch": 2.008637236084453, "grad_norm": 0.01794557087123394, "kl": 0.29764169454574585, "learning_rate": 1.4026153744727514e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2093 }, { "completion_length": 196.07144165039062, "epoch": 2.0095969289827256, "grad_norm": 1.5850019454956055, "kl": 0.30651769042015076, "learning_rate": 1.400188469168738e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2094 }, { "completion_length": 141.85714721679688, "epoch": 2.010556621880998, "grad_norm": 2.296217918395996, "kl": 0.36811742186546326, "learning_rate": 1.3977628483463865e-07, "loss": 0.0004, "reward": 1.7857143878936768, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2095 }, { "completion_length": 129.21429443359375, "epoch": 2.0115163147792705, "grad_norm": 0.039844077080488205, "kl": 0.5324660539627075, "learning_rate": 1.3953385148386034e-07, "loss": 0.0005, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2096 }, { "completion_length": 160.5, "epoch": 2.012476007677543, "grad_norm": 1.399093747138977, "kl": 0.4919953942298889, "learning_rate": 1.3929154714767966e-07, "loss": 0.0005, "reward": 1.7857143878936768, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2097 }, { "completion_length": 184.1428680419922, "epoch": 2.013435700575816, "grad_norm": 1.0833245515823364, "kl": 0.29951047897338867, "learning_rate": 1.390493721090863e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2098 }, { "completion_length": 159.1428680419922, "epoch": 2.0143953934740884, "grad_norm": 1.191754937171936, "kl": 0.35810956358909607, "learning_rate": 1.3880732665091898e-07, "loss": 0.0004, "reward": 1.8571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2099 }, { "completion_length": 130.0, "epoch": 2.015355086372361, "grad_norm": 0.9595762491226196, "kl": 0.4288724958896637, "learning_rate": 1.3856541105586545e-07, "loss": 0.0004, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2100 }, { "completion_length": 181.6428680419922, "epoch": 2.0163147792706333, "grad_norm": 0.7254497408866882, "kl": 0.2809857130050659, "learning_rate": 1.3832362560646125e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2101 }, { "completion_length": 158.57144165039062, "epoch": 2.0172744721689058, "grad_norm": 0.9165582656860352, "kl": 0.36874425411224365, "learning_rate": 1.3808197058509053e-07, "loss": 0.0004, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2102 }, { "completion_length": 209.42857360839844, "epoch": 2.0182341650671787, "grad_norm": 1.4475003480911255, "kl": 0.2765994668006897, "learning_rate": 1.3784044627398445e-07, "loss": 0.0003, "reward": 1.7857143878936768, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2103 }, { "completion_length": 170.07144165039062, "epoch": 2.019193857965451, "grad_norm": 0.01107478141784668, "kl": 0.3391694724559784, "learning_rate": 1.3759905295522186e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2104 }, { "completion_length": 198.6428680419922, "epoch": 2.0201535508637236, "grad_norm": 1.4654057025909424, "kl": 0.29227492213249207, "learning_rate": 1.373577909107288e-07, "loss": 0.0003, "reward": 1.7857143878936768, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2105 }, { "completion_length": 173.1428680419922, "epoch": 2.021113243761996, "grad_norm": 1.2883341312408447, "kl": 0.3305601179599762, "learning_rate": 1.371166604222777e-07, "loss": 0.0003, "reward": 1.6428571939468384, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2106 }, { "completion_length": 181.1428680419922, "epoch": 2.0220729366602685, "grad_norm": 0.009826159104704857, "kl": 0.3913710415363312, "learning_rate": 1.3687566177148735e-07, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2107 }, { "completion_length": 162.7857208251953, "epoch": 2.0230326295585415, "grad_norm": 0.009689300321042538, "kl": 0.3373867869377136, "learning_rate": 1.366347952398228e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2108 }, { "completion_length": 177.42857360839844, "epoch": 2.023992322456814, "grad_norm": 1.2226629257202148, "kl": 0.33037111163139343, "learning_rate": 1.363940611085946e-07, "loss": 0.0003, "reward": 1.821428656578064, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3214285969734192, "step": 2109 }, { "completion_length": 173.00001525878906, "epoch": 2.0249520153550864, "grad_norm": 0.009614918380975723, "kl": 0.292025625705719, "learning_rate": 1.3615345965895874e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2110 }, { "completion_length": 156.57144165039062, "epoch": 2.025911708253359, "grad_norm": 1.389418601989746, "kl": 0.346670925617218, "learning_rate": 1.3591299117191616e-07, "loss": 0.0003, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2111 }, { "completion_length": 163.1428680419922, "epoch": 2.0268714011516313, "grad_norm": 0.008414109237492085, "kl": 0.33062922954559326, "learning_rate": 1.356726559283125e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2112 }, { "completion_length": 231.2857208251953, "epoch": 2.0278310940499042, "grad_norm": 0.010674198158085346, "kl": 0.2682853937149048, "learning_rate": 1.3543245420883814e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2113 }, { "completion_length": 215.07144165039062, "epoch": 2.0287907869481767, "grad_norm": 0.009106101468205452, "kl": 0.2460675835609436, "learning_rate": 1.3519238629402693e-07, "loss": 0.0002, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2114 }, { "completion_length": 154.2857208251953, "epoch": 2.029750479846449, "grad_norm": 2.139479875564575, "kl": 0.41409340500831604, "learning_rate": 1.34952452464257e-07, "loss": 0.0004, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2115 }, { "completion_length": 182.21429443359375, "epoch": 2.0307101727447217, "grad_norm": 1.2124195098876953, "kl": 0.3184549808502197, "learning_rate": 1.347126529997497e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2116 }, { "completion_length": 215.6428680419922, "epoch": 2.031669865642994, "grad_norm": 1.1189872026443481, "kl": 0.2854171097278595, "learning_rate": 1.3447298818056903e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2117 }, { "completion_length": 163.6428680419922, "epoch": 2.0326295585412666, "grad_norm": 1.50881826877594, "kl": 0.34910500049591064, "learning_rate": 1.3423345828662235e-07, "loss": 0.0003, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2118 }, { "completion_length": 156.85714721679688, "epoch": 2.0335892514395395, "grad_norm": 1.9462405443191528, "kl": 0.3266741931438446, "learning_rate": 1.339940635976592e-07, "loss": 0.0003, "reward": 1.6785714626312256, "reward_std": 0.3535533845424652, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2119 }, { "completion_length": 195.7857208251953, "epoch": 2.034548944337812, "grad_norm": 1.1056804656982422, "kl": 0.2869686484336853, "learning_rate": 1.3375480439327102e-07, "loss": 0.0003, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2120 }, { "completion_length": 216.2857208251953, "epoch": 2.0355086372360844, "grad_norm": 0.8703192472457886, "kl": 0.2781197726726532, "learning_rate": 1.335156809528914e-07, "loss": 0.0003, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2121 }, { "completion_length": 191.57144165039062, "epoch": 2.036468330134357, "grad_norm": 0.008357586339116096, "kl": 0.3802323639392853, "learning_rate": 1.3327669355579513e-07, "loss": 0.0004, "reward": 1.7857143878936768, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2122 }, { "completion_length": 162.5, "epoch": 2.0374280230326294, "grad_norm": 1.2589441537857056, "kl": 0.3467886745929718, "learning_rate": 1.3303784248109808e-07, "loss": 0.0003, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2123 }, { "completion_length": 140.0, "epoch": 2.0383877159309023, "grad_norm": 2.13344407081604, "kl": 0.4137902557849884, "learning_rate": 1.3279912800775703e-07, "loss": 0.0004, "reward": 1.821428656578064, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2124 }, { "completion_length": 140.85714721679688, "epoch": 2.0393474088291748, "grad_norm": 1.2661346197128296, "kl": 0.42558297514915466, "learning_rate": 1.3256055041456903e-07, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2125 }, { "completion_length": 206.50001525878906, "epoch": 2.0403071017274472, "grad_norm": 1.6065608263015747, "kl": 0.31215307116508484, "learning_rate": 1.323221099801716e-07, "loss": 0.0003, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2126 }, { "completion_length": 166.21429443359375, "epoch": 2.0412667946257197, "grad_norm": 1.399373173713684, "kl": 0.3666438162326813, "learning_rate": 1.320838069830418e-07, "loss": 0.0004, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2127 }, { "completion_length": 184.1428680419922, "epoch": 2.042226487523992, "grad_norm": 0.009302711114287376, "kl": 0.30834266543388367, "learning_rate": 1.3184564170149614e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2128 }, { "completion_length": 174.57144165039062, "epoch": 2.043186180422265, "grad_norm": 0.9453549981117249, "kl": 0.27935343980789185, "learning_rate": 1.3160761441369073e-07, "loss": 0.0003, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2129 }, { "completion_length": 167.1428680419922, "epoch": 2.0441458733205375, "grad_norm": 0.008064729161560535, "kl": 0.32010897994041443, "learning_rate": 1.3136972539761976e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2130 }, { "completion_length": 208.57144165039062, "epoch": 2.04510556621881, "grad_norm": 0.00915735773742199, "kl": 0.3115342855453491, "learning_rate": 1.3113197493111666e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2131 }, { "completion_length": 225.9285888671875, "epoch": 2.0460652591170825, "grad_norm": 0.017188969999551773, "kl": 0.25723162293434143, "learning_rate": 1.3089436329185267e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2132 }, { "completion_length": 181.2857208251953, "epoch": 2.047024952015355, "grad_norm": 0.01004840712994337, "kl": 0.3071969151496887, "learning_rate": 1.3065689075733682e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2133 }, { "completion_length": 104.78572082519531, "epoch": 2.047984644913628, "grad_norm": 2.760227918624878, "kl": 0.5455591082572937, "learning_rate": 1.3041955760491608e-07, "loss": 0.0005, "reward": 1.7500001192092896, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3214285969734192, "step": 2134 }, { "completion_length": 212.6428680419922, "epoch": 2.0489443378119003, "grad_norm": 1.12860107421875, "kl": 0.2843424081802368, "learning_rate": 1.3018236411177414e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2135 }, { "completion_length": 208.00001525878906, "epoch": 2.049904030710173, "grad_norm": 0.011982656084001064, "kl": 0.3333093822002411, "learning_rate": 1.2994531055493213e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2136 }, { "completion_length": 174.42857360839844, "epoch": 2.0508637236084453, "grad_norm": 1.0191868543624878, "kl": 0.29979559779167175, "learning_rate": 1.297083972112471e-07, "loss": 0.0003, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2137 }, { "completion_length": 270.2857360839844, "epoch": 2.0518234165067177, "grad_norm": 0.01043161004781723, "kl": 0.23740799725055695, "learning_rate": 1.2947162435741277e-07, "loss": 0.0002, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2138 }, { "completion_length": 218.71429443359375, "epoch": 2.05278310940499, "grad_norm": 0.5523020029067993, "kl": 0.32611608505249023, "learning_rate": 1.2923499226995883e-07, "loss": 0.0003, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2139 }, { "completion_length": 195.2857208251953, "epoch": 2.053742802303263, "grad_norm": 1.1038180589675903, "kl": 0.2606958746910095, "learning_rate": 1.2899850122525035e-07, "loss": 0.0003, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2140 }, { "completion_length": 176.2857208251953, "epoch": 2.0547024952015356, "grad_norm": 1.6429076194763184, "kl": 0.36505788564682007, "learning_rate": 1.2876215149948762e-07, "loss": 0.0004, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2141 }, { "completion_length": 245.2857208251953, "epoch": 2.055662188099808, "grad_norm": 0.8471269607543945, "kl": 0.23591187596321106, "learning_rate": 1.2852594336870627e-07, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2142 }, { "completion_length": 214.2857208251953, "epoch": 2.0566218809980805, "grad_norm": 0.9682890176773071, "kl": 0.2284972369670868, "learning_rate": 1.2828987710877615e-07, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2143 }, { "completion_length": 177.6428680419922, "epoch": 2.057581573896353, "grad_norm": 1.113329291343689, "kl": 0.31677910685539246, "learning_rate": 1.2805395299540157e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2144 }, { "completion_length": 166.6428680419922, "epoch": 2.058541266794626, "grad_norm": 0.016870861873030663, "kl": 0.39815863966941833, "learning_rate": 1.2781817130412088e-07, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2145 }, { "completion_length": 217.35714721679688, "epoch": 2.0595009596928984, "grad_norm": 0.009326552972197533, "kl": 0.2828511595726013, "learning_rate": 1.2758253231030592e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2146 }, { "completion_length": 181.2857208251953, "epoch": 2.060460652591171, "grad_norm": 1.513853669166565, "kl": 0.35252752900123596, "learning_rate": 1.2734703628916215e-07, "loss": 0.0004, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2147 }, { "completion_length": 147.7857208251953, "epoch": 2.0614203454894433, "grad_norm": 1.3028980493545532, "kl": 0.3436087369918823, "learning_rate": 1.2711168351572786e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2148 }, { "completion_length": 194.2857208251953, "epoch": 2.0623800383877158, "grad_norm": 1.1229890584945679, "kl": 0.30979153513908386, "learning_rate": 1.2687647426487396e-07, "loss": 0.0003, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2149 }, { "completion_length": 168.92857360839844, "epoch": 2.0633397312859887, "grad_norm": 0.008754386566579342, "kl": 0.3197518587112427, "learning_rate": 1.2664140881130412e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2150 }, { "completion_length": 158.0, "epoch": 2.064299424184261, "grad_norm": 0.018698420375585556, "kl": 0.3222905695438385, "learning_rate": 1.264064874295534e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2151 }, { "completion_length": 212.92857360839844, "epoch": 2.0652591170825336, "grad_norm": 0.010571110062301159, "kl": 0.29504549503326416, "learning_rate": 1.2617171039398934e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2152 }, { "completion_length": 202.57144165039062, "epoch": 2.066218809980806, "grad_norm": 0.01728310063481331, "kl": 0.2706984281539917, "learning_rate": 1.2593707797881043e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2153 }, { "completion_length": 223.4285888671875, "epoch": 2.0671785028790786, "grad_norm": 0.6224284768104553, "kl": 0.2924972176551819, "learning_rate": 1.2570259045804627e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2154 }, { "completion_length": 188.42857360839844, "epoch": 2.068138195777351, "grad_norm": 1.407900094985962, "kl": 0.8405649065971375, "learning_rate": 1.2546824810555756e-07, "loss": 0.0008, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2155 }, { "completion_length": 192.2857208251953, "epoch": 2.069097888675624, "grad_norm": 1.0613017082214355, "kl": 0.2650565803050995, "learning_rate": 1.252340511950351e-07, "loss": 0.0003, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2156 }, { "completion_length": 163.1428680419922, "epoch": 2.0700575815738964, "grad_norm": 1.2907813787460327, "kl": 0.3109373450279236, "learning_rate": 1.2500000000000005e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2157 }, { "completion_length": 194.07144165039062, "epoch": 2.071017274472169, "grad_norm": 1.0239654779434204, "kl": 0.35988858342170715, "learning_rate": 1.2476609479380324e-07, "loss": 0.0004, "reward": 1.8928571939468384, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 2158 }, { "completion_length": 145.42857360839844, "epoch": 2.0719769673704413, "grad_norm": 0.012275854125618935, "kl": 0.3972342610359192, "learning_rate": 1.24532335849625e-07, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2159 }, { "completion_length": 178.71429443359375, "epoch": 2.072936660268714, "grad_norm": 0.022484080865979195, "kl": 0.367228239774704, "learning_rate": 1.2429872344047507e-07, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2160 }, { "completion_length": 226.07144165039062, "epoch": 2.0738963531669867, "grad_norm": 1.0243865251541138, "kl": 0.24105314910411835, "learning_rate": 1.240652578391918e-07, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2161 }, { "completion_length": 131.07144165039062, "epoch": 2.074856046065259, "grad_norm": 2.0128657817840576, "kl": 0.4581930637359619, "learning_rate": 1.238319393184421e-07, "loss": 0.0005, "reward": 1.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2162 }, { "completion_length": 163.71429443359375, "epoch": 2.0758157389635317, "grad_norm": 1.9671947956085205, "kl": 0.40767350792884827, "learning_rate": 1.235987681507214e-07, "loss": 0.0004, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2163 }, { "completion_length": 159.57144165039062, "epoch": 2.076775431861804, "grad_norm": 1.135184407234192, "kl": 0.43293729424476624, "learning_rate": 1.2336574460835248e-07, "loss": 0.0004, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2164 }, { "completion_length": 184.7857208251953, "epoch": 2.0777351247600766, "grad_norm": 0.00912225991487503, "kl": 0.3024420142173767, "learning_rate": 1.2313286896348626e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2165 }, { "completion_length": 156.35714721679688, "epoch": 2.0786948176583495, "grad_norm": 1.3374378681182861, "kl": 0.4710233509540558, "learning_rate": 1.2290014148810062e-07, "loss": 0.0005, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2166 }, { "completion_length": 202.07144165039062, "epoch": 2.079654510556622, "grad_norm": 2.9158196449279785, "kl": 0.3618565797805786, "learning_rate": 1.2266756245400042e-07, "loss": 0.0004, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 2167 }, { "completion_length": 179.00001525878906, "epoch": 2.0806142034548945, "grad_norm": 0.8457772731781006, "kl": 0.30120787024497986, "learning_rate": 1.2243513213281733e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2168 }, { "completion_length": 219.7857208251953, "epoch": 2.081573896353167, "grad_norm": 0.07792245596647263, "kl": 0.38692957162857056, "learning_rate": 1.2220285079600915e-07, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2169 }, { "completion_length": 153.85714721679688, "epoch": 2.0825335892514394, "grad_norm": 0.03028506599366665, "kl": 0.4106762111186981, "learning_rate": 1.2197071871485974e-07, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2170 }, { "completion_length": 200.2857208251953, "epoch": 2.0834932821497123, "grad_norm": 1.681792974472046, "kl": 0.3661563992500305, "learning_rate": 1.2173873616047862e-07, "loss": 0.0004, "reward": 1.821428656578064, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2171 }, { "completion_length": 199.21429443359375, "epoch": 2.0844529750479848, "grad_norm": 0.00887543149292469, "kl": 0.2578425109386444, "learning_rate": 1.2150690340380061e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2172 }, { "completion_length": 208.35714721679688, "epoch": 2.0854126679462572, "grad_norm": 1.114802360534668, "kl": 0.2745758593082428, "learning_rate": 1.212752207155859e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2173 }, { "completion_length": 208.2857208251953, "epoch": 2.0863723608445297, "grad_norm": 0.7127196192741394, "kl": 0.30565720796585083, "learning_rate": 1.2104368836641906e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2174 }, { "completion_length": 257.5714416503906, "epoch": 2.087332053742802, "grad_norm": 0.008362087421119213, "kl": 0.2636604309082031, "learning_rate": 1.2081230662670907e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2175 }, { "completion_length": 164.2857208251953, "epoch": 2.0882917466410746, "grad_norm": 1.0041075944900513, "kl": 0.3411406874656677, "learning_rate": 1.2058107576668938e-07, "loss": 0.0003, "reward": 1.821428656578064, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3214285969734192, "step": 2176 }, { "completion_length": 213.07144165039062, "epoch": 2.0892514395393476, "grad_norm": 0.00866999663412571, "kl": 0.2910430133342743, "learning_rate": 1.2034999605641687e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2177 }, { "completion_length": 167.21429443359375, "epoch": 2.09021113243762, "grad_norm": 0.011636492796242237, "kl": 0.3121149241924286, "learning_rate": 1.2011906776577202e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 2178 }, { "completion_length": 152.0, "epoch": 2.0911708253358925, "grad_norm": 1.106221318244934, "kl": 0.4697701930999756, "learning_rate": 1.1988829116445843e-07, "loss": 0.0005, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2179 }, { "completion_length": 176.1428680419922, "epoch": 2.092130518234165, "grad_norm": 1.4577786922454834, "kl": 0.3455580770969391, "learning_rate": 1.1965766652200245e-07, "loss": 0.0003, "reward": 1.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2180 }, { "completion_length": 159.21429443359375, "epoch": 2.0930902111324374, "grad_norm": 0.02685633860528469, "kl": 0.38935956358909607, "learning_rate": 1.1942719410775335e-07, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2181 }, { "completion_length": 122.71429443359375, "epoch": 2.0940499040307103, "grad_norm": 0.024416105821728706, "kl": 0.5037440061569214, "learning_rate": 1.1919687419088214e-07, "loss": 0.0005, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2182 }, { "completion_length": 193.21429443359375, "epoch": 2.095009596928983, "grad_norm": 0.008242429234087467, "kl": 0.3115192651748657, "learning_rate": 1.1896670704038186e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2183 }, { "completion_length": 173.1428680419922, "epoch": 2.0959692898272553, "grad_norm": 1.169729232788086, "kl": 0.32850274443626404, "learning_rate": 1.1873669292506749e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2184 }, { "completion_length": 164.5, "epoch": 2.0969289827255277, "grad_norm": 0.013654135167598724, "kl": 0.35699108242988586, "learning_rate": 1.1850683211357456e-07, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2185 }, { "completion_length": 199.2857208251953, "epoch": 2.0978886756238, "grad_norm": 0.02655913680791855, "kl": 0.2965131402015686, "learning_rate": 1.1827712487436032e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2186 }, { "completion_length": 204.85714721679688, "epoch": 2.098848368522073, "grad_norm": 1.3462095260620117, "kl": 0.4063619077205658, "learning_rate": 1.1804757147570213e-07, "loss": 0.0004, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2187 }, { "completion_length": 201.35714721679688, "epoch": 2.0998080614203456, "grad_norm": 1.3845528364181519, "kl": 0.35332053899765015, "learning_rate": 1.1781817218569782e-07, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2188 }, { "completion_length": 177.07144165039062, "epoch": 2.100767754318618, "grad_norm": 4.014105319976807, "kl": 0.43775296211242676, "learning_rate": 1.1758892727226546e-07, "loss": 0.0004, "reward": 1.7500001192092896, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 2189 }, { "completion_length": 159.1428680419922, "epoch": 2.1017274472168905, "grad_norm": 1.3877198696136475, "kl": 0.4002152979373932, "learning_rate": 1.1735983700314256e-07, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2190 }, { "completion_length": 176.2857208251953, "epoch": 2.102687140115163, "grad_norm": 0.008436511270701885, "kl": 0.35179126262664795, "learning_rate": 1.1713090164588606e-07, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2191 }, { "completion_length": 96.64286041259766, "epoch": 2.103646833013436, "grad_norm": 0.02798357419669628, "kl": 0.47744596004486084, "learning_rate": 1.1690212146787207e-07, "loss": 0.0005, "reward": 1.7142858505249023, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 2192 }, { "completion_length": 178.07144165039062, "epoch": 2.1046065259117084, "grad_norm": 0.010930081829428673, "kl": 0.35815078020095825, "learning_rate": 1.1667349673629526e-07, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2193 }, { "completion_length": 167.92857360839844, "epoch": 2.105566218809981, "grad_norm": 0.011365210637450218, "kl": 0.36447760462760925, "learning_rate": 1.1644502771816911e-07, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2194 }, { "completion_length": 177.6428680419922, "epoch": 2.1065259117082533, "grad_norm": 1.0395492315292358, "kl": 0.3450736403465271, "learning_rate": 1.1621671468032493e-07, "loss": 0.0003, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2195 }, { "completion_length": 138.42857360839844, "epoch": 2.107485604606526, "grad_norm": 1.862489104270935, "kl": 0.4582013189792633, "learning_rate": 1.1598855788941189e-07, "loss": 0.0005, "reward": 1.6785714626312256, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2196 }, { "completion_length": 197.07144165039062, "epoch": 2.1084452975047983, "grad_norm": 0.015114451758563519, "kl": 0.3730555474758148, "learning_rate": 1.1576055761189702e-07, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2197 }, { "completion_length": 159.7857208251953, "epoch": 2.109404990403071, "grad_norm": 0.01803317666053772, "kl": 0.42753681540489197, "learning_rate": 1.155327141140639e-07, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2198 }, { "completion_length": 194.7857208251953, "epoch": 2.1103646833013436, "grad_norm": 1.5914254188537598, "kl": 0.3419155478477478, "learning_rate": 1.1530502766201369e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2199 }, { "completion_length": 192.2857208251953, "epoch": 2.111324376199616, "grad_norm": 1.412710189819336, "kl": 0.3400769531726837, "learning_rate": 1.150774985216637e-07, "loss": 0.0003, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2200 }, { "completion_length": 243.35714721679688, "epoch": 2.1122840690978886, "grad_norm": 1.6854368448257446, "kl": 0.2588157653808594, "learning_rate": 1.1485012695874763e-07, "loss": 0.0003, "reward": 1.821428656578064, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2201 }, { "completion_length": 180.92857360839844, "epoch": 2.113243761996161, "grad_norm": 0.6411627531051636, "kl": 0.37843528389930725, "learning_rate": 1.1462291323881528e-07, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2202 }, { "completion_length": 170.6428680419922, "epoch": 2.114203454894434, "grad_norm": 2.2254672050476074, "kl": 0.42216116189956665, "learning_rate": 1.1439585762723192e-07, "loss": 0.0004, "reward": 1.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2203 }, { "completion_length": 213.2857208251953, "epoch": 2.1151631477927064, "grad_norm": 1.936513900756836, "kl": 0.3997683823108673, "learning_rate": 1.1416896038917823e-07, "loss": 0.0004, "reward": 1.821428656578064, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2204 }, { "completion_length": 130.92857360839844, "epoch": 2.116122840690979, "grad_norm": 0.026373716071248055, "kl": 0.49715909361839294, "learning_rate": 1.139422217896499e-07, "loss": 0.0005, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2205 }, { "completion_length": 175.07144165039062, "epoch": 2.1170825335892514, "grad_norm": 1.827970027923584, "kl": 0.39544788002967834, "learning_rate": 1.1371564209345732e-07, "loss": 0.0004, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2206 }, { "completion_length": 170.6428680419922, "epoch": 2.118042226487524, "grad_norm": 0.009615090675652027, "kl": 0.3464759588241577, "learning_rate": 1.134892215652255e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2207 }, { "completion_length": 211.57144165039062, "epoch": 2.1190019193857967, "grad_norm": 0.5178061723709106, "kl": 0.400054395198822, "learning_rate": 1.1326296046939333e-07, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2208 }, { "completion_length": 149.6428680419922, "epoch": 2.119961612284069, "grad_norm": 0.01588529348373413, "kl": 0.4219527840614319, "learning_rate": 1.1303685907021343e-07, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2209 }, { "completion_length": 176.85714721679688, "epoch": 2.1209213051823417, "grad_norm": 0.011181281879544258, "kl": 0.35062137246131897, "learning_rate": 1.1281091763175226e-07, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2210 }, { "completion_length": 233.1428680419922, "epoch": 2.121880998080614, "grad_norm": 0.034133464097976685, "kl": 0.36753785610198975, "learning_rate": 1.1258513641788913e-07, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2211 }, { "completion_length": 243.21429443359375, "epoch": 2.1228406909788866, "grad_norm": 0.008164580911397934, "kl": 0.24114514887332916, "learning_rate": 1.1235951569231639e-07, "loss": 0.0002, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2212 }, { "completion_length": 223.21429443359375, "epoch": 2.1238003838771595, "grad_norm": 0.013499976135790348, "kl": 0.30008113384246826, "learning_rate": 1.1213405571853885e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2213 }, { "completion_length": 172.71429443359375, "epoch": 2.124760076775432, "grad_norm": 0.008648005314171314, "kl": 0.3414512574672699, "learning_rate": 1.1190875675987355e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2214 }, { "completion_length": 243.71429443359375, "epoch": 2.1257197696737045, "grad_norm": 1.0518412590026855, "kl": 0.29093366861343384, "learning_rate": 1.1168361907944974e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2215 }, { "completion_length": 165.35714721679688, "epoch": 2.126679462571977, "grad_norm": 0.8253960609436035, "kl": 0.42155998945236206, "learning_rate": 1.1145864294020802e-07, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2216 }, { "completion_length": 234.57144165039062, "epoch": 2.1276391554702494, "grad_norm": 1.5265398025512695, "kl": 0.297757089138031, "learning_rate": 1.1123382860490035e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2217 }, { "completion_length": 241.50001525878906, "epoch": 2.128598848368522, "grad_norm": 0.7332730293273926, "kl": 0.4088786244392395, "learning_rate": 1.1100917633609009e-07, "loss": 0.0004, "reward": 1.8571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2218 }, { "completion_length": 183.57144165039062, "epoch": 2.129558541266795, "grad_norm": 0.011880104430019855, "kl": 0.3445872962474823, "learning_rate": 1.1078468639615066e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2219 }, { "completion_length": 172.00001525878906, "epoch": 2.1305182341650672, "grad_norm": 1.0185467004776, "kl": 0.41503041982650757, "learning_rate": 1.1056035904726651e-07, "loss": 0.0004, "reward": 1.7500001192092896, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 2220 }, { "completion_length": 207.92857360839844, "epoch": 2.1314779270633397, "grad_norm": 0.010460572317242622, "kl": 0.272518515586853, "learning_rate": 1.1033619455143193e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2221 }, { "completion_length": 165.85714721679688, "epoch": 2.132437619961612, "grad_norm": 1.1467835903167725, "kl": 0.34634318947792053, "learning_rate": 1.1011219317045083e-07, "loss": 0.0003, "reward": 1.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2222 }, { "completion_length": 168.7857208251953, "epoch": 2.1333973128598847, "grad_norm": 0.010432816110551357, "kl": 0.3852117657661438, "learning_rate": 1.0988835516593712e-07, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2223 }, { "completion_length": 154.71429443359375, "epoch": 2.1343570057581576, "grad_norm": 1.4622241258621216, "kl": 0.3656364679336548, "learning_rate": 1.0966468079931345e-07, "loss": 0.0004, "reward": 1.8571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2224 }, { "completion_length": 200.35714721679688, "epoch": 2.13531669865643, "grad_norm": 0.660490870475769, "kl": 0.28752073645591736, "learning_rate": 1.094411703318115e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2225 }, { "completion_length": 190.42857360839844, "epoch": 2.1362763915547025, "grad_norm": 2.2043304443359375, "kl": 0.3349076807498932, "learning_rate": 1.0921782402447158e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2226 }, { "completion_length": 177.21429443359375, "epoch": 2.137236084452975, "grad_norm": 0.8015868067741394, "kl": 0.3188447952270508, "learning_rate": 1.089946421381421e-07, "loss": 0.0003, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2227 }, { "completion_length": 204.92857360839844, "epoch": 2.1381957773512474, "grad_norm": 0.6517558693885803, "kl": 0.29672855138778687, "learning_rate": 1.0877162493347977e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2228 }, { "completion_length": 156.5, "epoch": 2.1391554702495204, "grad_norm": 1.9276422262191772, "kl": 0.3515912592411041, "learning_rate": 1.085487726709487e-07, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2229 }, { "completion_length": 174.50001525878906, "epoch": 2.140115163147793, "grad_norm": 0.010103218257427216, "kl": 0.3344026803970337, "learning_rate": 1.0832608561082032e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2230 }, { "completion_length": 166.07144165039062, "epoch": 2.1410748560460653, "grad_norm": 0.015109160915017128, "kl": 0.3998062312602997, "learning_rate": 1.081035640131735e-07, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2231 }, { "completion_length": 156.7857208251953, "epoch": 2.1420345489443378, "grad_norm": 0.04720485955476761, "kl": 0.4627130627632141, "learning_rate": 1.0788120813789326e-07, "loss": 0.0005, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2232 }, { "completion_length": 183.85714721679688, "epoch": 2.1429942418426102, "grad_norm": 1.5570000410079956, "kl": 0.32194989919662476, "learning_rate": 1.0765901824467166e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2233 }, { "completion_length": 193.07144165039062, "epoch": 2.1439539347408827, "grad_norm": 0.796298086643219, "kl": 0.3696627914905548, "learning_rate": 1.0743699459300656e-07, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2234 }, { "completion_length": 136.6428680419922, "epoch": 2.1449136276391556, "grad_norm": 1.1806868314743042, "kl": 0.38852113485336304, "learning_rate": 1.0721513744220168e-07, "loss": 0.0004, "reward": 1.8928571939468384, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2235 }, { "completion_length": 201.57144165039062, "epoch": 2.145873320537428, "grad_norm": 1.8357577323913574, "kl": 0.32230764627456665, "learning_rate": 1.0699344705136653e-07, "loss": 0.0003, "reward": 1.821428656578064, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2236 }, { "completion_length": 173.57144165039062, "epoch": 2.1468330134357005, "grad_norm": 0.008622914552688599, "kl": 0.2911013662815094, "learning_rate": 1.0677192367941562e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2237 }, { "completion_length": 141.07144165039062, "epoch": 2.147792706333973, "grad_norm": 0.9523184895515442, "kl": 0.4135355055332184, "learning_rate": 1.0655056758506845e-07, "loss": 0.0004, "reward": 1.8571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2238 }, { "completion_length": 141.92857360839844, "epoch": 2.1487523992322455, "grad_norm": 1.7636710405349731, "kl": 0.4891684353351593, "learning_rate": 1.0632937902684918e-07, "loss": 0.0005, "reward": 1.8928571939468384, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 2239 }, { "completion_length": 155.07144165039062, "epoch": 2.1497120921305184, "grad_norm": 2.4089674949645996, "kl": 0.3624024987220764, "learning_rate": 1.0610835826308622e-07, "loss": 0.0004, "reward": 1.7857143878936768, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2240 }, { "completion_length": 151.85714721679688, "epoch": 2.150671785028791, "grad_norm": 0.9141308069229126, "kl": 0.49959760904312134, "learning_rate": 1.0588750555191225e-07, "loss": 0.0005, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2241 }, { "completion_length": 163.0, "epoch": 2.1516314779270633, "grad_norm": 0.00683703413233161, "kl": 0.3498028814792633, "learning_rate": 1.0566682115126344e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2242 }, { "completion_length": 191.85714721679688, "epoch": 2.152591170825336, "grad_norm": 1.147384524345398, "kl": 0.34392330050468445, "learning_rate": 1.0544630531887933e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2243 }, { "completion_length": 194.6428680419922, "epoch": 2.1535508637236083, "grad_norm": 0.0118038859218359, "kl": 0.27275216579437256, "learning_rate": 1.0522595831230294e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2244 }, { "completion_length": 169.2857208251953, "epoch": 2.154510556621881, "grad_norm": 0.008654561825096607, "kl": 0.34043624997138977, "learning_rate": 1.050057803888798e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2245 }, { "completion_length": 160.0, "epoch": 2.1554702495201536, "grad_norm": 0.008076932281255722, "kl": 0.3212208151817322, "learning_rate": 1.0478577180575809e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2246 }, { "completion_length": 247.50001525878906, "epoch": 2.156429942418426, "grad_norm": 0.5040544867515564, "kl": 0.26002153754234314, "learning_rate": 1.0456593281988815e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2247 }, { "completion_length": 140.71429443359375, "epoch": 2.1573896353166986, "grad_norm": 1.77802574634552, "kl": 0.4073006212711334, "learning_rate": 1.043462636880222e-07, "loss": 0.0004, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2248 }, { "completion_length": 179.50001525878906, "epoch": 2.158349328214971, "grad_norm": 1.9105573892593384, "kl": 0.3729563057422638, "learning_rate": 1.0412676466671436e-07, "loss": 0.0004, "reward": 1.821428656578064, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2249 }, { "completion_length": 201.50001525878906, "epoch": 2.159309021113244, "grad_norm": 1.216043472290039, "kl": 0.25677549839019775, "learning_rate": 1.0390743601231983e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2250 }, { "completion_length": 140.0, "epoch": 2.1602687140115164, "grad_norm": 0.041330281645059586, "kl": 0.45355212688446045, "learning_rate": 1.0368827798099472e-07, "loss": 0.0005, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2251 }, { "completion_length": 150.07144165039062, "epoch": 2.161228406909789, "grad_norm": 1.4545389413833618, "kl": 0.4031859338283539, "learning_rate": 1.0346929082869641e-07, "loss": 0.0004, "reward": 1.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2252 }, { "completion_length": 136.85714721679688, "epoch": 2.1621880998080614, "grad_norm": 1.6357982158660889, "kl": 0.4807808995246887, "learning_rate": 1.0325047481118191e-07, "loss": 0.0005, "reward": 1.7142858505249023, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2253 }, { "completion_length": 163.6428680419922, "epoch": 2.163147792706334, "grad_norm": 0.020248940214514732, "kl": 0.504530668258667, "learning_rate": 1.0303183018400908e-07, "loss": 0.0005, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2254 }, { "completion_length": 185.6428680419922, "epoch": 2.1641074856046068, "grad_norm": 1.1503291130065918, "kl": 0.44497397541999817, "learning_rate": 1.0281335720253526e-07, "loss": 0.0004, "reward": 1.8571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 2255 }, { "completion_length": 185.07144165039062, "epoch": 2.165067178502879, "grad_norm": 0.9726617932319641, "kl": 0.3031022250652313, "learning_rate": 1.0259505612191724e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2256 }, { "completion_length": 218.85714721679688, "epoch": 2.1660268714011517, "grad_norm": 0.01754288747906685, "kl": 0.3006456792354584, "learning_rate": 1.0237692719711139e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2257 }, { "completion_length": 167.7857208251953, "epoch": 2.166986564299424, "grad_norm": 1.0112392902374268, "kl": 0.3402155637741089, "learning_rate": 1.0215897068287274e-07, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2258 }, { "completion_length": 189.35714721679688, "epoch": 2.1679462571976966, "grad_norm": 0.016637351363897324, "kl": 0.2882312834262848, "learning_rate": 1.0194118683375502e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2259 }, { "completion_length": 222.50001525878906, "epoch": 2.168905950095969, "grad_norm": 0.007152179256081581, "kl": 0.24142606556415558, "learning_rate": 1.0172357590411035e-07, "loss": 0.0002, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2260 }, { "completion_length": 174.21429443359375, "epoch": 2.169865642994242, "grad_norm": 1.5444742441177368, "kl": 0.3392682671546936, "learning_rate": 1.0150613814808875e-07, "loss": 0.0003, "reward": 1.821428656578064, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 2261 }, { "completion_length": 201.00001525878906, "epoch": 2.1708253358925145, "grad_norm": 0.033065348863601685, "kl": 0.3007781207561493, "learning_rate": 1.0128887381963826e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2262 }, { "completion_length": 200.2857208251953, "epoch": 2.171785028790787, "grad_norm": 1.4817148447036743, "kl": 0.31526699662208557, "learning_rate": 1.010717831725042e-07, "loss": 0.0003, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2263 }, { "completion_length": 162.57144165039062, "epoch": 2.1727447216890594, "grad_norm": 0.010193306021392345, "kl": 0.35711732506752014, "learning_rate": 1.0085486646022887e-07, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2264 }, { "completion_length": 181.07144165039062, "epoch": 2.173704414587332, "grad_norm": 0.008381015621125698, "kl": 0.30618444085121155, "learning_rate": 1.0063812393615198e-07, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2265 }, { "completion_length": 183.00001525878906, "epoch": 2.174664107485605, "grad_norm": 1.0695639848709106, "kl": 0.3808076083660126, "learning_rate": 1.0042155585340903e-07, "loss": 0.0004, "reward": 1.821428656578064, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 2266 }, { "completion_length": 210.92857360839844, "epoch": 2.1756238003838773, "grad_norm": 0.020098701119422913, "kl": 0.2887342572212219, "learning_rate": 1.0020516246493249e-07, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2267 }, { "completion_length": 199.92857360839844, "epoch": 2.1765834932821497, "grad_norm": 0.008682209067046642, "kl": 0.29889002442359924, "learning_rate": 9.998894402345043e-08, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2268 }, { "completion_length": 143.0, "epoch": 2.177543186180422, "grad_norm": 1.766879916191101, "kl": 0.38378387689590454, "learning_rate": 9.977290078148656e-08, "loss": 0.0004, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2269 }, { "completion_length": 189.1428680419922, "epoch": 2.1785028790786947, "grad_norm": 0.016866518184542656, "kl": 0.3114365339279175, "learning_rate": 9.955703299136034e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2270 }, { "completion_length": 151.0, "epoch": 2.1794625719769676, "grad_norm": 1.8072845935821533, "kl": 0.3542937934398651, "learning_rate": 9.934134090518592e-08, "loss": 0.0004, "reward": 1.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2271 }, { "completion_length": 203.50001525878906, "epoch": 2.18042226487524, "grad_norm": 0.7643098831176758, "kl": 0.3245689272880554, "learning_rate": 9.912582477487244e-08, "loss": 0.0003, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2272 }, { "completion_length": 186.00001525878906, "epoch": 2.1813819577735125, "grad_norm": 0.009571930393576622, "kl": 0.271981805562973, "learning_rate": 9.891048485212347e-08, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2273 }, { "completion_length": 224.35714721679688, "epoch": 2.182341650671785, "grad_norm": 0.011276448145508766, "kl": 0.2361380159854889, "learning_rate": 9.869532138843672e-08, "loss": 0.0002, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2274 }, { "completion_length": 129.35714721679688, "epoch": 2.1833013435700575, "grad_norm": 1.4282732009887695, "kl": 0.4340873956680298, "learning_rate": 9.848033463510411e-08, "loss": 0.0004, "reward": 1.8571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2275 }, { "completion_length": 153.71429443359375, "epoch": 2.18426103646833, "grad_norm": 0.011710720136761665, "kl": 0.3708542585372925, "learning_rate": 9.826552484321085e-08, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2276 }, { "completion_length": 181.00001525878906, "epoch": 2.185220729366603, "grad_norm": 0.05080263316631317, "kl": 0.4540072977542877, "learning_rate": 9.805089226363553e-08, "loss": 0.0005, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2277 }, { "completion_length": 190.57144165039062, "epoch": 2.1861804222648753, "grad_norm": 0.009610489010810852, "kl": 0.2696508765220642, "learning_rate": 9.783643714705e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2278 }, { "completion_length": 141.7857208251953, "epoch": 2.1871401151631478, "grad_norm": 1.7462658882141113, "kl": 0.37656670808792114, "learning_rate": 9.76221597439186e-08, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2279 }, { "completion_length": 168.1428680419922, "epoch": 2.1880998080614202, "grad_norm": 1.4974663257598877, "kl": 0.33484917879104614, "learning_rate": 9.740806030449822e-08, "loss": 0.0003, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2280 }, { "completion_length": 157.07144165039062, "epoch": 2.1890595009596927, "grad_norm": 1.4179086685180664, "kl": 0.44651952385902405, "learning_rate": 9.719413907883786e-08, "loss": 0.0004, "reward": 1.8571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2281 }, { "completion_length": 187.42857360839844, "epoch": 2.1900191938579656, "grad_norm": 1.078112244606018, "kl": 0.38269752264022827, "learning_rate": 9.698039631677832e-08, "loss": 0.0004, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2282 }, { "completion_length": 153.6428680419922, "epoch": 2.190978886756238, "grad_norm": 1.2488102912902832, "kl": 0.4003477394580841, "learning_rate": 9.676683226795229e-08, "loss": 0.0004, "reward": 1.571428656578064, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2283 }, { "completion_length": 190.00001525878906, "epoch": 2.1919385796545106, "grad_norm": 0.02577224001288414, "kl": 0.3215872645378113, "learning_rate": 9.655344718178335e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2284 }, { "completion_length": 184.35714721679688, "epoch": 2.192898272552783, "grad_norm": 1.7135283946990967, "kl": 0.3244791030883789, "learning_rate": 9.63402413074862e-08, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2285 }, { "completion_length": 188.42857360839844, "epoch": 2.1938579654510555, "grad_norm": 0.8940168023109436, "kl": 0.30173665285110474, "learning_rate": 9.612721489406647e-08, "loss": 0.0003, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 2286 }, { "completion_length": 134.2857208251953, "epoch": 2.1948176583493284, "grad_norm": 2.2382235527038574, "kl": 0.40941545367240906, "learning_rate": 9.591436819031967e-08, "loss": 0.0004, "reward": 1.7857143878936768, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2287 }, { "completion_length": 169.1428680419922, "epoch": 2.195777351247601, "grad_norm": 0.0100932028144598, "kl": 0.32597658038139343, "learning_rate": 9.5701701444832e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2288 }, { "completion_length": 184.85714721679688, "epoch": 2.1967370441458733, "grad_norm": 0.008340233005583286, "kl": 0.31085819005966187, "learning_rate": 9.548921490597916e-08, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2289 }, { "completion_length": 182.42857360839844, "epoch": 2.197696737044146, "grad_norm": 0.014093113131821156, "kl": 0.3090254068374634, "learning_rate": 9.527690882192635e-08, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2290 }, { "completion_length": 197.50001525878906, "epoch": 2.1986564299424183, "grad_norm": 0.01637759990990162, "kl": 0.332448810338974, "learning_rate": 9.506478344062829e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2291 }, { "completion_length": 151.92857360839844, "epoch": 2.199616122840691, "grad_norm": 1.0982497930526733, "kl": 0.3692462742328644, "learning_rate": 9.485283900982841e-08, "loss": 0.0004, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2292 }, { "completion_length": 158.42857360839844, "epoch": 2.2005758157389637, "grad_norm": 0.6039838194847107, "kl": 0.3850418031215668, "learning_rate": 9.464107577705886e-08, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2293 }, { "completion_length": 183.07144165039062, "epoch": 2.201535508637236, "grad_norm": 2.848324775695801, "kl": 0.41800931096076965, "learning_rate": 9.442949398964017e-08, "loss": 0.0004, "reward": 1.8571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2294 }, { "completion_length": 145.6428680419922, "epoch": 2.2024952015355086, "grad_norm": 0.010525790974497795, "kl": 0.4491075277328491, "learning_rate": 9.421809389468097e-08, "loss": 0.0004, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 2295 }, { "completion_length": 164.35714721679688, "epoch": 2.203454894433781, "grad_norm": 0.011089333333075047, "kl": 0.3448173701763153, "learning_rate": 9.400687573907775e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2296 }, { "completion_length": 202.07144165039062, "epoch": 2.204414587332054, "grad_norm": 0.9500468969345093, "kl": 0.34149304032325745, "learning_rate": 9.379583976951449e-08, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2297 }, { "completion_length": 260.2857360839844, "epoch": 2.2053742802303264, "grad_norm": 0.01766246370971203, "kl": 0.2424723356962204, "learning_rate": 9.358498623246219e-08, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2298 }, { "completion_length": 203.07144165039062, "epoch": 2.206333973128599, "grad_norm": 0.4985853433609009, "kl": 0.2694071829319, "learning_rate": 9.337431537417923e-08, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2299 }, { "completion_length": 170.07144165039062, "epoch": 2.2072936660268714, "grad_norm": 1.5133053064346313, "kl": 0.36489570140838623, "learning_rate": 9.316382744071002e-08, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2300 }, { "completion_length": 152.5, "epoch": 2.208253358925144, "grad_norm": 1.652052402496338, "kl": 0.3136995732784271, "learning_rate": 9.295352267788592e-08, "loss": 0.0003, "reward": 1.7857143878936768, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2301 }, { "completion_length": 196.00001525878906, "epoch": 2.2092130518234163, "grad_norm": 0.8000397086143494, "kl": 0.3978213965892792, "learning_rate": 9.274340133132406e-08, "loss": 0.0004, "reward": 1.8928571939468384, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 2302 }, { "completion_length": 153.07144165039062, "epoch": 2.2101727447216892, "grad_norm": 2.1990911960601807, "kl": 0.34965211153030396, "learning_rate": 9.253346364642731e-08, "loss": 0.0003, "reward": 1.7500001192092896, "reward_std": 0.3535533845424652, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2303 }, { "completion_length": 152.2857208251953, "epoch": 2.2111324376199617, "grad_norm": 1.1370735168457031, "kl": 0.36851370334625244, "learning_rate": 9.232370986838428e-08, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2304 }, { "completion_length": 150.0, "epoch": 2.212092130518234, "grad_norm": 1.4682972431182861, "kl": 0.3583117425441742, "learning_rate": 9.21141402421686e-08, "loss": 0.0004, "reward": 1.8928571939468384, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2305 }, { "completion_length": 199.6428680419922, "epoch": 2.2130518234165066, "grad_norm": 0.013028915040194988, "kl": 0.3193378448486328, "learning_rate": 9.190475501253885e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2306 }, { "completion_length": 204.92857360839844, "epoch": 2.214011516314779, "grad_norm": 0.017036352306604385, "kl": 0.30802541971206665, "learning_rate": 9.169555442403834e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2307 }, { "completion_length": 189.71429443359375, "epoch": 2.214971209213052, "grad_norm": 1.8278406858444214, "kl": 0.31913936138153076, "learning_rate": 9.148653872099455e-08, "loss": 0.0003, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2308 }, { "completion_length": 175.85714721679688, "epoch": 2.2159309021113245, "grad_norm": 0.008312499150633812, "kl": 0.3403278887271881, "learning_rate": 9.127770814751932e-08, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2309 }, { "completion_length": 157.0, "epoch": 2.216890595009597, "grad_norm": 0.009312300942838192, "kl": 0.38261058926582336, "learning_rate": 9.106906294750804e-08, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2310 }, { "completion_length": 181.6428680419922, "epoch": 2.2178502879078694, "grad_norm": 0.010095637291669846, "kl": 0.32379812002182007, "learning_rate": 9.086060336463958e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2311 }, { "completion_length": 181.50001525878906, "epoch": 2.218809980806142, "grad_norm": 0.012788722291588783, "kl": 0.31009599566459656, "learning_rate": 9.065232964237631e-08, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2312 }, { "completion_length": 209.21429443359375, "epoch": 2.2197696737044144, "grad_norm": 0.007747733499854803, "kl": 0.26714393496513367, "learning_rate": 9.044424202396325e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2313 }, { "completion_length": 162.85714721679688, "epoch": 2.2207293666026873, "grad_norm": 1.0120543241500854, "kl": 0.35264691710472107, "learning_rate": 9.023634075242815e-08, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2314 }, { "completion_length": 185.2857208251953, "epoch": 2.2216890595009597, "grad_norm": 0.0085907606408, "kl": 0.26588523387908936, "learning_rate": 9.00286260705811e-08, "loss": 0.0003, "reward": 1.7857143878936768, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2315 }, { "completion_length": 176.6428680419922, "epoch": 2.222648752399232, "grad_norm": 0.010799570009112358, "kl": 0.32605621218681335, "learning_rate": 8.982109822101425e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2316 }, { "completion_length": 173.57144165039062, "epoch": 2.2236084452975047, "grad_norm": 1.3782517910003662, "kl": 0.3049265742301941, "learning_rate": 8.961375744610178e-08, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2317 }, { "completion_length": 185.85714721679688, "epoch": 2.224568138195777, "grad_norm": 1.0838412046432495, "kl": 0.3492510914802551, "learning_rate": 8.94066039879991e-08, "loss": 0.0003, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2318 }, { "completion_length": 158.92857360839844, "epoch": 2.22552783109405, "grad_norm": 1.4316790103912354, "kl": 0.32690081000328064, "learning_rate": 8.919963808864283e-08, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2319 }, { "completion_length": 218.92857360839844, "epoch": 2.2264875239923225, "grad_norm": 0.1826101839542389, "kl": 0.6497124433517456, "learning_rate": 8.899285998975101e-08, "loss": 0.0006, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2320 }, { "completion_length": 180.85714721679688, "epoch": 2.227447216890595, "grad_norm": 0.03384352847933769, "kl": 0.3659009337425232, "learning_rate": 8.878626993282154e-08, "loss": 0.0004, "reward": 1.7857143878936768, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2321 }, { "completion_length": 132.92857360839844, "epoch": 2.2284069097888675, "grad_norm": 1.4584321975708008, "kl": 0.3829643428325653, "learning_rate": 8.857986815913351e-08, "loss": 0.0004, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2322 }, { "completion_length": 177.2857208251953, "epoch": 2.22936660268714, "grad_norm": 0.008849883452057838, "kl": 0.34223076701164246, "learning_rate": 8.837365490974561e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2323 }, { "completion_length": 185.42857360839844, "epoch": 2.230326295585413, "grad_norm": 0.5892661809921265, "kl": 0.35408341884613037, "learning_rate": 8.816763042549643e-08, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2324 }, { "completion_length": 156.5, "epoch": 2.2312859884836853, "grad_norm": 0.016391437500715256, "kl": 0.43440279364585876, "learning_rate": 8.796179494700439e-08, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2325 }, { "completion_length": 228.7857208251953, "epoch": 2.232245681381958, "grad_norm": 0.009046043269336224, "kl": 0.2865375876426697, "learning_rate": 8.77561487146668e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2326 }, { "completion_length": 126.0714340209961, "epoch": 2.2332053742802302, "grad_norm": 1.900962233543396, "kl": 0.5534769296646118, "learning_rate": 8.755069196866013e-08, "loss": 0.0006, "reward": 1.7142858505249023, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2327 }, { "completion_length": 193.6428680419922, "epoch": 2.2341650671785027, "grad_norm": 0.010489541105926037, "kl": 0.2922463119029999, "learning_rate": 8.734542494893954e-08, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2328 }, { "completion_length": 133.57144165039062, "epoch": 2.2351247600767756, "grad_norm": 0.7781608700752258, "kl": 0.36442744731903076, "learning_rate": 8.714034789523841e-08, "loss": 0.0004, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2329 }, { "completion_length": 156.07144165039062, "epoch": 2.236084452975048, "grad_norm": 0.007688295561820269, "kl": 0.321687787771225, "learning_rate": 8.693546104706868e-08, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2330 }, { "completion_length": 166.35714721679688, "epoch": 2.2370441458733206, "grad_norm": 0.016731994226574898, "kl": 0.3361990749835968, "learning_rate": 8.673076464371979e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2331 }, { "completion_length": 186.2857208251953, "epoch": 2.238003838771593, "grad_norm": 1.0645352602005005, "kl": 0.3499471843242645, "learning_rate": 8.652625892425871e-08, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2332 }, { "completion_length": 141.57144165039062, "epoch": 2.2389635316698655, "grad_norm": 1.691064476966858, "kl": 0.40892472863197327, "learning_rate": 8.632194412753021e-08, "loss": 0.0004, "reward": 1.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2333 }, { "completion_length": 166.5, "epoch": 2.2399232245681384, "grad_norm": 0.9859452247619629, "kl": 0.3112918734550476, "learning_rate": 8.611782049215532e-08, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2334 }, { "completion_length": 201.00001525878906, "epoch": 2.240882917466411, "grad_norm": 1.1614038944244385, "kl": 0.336837500333786, "learning_rate": 8.591388825653253e-08, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2335 }, { "completion_length": 168.7857208251953, "epoch": 2.2418426103646834, "grad_norm": 0.010288489051163197, "kl": 0.30426225066185, "learning_rate": 8.571014765883636e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2336 }, { "completion_length": 136.2857208251953, "epoch": 2.242802303262956, "grad_norm": 0.01223420538008213, "kl": 0.35010260343551636, "learning_rate": 8.550659893701753e-08, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2337 }, { "completion_length": 154.85714721679688, "epoch": 2.2437619961612283, "grad_norm": 1.7932173013687134, "kl": 0.3995441794395447, "learning_rate": 8.530324232880298e-08, "loss": 0.0004, "reward": 1.7857143878936768, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2338 }, { "completion_length": 207.92857360839844, "epoch": 2.2447216890595008, "grad_norm": 0.00870857760310173, "kl": 0.2965749502182007, "learning_rate": 8.510007807169498e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2339 }, { "completion_length": 179.35714721679688, "epoch": 2.2456813819577737, "grad_norm": 1.1678712368011475, "kl": 0.29912206530570984, "learning_rate": 8.489710640297124e-08, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2340 }, { "completion_length": 197.85714721679688, "epoch": 2.246641074856046, "grad_norm": 2.0110793113708496, "kl": 0.2928957939147949, "learning_rate": 8.469432755968454e-08, "loss": 0.0003, "reward": 1.821428656578064, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2341 }, { "completion_length": 164.57144165039062, "epoch": 2.2476007677543186, "grad_norm": 0.6496718525886536, "kl": 0.3407835364341736, "learning_rate": 8.449174177866237e-08, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2342 }, { "completion_length": 148.7857208251953, "epoch": 2.248560460652591, "grad_norm": 0.045508261770009995, "kl": 0.47254040837287903, "learning_rate": 8.4289349296507e-08, "loss": 0.0005, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2343 }, { "completion_length": 184.07144165039062, "epoch": 2.2495201535508635, "grad_norm": 0.009464515373110771, "kl": 0.2713713049888611, "learning_rate": 8.408715034959468e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2344 }, { "completion_length": 153.2857208251953, "epoch": 2.2504798464491365, "grad_norm": 2.188969373703003, "kl": 0.3688879907131195, "learning_rate": 8.388514517407568e-08, "loss": 0.0004, "reward": 1.6071429252624512, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3214285969734192, "step": 2345 }, { "completion_length": 163.7857208251953, "epoch": 2.251439539347409, "grad_norm": 0.030066829174757004, "kl": 0.36072996258735657, "learning_rate": 8.368333400587419e-08, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2346 }, { "completion_length": 131.85714721679688, "epoch": 2.2523992322456814, "grad_norm": 1.0784834623336792, "kl": 0.3779542148113251, "learning_rate": 8.348171708068747e-08, "loss": 0.0004, "reward": 1.8928571939468384, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 2347 }, { "completion_length": 126.00000762939453, "epoch": 2.253358925143954, "grad_norm": 0.011691184714436531, "kl": 0.40159890055656433, "learning_rate": 8.328029463398614e-08, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2348 }, { "completion_length": 180.50001525878906, "epoch": 2.2543186180422263, "grad_norm": 0.7488894462585449, "kl": 0.31373298168182373, "learning_rate": 8.307906690101363e-08, "loss": 0.0003, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2349 }, { "completion_length": 152.07144165039062, "epoch": 2.255278310940499, "grad_norm": 0.011142004281282425, "kl": 0.35172218084335327, "learning_rate": 8.287803411678587e-08, "loss": 0.0004, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2350 }, { "completion_length": 178.1428680419922, "epoch": 2.2562380038387717, "grad_norm": 0.00994914211332798, "kl": 0.298773854970932, "learning_rate": 8.26771965160914e-08, "loss": 0.0003, "reward": 1.7857143878936768, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2351 }, { "completion_length": 173.7857208251953, "epoch": 2.257197696737044, "grad_norm": 0.9136143922805786, "kl": 0.3535648286342621, "learning_rate": 8.247655433349046e-08, "loss": 0.0004, "reward": 1.8571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2352 }, { "completion_length": 180.85714721679688, "epoch": 2.2581573896353166, "grad_norm": 1.5318102836608887, "kl": 0.41495388746261597, "learning_rate": 8.227610780331515e-08, "loss": 0.0004, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2353 }, { "completion_length": 174.92857360839844, "epoch": 2.259117082533589, "grad_norm": 0.007413059007376432, "kl": 0.29984521865844727, "learning_rate": 8.207585715966939e-08, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2354 }, { "completion_length": 151.35714721679688, "epoch": 2.2600767754318616, "grad_norm": 0.04691081866621971, "kl": 0.42298924922943115, "learning_rate": 8.187580263642768e-08, "loss": 0.0004, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2355 }, { "completion_length": 181.57144165039062, "epoch": 2.2610364683301345, "grad_norm": 1.2313203811645508, "kl": 0.36031439900398254, "learning_rate": 8.167594446723611e-08, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2356 }, { "completion_length": 205.2857208251953, "epoch": 2.261996161228407, "grad_norm": 1.7372177839279175, "kl": 0.2903035581111908, "learning_rate": 8.147628288551111e-08, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2357 }, { "completion_length": 210.21429443359375, "epoch": 2.2629558541266794, "grad_norm": 0.5505134463310242, "kl": 0.33399707078933716, "learning_rate": 8.127681812443946e-08, "loss": 0.0003, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 2358 }, { "completion_length": 182.71429443359375, "epoch": 2.263915547024952, "grad_norm": 0.007848742417991161, "kl": 0.30948078632354736, "learning_rate": 8.107755041697836e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2359 }, { "completion_length": 210.42857360839844, "epoch": 2.2648752399232244, "grad_norm": 0.009668463841080666, "kl": 0.2732465863227844, "learning_rate": 8.087847999585465e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2360 }, { "completion_length": 228.00001525878906, "epoch": 2.2658349328214973, "grad_norm": 1.1439926624298096, "kl": 0.2525472044944763, "learning_rate": 8.067960709356478e-08, "loss": 0.0003, "reward": 1.821428656578064, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 2361 }, { "completion_length": 178.42857360839844, "epoch": 2.2667946257197698, "grad_norm": 0.00913035124540329, "kl": 0.32526493072509766, "learning_rate": 8.048093194237452e-08, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2362 }, { "completion_length": 180.85714721679688, "epoch": 2.267754318618042, "grad_norm": 1.1429283618927002, "kl": 0.30074572563171387, "learning_rate": 8.028245477431866e-08, "loss": 0.0003, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2363 }, { "completion_length": 193.00001525878906, "epoch": 2.2687140115163147, "grad_norm": 1.4577040672302246, "kl": 0.31667622923851013, "learning_rate": 8.008417582120097e-08, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2364 }, { "completion_length": 208.2857208251953, "epoch": 2.269673704414587, "grad_norm": 0.011210980825126171, "kl": 0.32798755168914795, "learning_rate": 7.988609531459348e-08, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2365 }, { "completion_length": 160.71429443359375, "epoch": 2.27063339731286, "grad_norm": 0.00958255585283041, "kl": 0.3747883141040802, "learning_rate": 7.968821348583643e-08, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2366 }, { "completion_length": 154.6428680419922, "epoch": 2.2715930902111325, "grad_norm": 0.0070836106315255165, "kl": 0.3211817741394043, "learning_rate": 7.94905305660384e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2367 }, { "completion_length": 216.6428680419922, "epoch": 2.272552783109405, "grad_norm": 1.5714529752731323, "kl": 0.3022826611995697, "learning_rate": 7.929304678607505e-08, "loss": 0.0003, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2368 }, { "completion_length": 169.1428680419922, "epoch": 2.2735124760076775, "grad_norm": 0.5690542459487915, "kl": 0.37599387764930725, "learning_rate": 7.909576237659002e-08, "loss": 0.0004, "reward": 1.8571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2369 }, { "completion_length": 142.35714721679688, "epoch": 2.27447216890595, "grad_norm": 0.017705651000142097, "kl": 0.4102109968662262, "learning_rate": 7.889867756799384e-08, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2370 }, { "completion_length": 201.07144165039062, "epoch": 2.275431861804223, "grad_norm": 0.009250440634787083, "kl": 0.2527216672897339, "learning_rate": 7.870179259046389e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2371 }, { "completion_length": 192.00001525878906, "epoch": 2.2763915547024953, "grad_norm": 0.8702223300933838, "kl": 0.2828218638896942, "learning_rate": 7.850510767394436e-08, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2372 }, { "completion_length": 152.07144165039062, "epoch": 2.277351247600768, "grad_norm": 0.007180205546319485, "kl": 0.31271451711654663, "learning_rate": 7.830862304814564e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2373 }, { "completion_length": 157.57144165039062, "epoch": 2.2783109404990403, "grad_norm": 0.9957510828971863, "kl": 0.4276602864265442, "learning_rate": 7.811233894254421e-08, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2374 }, { "completion_length": 174.7857208251953, "epoch": 2.2792706333973127, "grad_norm": 0.10908857733011246, "kl": 0.5444903373718262, "learning_rate": 7.791625558638235e-08, "loss": 0.0005, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2375 }, { "completion_length": 150.1428680419922, "epoch": 2.2802303262955856, "grad_norm": 0.011377020739018917, "kl": 0.41069525480270386, "learning_rate": 7.772037320866786e-08, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2376 }, { "completion_length": 192.57144165039062, "epoch": 2.281190019193858, "grad_norm": 1.0958499908447266, "kl": 0.3201381266117096, "learning_rate": 7.752469203817405e-08, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2377 }, { "completion_length": 169.92857360839844, "epoch": 2.2821497120921306, "grad_norm": 0.7326059341430664, "kl": 0.3190072774887085, "learning_rate": 7.732921230343892e-08, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2378 }, { "completion_length": 144.92857360839844, "epoch": 2.283109404990403, "grad_norm": 0.009208448231220245, "kl": 0.3432241380214691, "learning_rate": 7.71339342327653e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2379 }, { "completion_length": 171.71429443359375, "epoch": 2.2840690978886755, "grad_norm": 1.0596526861190796, "kl": 0.3198470175266266, "learning_rate": 7.693885805422068e-08, "loss": 0.0003, "reward": 1.6785714626312256, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2380 }, { "completion_length": 135.6428680419922, "epoch": 2.285028790786948, "grad_norm": 0.01782006397843361, "kl": 0.4098416864871979, "learning_rate": 7.674398399563656e-08, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2381 }, { "completion_length": 169.71429443359375, "epoch": 2.285988483685221, "grad_norm": 0.014190520159900188, "kl": 0.3832854628562927, "learning_rate": 7.65493122846084e-08, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2382 }, { "completion_length": 187.57144165039062, "epoch": 2.2869481765834934, "grad_norm": 0.007881158962845802, "kl": 0.31347009539604187, "learning_rate": 7.635484314849541e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2383 }, { "completion_length": 139.35714721679688, "epoch": 2.287907869481766, "grad_norm": 0.01358217652887106, "kl": 0.41229215264320374, "learning_rate": 7.616057681442011e-08, "loss": 0.0004, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2384 }, { "completion_length": 135.71429443359375, "epoch": 2.2888675623800383, "grad_norm": 0.009660118259489536, "kl": 0.3606107831001282, "learning_rate": 7.596651350926836e-08, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2385 }, { "completion_length": 172.00001525878906, "epoch": 2.2898272552783108, "grad_norm": 0.015202282927930355, "kl": 0.36486151814460754, "learning_rate": 7.57726534596887e-08, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2386 }, { "completion_length": 167.07144165039062, "epoch": 2.2907869481765837, "grad_norm": 0.03380025178194046, "kl": 0.3852708041667938, "learning_rate": 7.557899689209227e-08, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2387 }, { "completion_length": 176.1428680419922, "epoch": 2.291746641074856, "grad_norm": 0.616092324256897, "kl": 0.33730223774909973, "learning_rate": 7.53855440326529e-08, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2388 }, { "completion_length": 187.00001525878906, "epoch": 2.2927063339731286, "grad_norm": 1.1876417398452759, "kl": 0.3480874001979828, "learning_rate": 7.519229510730593e-08, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2389 }, { "completion_length": 189.92857360839844, "epoch": 2.293666026871401, "grad_norm": 1.2365055084228516, "kl": 0.2712819278240204, "learning_rate": 7.499925034174909e-08, "loss": 0.0003, "reward": 1.7142858505249023, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2390 }, { "completion_length": 151.1428680419922, "epoch": 2.2946257197696736, "grad_norm": 0.009793726727366447, "kl": 0.3927040696144104, "learning_rate": 7.480640996144136e-08, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2391 }, { "completion_length": 165.71429443359375, "epoch": 2.295585412667946, "grad_norm": 0.010866262950003147, "kl": 0.36412322521209717, "learning_rate": 7.4613774191603e-08, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2392 }, { "completion_length": 137.2857208251953, "epoch": 2.296545105566219, "grad_norm": 0.1100330725312233, "kl": 0.5282264947891235, "learning_rate": 7.442134325721552e-08, "loss": 0.0005, "reward": 1.7857143878936768, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2393 }, { "completion_length": 184.85714721679688, "epoch": 2.2975047984644914, "grad_norm": 0.008166417479515076, "kl": 0.2800176441669464, "learning_rate": 7.422911738302104e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2394 }, { "completion_length": 182.50001525878906, "epoch": 2.298464491362764, "grad_norm": 0.037198349833488464, "kl": 0.3427737057209015, "learning_rate": 7.403709679352216e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2395 }, { "completion_length": 159.0, "epoch": 2.2994241842610363, "grad_norm": 1.348203182220459, "kl": 0.36015424132347107, "learning_rate": 7.384528171298183e-08, "loss": 0.0004, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2396 }, { "completion_length": 194.92857360839844, "epoch": 2.300383877159309, "grad_norm": 0.7712195515632629, "kl": 0.4175633192062378, "learning_rate": 7.365367236542283e-08, "loss": 0.0004, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2397 }, { "completion_length": 148.5, "epoch": 2.3013435700575817, "grad_norm": 1.385331153869629, "kl": 0.37163403630256653, "learning_rate": 7.346226897462793e-08, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2398 }, { "completion_length": 166.71429443359375, "epoch": 2.302303262955854, "grad_norm": 0.8406816720962524, "kl": 0.3532649576663971, "learning_rate": 7.32710717641391e-08, "loss": 0.0004, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2399 }, { "completion_length": 181.35714721679688, "epoch": 2.3032629558541267, "grad_norm": 0.9602617025375366, "kl": 0.33637264370918274, "learning_rate": 7.30800809572576e-08, "loss": 0.0003, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2400 }, { "completion_length": 160.5, "epoch": 2.304222648752399, "grad_norm": 0.11933541297912598, "kl": 0.44891780614852905, "learning_rate": 7.28892967770438e-08, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2401 }, { "completion_length": 169.71429443359375, "epoch": 2.3051823416506716, "grad_norm": 1.4291465282440186, "kl": 0.3321887254714966, "learning_rate": 7.26987194463163e-08, "loss": 0.0003, "reward": 1.8928571939468384, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 2402 }, { "completion_length": 177.57144165039062, "epoch": 2.3061420345489445, "grad_norm": 0.7593781352043152, "kl": 0.3450671434402466, "learning_rate": 7.250834918765267e-08, "loss": 0.0003, "reward": 1.7500001192092896, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 2403 }, { "completion_length": 187.7857208251953, "epoch": 2.307101727447217, "grad_norm": 1.5311895608901978, "kl": 0.3006536364555359, "learning_rate": 7.231818622338822e-08, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2404 }, { "completion_length": 174.35714721679688, "epoch": 2.3080614203454894, "grad_norm": 0.015017502009868622, "kl": 0.2890858054161072, "learning_rate": 7.21282307756163e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2405 }, { "completion_length": 169.07144165039062, "epoch": 2.309021113243762, "grad_norm": 1.415032148361206, "kl": 0.2867405414581299, "learning_rate": 7.193848306618805e-08, "loss": 0.0003, "reward": 1.7142858505249023, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2406 }, { "completion_length": 177.50001525878906, "epoch": 2.3099808061420344, "grad_norm": 0.007417899090796709, "kl": 0.29509592056274414, "learning_rate": 7.174894331671177e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2407 }, { "completion_length": 216.07144165039062, "epoch": 2.3109404990403073, "grad_norm": 0.7573505640029907, "kl": 0.23843200504779816, "learning_rate": 7.155961174855296e-08, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2408 }, { "completion_length": 181.2857208251953, "epoch": 2.3119001919385798, "grad_norm": 0.009801539592444897, "kl": 0.30656877160072327, "learning_rate": 7.137048858283401e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2409 }, { "completion_length": 186.7857208251953, "epoch": 2.3128598848368522, "grad_norm": 0.7731830477714539, "kl": 0.28668031096458435, "learning_rate": 7.118157404043376e-08, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2410 }, { "completion_length": 190.35714721679688, "epoch": 2.3138195777351247, "grad_norm": 1.335172414779663, "kl": 0.29180213809013367, "learning_rate": 7.099286834198773e-08, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2411 }, { "completion_length": 250.6428680419922, "epoch": 2.314779270633397, "grad_norm": 0.007668996229767799, "kl": 0.2263069897890091, "learning_rate": 7.080437170788722e-08, "loss": 0.0002, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2412 }, { "completion_length": 149.42857360839844, "epoch": 2.31573896353167, "grad_norm": 0.010731285437941551, "kl": 0.3647524416446686, "learning_rate": 7.06160843582794e-08, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2413 }, { "completion_length": 227.50001525878906, "epoch": 2.3166986564299425, "grad_norm": 1.3895034790039062, "kl": 0.24059471487998962, "learning_rate": 7.042800651306724e-08, "loss": 0.0002, "reward": 1.6785714626312256, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 2414 }, { "completion_length": 200.50001525878906, "epoch": 2.317658349328215, "grad_norm": 1.507825493812561, "kl": 0.28060004115104675, "learning_rate": 7.024013839190879e-08, "loss": 0.0003, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2415 }, { "completion_length": 213.6428680419922, "epoch": 2.3186180422264875, "grad_norm": 0.008739498443901539, "kl": 0.23428314924240112, "learning_rate": 7.005248021421722e-08, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2416 }, { "completion_length": 184.57144165039062, "epoch": 2.31957773512476, "grad_norm": 1.4615356922149658, "kl": 0.3182896673679352, "learning_rate": 6.986503219916056e-08, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2417 }, { "completion_length": 159.92857360839844, "epoch": 2.320537428023033, "grad_norm": 0.007538393139839172, "kl": 0.3278430700302124, "learning_rate": 6.967779456566125e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2418 }, { "completion_length": 160.2857208251953, "epoch": 2.3214971209213053, "grad_norm": 0.8066189289093018, "kl": 0.33942195773124695, "learning_rate": 6.94907675323963e-08, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2419 }, { "completion_length": 197.50001525878906, "epoch": 2.322456813819578, "grad_norm": 1.6349856853485107, "kl": 0.3399483263492584, "learning_rate": 6.930395131779648e-08, "loss": 0.0003, "reward": 1.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2420 }, { "completion_length": 187.07144165039062, "epoch": 2.3234165067178503, "grad_norm": 0.017041167244315147, "kl": 0.29062360525131226, "learning_rate": 6.911734614004641e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2421 }, { "completion_length": 130.85714721679688, "epoch": 2.3243761996161227, "grad_norm": 0.012843627482652664, "kl": 0.41775190830230713, "learning_rate": 6.893095221708448e-08, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2422 }, { "completion_length": 157.85714721679688, "epoch": 2.325335892514395, "grad_norm": 1.0275534391403198, "kl": 0.32437771558761597, "learning_rate": 6.874476976660184e-08, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2423 }, { "completion_length": 168.57144165039062, "epoch": 2.326295585412668, "grad_norm": 0.02992008812725544, "kl": 0.3447946310043335, "learning_rate": 6.85587990060432e-08, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2424 }, { "completion_length": 175.50001525878906, "epoch": 2.3272552783109406, "grad_norm": 1.2751972675323486, "kl": 0.29473885893821716, "learning_rate": 6.837304015260573e-08, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2425 }, { "completion_length": 121.28572082519531, "epoch": 2.328214971209213, "grad_norm": 0.012020844966173172, "kl": 0.41258642077445984, "learning_rate": 6.81874934232391e-08, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2426 }, { "completion_length": 184.00001525878906, "epoch": 2.3291746641074855, "grad_norm": 1.2743828296661377, "kl": 0.3322771191596985, "learning_rate": 6.800215903464546e-08, "loss": 0.0003, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2427 }, { "completion_length": 186.07144165039062, "epoch": 2.330134357005758, "grad_norm": 1.5591607093811035, "kl": 0.41431933641433716, "learning_rate": 6.781703720327878e-08, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2428 }, { "completion_length": 194.42857360839844, "epoch": 2.3310940499040305, "grad_norm": 0.6721892952919006, "kl": 0.33048534393310547, "learning_rate": 6.763212814534483e-08, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2429 }, { "completion_length": 136.42857360839844, "epoch": 2.3320537428023034, "grad_norm": 1.034993052482605, "kl": 0.42373576760292053, "learning_rate": 6.744743207680087e-08, "loss": 0.0004, "reward": 1.8928571939468384, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 2430 }, { "completion_length": 167.92857360839844, "epoch": 2.333013435700576, "grad_norm": 1.5315582752227783, "kl": 0.3844909965991974, "learning_rate": 6.726294921335532e-08, "loss": 0.0004, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2431 }, { "completion_length": 198.92857360839844, "epoch": 2.3339731285988483, "grad_norm": 0.01012028381228447, "kl": 0.30086463689804077, "learning_rate": 6.707867977046785e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2432 }, { "completion_length": 200.00001525878906, "epoch": 2.334932821497121, "grad_norm": 0.008567842654883862, "kl": 0.29436981678009033, "learning_rate": 6.689462396334869e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2433 }, { "completion_length": 233.00001525878906, "epoch": 2.3358925143953932, "grad_norm": 0.9474591612815857, "kl": 0.2391078621149063, "learning_rate": 6.671078200695843e-08, "loss": 0.0002, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2434 }, { "completion_length": 138.2857208251953, "epoch": 2.336852207293666, "grad_norm": 0.009926212951540947, "kl": 0.4082980453968048, "learning_rate": 6.652715411600835e-08, "loss": 0.0004, "reward": 1.7857143878936768, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2435 }, { "completion_length": 233.1428680419922, "epoch": 2.3378119001919386, "grad_norm": 0.010858862660825253, "kl": 0.22575075924396515, "learning_rate": 6.634374050495909e-08, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2436 }, { "completion_length": 193.6428680419922, "epoch": 2.338771593090211, "grad_norm": 0.009397550486028194, "kl": 0.285728394985199, "learning_rate": 6.61605413880216e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2437 }, { "completion_length": 219.50001525878906, "epoch": 2.3397312859884836, "grad_norm": 0.008905981667339802, "kl": 0.27246561646461487, "learning_rate": 6.597755697915605e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2438 }, { "completion_length": 154.6428680419922, "epoch": 2.340690978886756, "grad_norm": 0.010640849359333515, "kl": 0.3437267541885376, "learning_rate": 6.57947874920718e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2439 }, { "completion_length": 233.07144165039062, "epoch": 2.341650671785029, "grad_norm": 1.4305310249328613, "kl": 0.314684122800827, "learning_rate": 6.56122331402274e-08, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2440 }, { "completion_length": 246.35714721679688, "epoch": 2.3426103646833014, "grad_norm": 0.010223647579550743, "kl": 0.22401279211044312, "learning_rate": 6.542989413683e-08, "loss": 0.0002, "reward": 1.7857143878936768, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2441 }, { "completion_length": 172.71429443359375, "epoch": 2.343570057581574, "grad_norm": 0.009011102840304375, "kl": 0.3209628462791443, "learning_rate": 6.524777069483525e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2442 }, { "completion_length": 200.1428680419922, "epoch": 2.3445297504798464, "grad_norm": 0.788044273853302, "kl": 0.32687270641326904, "learning_rate": 6.506586302694711e-08, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2443 }, { "completion_length": 174.50001525878906, "epoch": 2.345489443378119, "grad_norm": 1.5684114694595337, "kl": 0.3328644931316376, "learning_rate": 6.488417134561735e-08, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2444 }, { "completion_length": 146.35714721679688, "epoch": 2.3464491362763917, "grad_norm": 1.0047317743301392, "kl": 0.3896716237068176, "learning_rate": 6.470269586304583e-08, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2445 }, { "completion_length": 181.6428680419922, "epoch": 2.347408829174664, "grad_norm": 1.384597659111023, "kl": 0.3498172163963318, "learning_rate": 6.452143679117964e-08, "loss": 0.0003, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2446 }, { "completion_length": 177.2857208251953, "epoch": 2.3483685220729367, "grad_norm": 0.021793227642774582, "kl": 0.34011757373809814, "learning_rate": 6.43403943417131e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2447 }, { "completion_length": 181.07144165039062, "epoch": 2.349328214971209, "grad_norm": 0.998522162437439, "kl": 0.35558995604515076, "learning_rate": 6.415956872608774e-08, "loss": 0.0004, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2448 }, { "completion_length": 208.2857208251953, "epoch": 2.3502879078694816, "grad_norm": 1.2046314477920532, "kl": 0.26034003496170044, "learning_rate": 6.39789601554917e-08, "loss": 0.0003, "reward": 1.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2449 }, { "completion_length": 173.57144165039062, "epoch": 2.3512476007677545, "grad_norm": 0.6332202553749084, "kl": 0.31931087374687195, "learning_rate": 6.379856884085966e-08, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2450 }, { "completion_length": 165.42857360839844, "epoch": 2.352207293666027, "grad_norm": 0.018866756930947304, "kl": 0.3639250695705414, "learning_rate": 6.361839499287256e-08, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2451 }, { "completion_length": 220.4285888671875, "epoch": 2.3531669865642995, "grad_norm": 0.006558616645634174, "kl": 0.2657568156719208, "learning_rate": 6.34384388219573e-08, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2452 }, { "completion_length": 171.71429443359375, "epoch": 2.354126679462572, "grad_norm": 0.010868384502828121, "kl": 0.34725090861320496, "learning_rate": 6.325870053828675e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2453 }, { "completion_length": 157.7857208251953, "epoch": 2.3550863723608444, "grad_norm": 1.6376947164535522, "kl": 0.3337484300136566, "learning_rate": 6.307918035177912e-08, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2454 }, { "completion_length": 174.07144165039062, "epoch": 2.3560460652591173, "grad_norm": 0.01002725213766098, "kl": 0.2827897071838379, "learning_rate": 6.289987847209784e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2455 }, { "completion_length": 173.2857208251953, "epoch": 2.3570057581573898, "grad_norm": 0.010055793449282646, "kl": 0.3145758807659149, "learning_rate": 6.272079510865177e-08, "loss": 0.0003, "reward": 1.6428571939468384, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.2857142984867096, "step": 2456 }, { "completion_length": 155.57144165039062, "epoch": 2.3579654510556622, "grad_norm": 0.6520972847938538, "kl": 0.33155950903892517, "learning_rate": 6.2541930470594e-08, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2457 }, { "completion_length": 153.57144165039062, "epoch": 2.3589251439539347, "grad_norm": 0.033620093017816544, "kl": 0.4006862938404083, "learning_rate": 6.236328476682262e-08, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2458 }, { "completion_length": 162.2857208251953, "epoch": 2.359884836852207, "grad_norm": 0.010905117727816105, "kl": 0.3593258261680603, "learning_rate": 6.218485820597983e-08, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2459 }, { "completion_length": 195.50001525878906, "epoch": 2.36084452975048, "grad_norm": 0.011754229664802551, "kl": 0.28250831365585327, "learning_rate": 6.200665099645183e-08, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2460 }, { "completion_length": 210.42857360839844, "epoch": 2.3618042226487526, "grad_norm": 0.5999040007591248, "kl": 0.28936702013015747, "learning_rate": 6.182866334636888e-08, "loss": 0.0003, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2461 }, { "completion_length": 159.1428680419922, "epoch": 2.362763915547025, "grad_norm": 0.011983736418187618, "kl": 0.33385488390922546, "learning_rate": 6.165089546360457e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2462 }, { "completion_length": 242.71429443359375, "epoch": 2.3637236084452975, "grad_norm": 0.8277530074119568, "kl": 0.30558961629867554, "learning_rate": 6.147334755577596e-08, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2463 }, { "completion_length": 198.57144165039062, "epoch": 2.36468330134357, "grad_norm": 0.006838356610387564, "kl": 0.2528032958507538, "learning_rate": 6.129601983024312e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2464 }, { "completion_length": 223.00001525878906, "epoch": 2.3656429942418424, "grad_norm": 0.008799941278994083, "kl": 0.25994673371315, "learning_rate": 6.111891249410892e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2465 }, { "completion_length": 146.21429443359375, "epoch": 2.3666026871401153, "grad_norm": 2.2898075580596924, "kl": 0.4077291190624237, "learning_rate": 6.094202575421906e-08, "loss": 0.0004, "reward": 1.7857143878936768, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2466 }, { "completion_length": 176.21429443359375, "epoch": 2.367562380038388, "grad_norm": 0.012529137544333935, "kl": 0.34645920991897583, "learning_rate": 6.076535981716141e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2467 }, { "completion_length": 192.92857360839844, "epoch": 2.3685220729366603, "grad_norm": 0.6551617383956909, "kl": 0.2839953303337097, "learning_rate": 6.05889148892659e-08, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2468 }, { "completion_length": 178.85714721679688, "epoch": 2.3694817658349328, "grad_norm": 1.9434329271316528, "kl": 0.3196873366832733, "learning_rate": 6.041269117660464e-08, "loss": 0.0003, "reward": 1.7142858505249023, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2469 }, { "completion_length": 167.7857208251953, "epoch": 2.370441458733205, "grad_norm": 0.8634299039840698, "kl": 0.3473282754421234, "learning_rate": 6.023668888499109e-08, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2470 }, { "completion_length": 146.0, "epoch": 2.3714011516314777, "grad_norm": 1.802504539489746, "kl": 0.36036765575408936, "learning_rate": 6.00609082199802e-08, "loss": 0.0004, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2471 }, { "completion_length": 207.21429443359375, "epoch": 2.3723608445297506, "grad_norm": 0.007355821784585714, "kl": 0.24151450395584106, "learning_rate": 5.988534938686812e-08, "loss": 0.0002, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2472 }, { "completion_length": 181.6428680419922, "epoch": 2.373320537428023, "grad_norm": 0.01064737606793642, "kl": 0.3206411302089691, "learning_rate": 5.971001259069178e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2473 }, { "completion_length": 189.35714721679688, "epoch": 2.3742802303262955, "grad_norm": 0.87726891040802, "kl": 0.31224697828292847, "learning_rate": 5.9534898036229105e-08, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2474 }, { "completion_length": 192.71429443359375, "epoch": 2.375239923224568, "grad_norm": 0.6890006065368652, "kl": 0.2978019118309021, "learning_rate": 5.9360005927998115e-08, "loss": 0.0003, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2475 }, { "completion_length": 197.85714721679688, "epoch": 2.3761996161228405, "grad_norm": 0.013473181053996086, "kl": 0.2983226776123047, "learning_rate": 5.9185336470257226e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2476 }, { "completion_length": 202.7857208251953, "epoch": 2.3771593090211134, "grad_norm": 0.9587998390197754, "kl": 0.2485167235136032, "learning_rate": 5.901088986700475e-08, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2477 }, { "completion_length": 186.00001525878906, "epoch": 2.378119001919386, "grad_norm": 1.3745689392089844, "kl": 0.3075118064880371, "learning_rate": 5.8836666321978676e-08, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2478 }, { "completion_length": 171.2857208251953, "epoch": 2.3790786948176583, "grad_norm": 1.9816193580627441, "kl": 0.28420597314834595, "learning_rate": 5.866266603865669e-08, "loss": 0.0003, "reward": 1.7857143878936768, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2479 }, { "completion_length": 149.5, "epoch": 2.380038387715931, "grad_norm": 2.270756483078003, "kl": 0.4507063329219818, "learning_rate": 5.848888922025552e-08, "loss": 0.0005, "reward": 1.8571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2480 }, { "completion_length": 162.42857360839844, "epoch": 2.3809980806142033, "grad_norm": 0.035485878586769104, "kl": 0.4166523516178131, "learning_rate": 5.831533606973088e-08, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2481 }, { "completion_length": 178.6428680419922, "epoch": 2.381957773512476, "grad_norm": 0.6615705490112305, "kl": 0.2964332103729248, "learning_rate": 5.814200678977752e-08, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2482 }, { "completion_length": 159.92857360839844, "epoch": 2.3829174664107486, "grad_norm": 0.006469566375017166, "kl": 0.2772570252418518, "learning_rate": 5.796890158282847e-08, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2483 }, { "completion_length": 177.07144165039062, "epoch": 2.383877159309021, "grad_norm": 0.7075363993644714, "kl": 0.30004939436912537, "learning_rate": 5.779602065105518e-08, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2484 }, { "completion_length": 233.00001525878906, "epoch": 2.3848368522072936, "grad_norm": 0.007545359432697296, "kl": 0.25124698877334595, "learning_rate": 5.7623364196367095e-08, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2485 }, { "completion_length": 166.21429443359375, "epoch": 2.385796545105566, "grad_norm": 0.011945691891014576, "kl": 0.4161456227302551, "learning_rate": 5.7450932420411516e-08, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2486 }, { "completion_length": 167.85714721679688, "epoch": 2.386756238003839, "grad_norm": 0.008566112257540226, "kl": 0.32534927129745483, "learning_rate": 5.7278725524573494e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2487 }, { "completion_length": 290.14288330078125, "epoch": 2.3877159309021114, "grad_norm": 0.006512672174721956, "kl": 0.20201271772384644, "learning_rate": 5.7106743709975205e-08, "loss": 0.0002, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2488 }, { "completion_length": 158.0, "epoch": 2.388675623800384, "grad_norm": 1.1930043697357178, "kl": 0.3484293520450592, "learning_rate": 5.6934987177475994e-08, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2489 }, { "completion_length": 166.5, "epoch": 2.3896353166986564, "grad_norm": 0.9120239019393921, "kl": 0.3258485794067383, "learning_rate": 5.6763456127672386e-08, "loss": 0.0003, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2490 }, { "completion_length": 173.92857360839844, "epoch": 2.390595009596929, "grad_norm": 1.2399605512619019, "kl": 0.3308667838573456, "learning_rate": 5.659215076089702e-08, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2491 }, { "completion_length": 158.85714721679688, "epoch": 2.3915547024952017, "grad_norm": 2.385868787765503, "kl": 0.40322133898735046, "learning_rate": 5.642107127721951e-08, "loss": 0.0004, "reward": 1.7857143878936768, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2492 }, { "completion_length": 200.2857208251953, "epoch": 2.392514395393474, "grad_norm": 0.43659552931785583, "kl": 0.3589246869087219, "learning_rate": 5.625021787644532e-08, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2493 }, { "completion_length": 150.85714721679688, "epoch": 2.3934740882917467, "grad_norm": 0.008162330836057663, "kl": 0.32325735688209534, "learning_rate": 5.6079590758115913e-08, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2494 }, { "completion_length": 137.35714721679688, "epoch": 2.394433781190019, "grad_norm": 1.4589229822158813, "kl": 0.36533674597740173, "learning_rate": 5.590919012150863e-08, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2495 }, { "completion_length": 218.50001525878906, "epoch": 2.3953934740882916, "grad_norm": 0.00761091522872448, "kl": 0.21330401301383972, "learning_rate": 5.57390161656362e-08, "loss": 0.0002, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2496 }, { "completion_length": 162.2857208251953, "epoch": 2.3963531669865645, "grad_norm": 0.008374442346394062, "kl": 0.3543194532394409, "learning_rate": 5.556906908924655e-08, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2497 }, { "completion_length": 226.2857208251953, "epoch": 2.397312859884837, "grad_norm": 0.010083980858325958, "kl": 0.25265175104141235, "learning_rate": 5.539934909082269e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2498 }, { "completion_length": 160.85714721679688, "epoch": 2.3982725527831095, "grad_norm": 1.648162841796875, "kl": 0.3080432415008545, "learning_rate": 5.5229856368582376e-08, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2499 }, { "completion_length": 206.35714721679688, "epoch": 2.399232245681382, "grad_norm": 0.505326509475708, "kl": 0.33138418197631836, "learning_rate": 5.506059112047812e-08, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2500 }, { "completion_length": 164.21429443359375, "epoch": 2.4001919385796544, "grad_norm": 0.012546426616609097, "kl": 0.35090604424476624, "learning_rate": 5.4891553544196526e-08, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2501 }, { "completion_length": 187.6428680419922, "epoch": 2.401151631477927, "grad_norm": 0.02868373692035675, "kl": 0.35114315152168274, "learning_rate": 5.472274383715833e-08, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2502 }, { "completion_length": 135.57144165039062, "epoch": 2.4021113243762, "grad_norm": 0.596782922744751, "kl": 0.41187870502471924, "learning_rate": 5.455416219651832e-08, "loss": 0.0004, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2503 }, { "completion_length": 180.21429443359375, "epoch": 2.4030710172744723, "grad_norm": 0.9748349189758301, "kl": 0.32370853424072266, "learning_rate": 5.438580881916474e-08, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2504 }, { "completion_length": 208.21429443359375, "epoch": 2.4040307101727447, "grad_norm": 1.2261738777160645, "kl": 0.3954087197780609, "learning_rate": 5.4217683901719324e-08, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2505 }, { "completion_length": 164.42857360839844, "epoch": 2.404990403071017, "grad_norm": 1.75701105594635, "kl": 0.4163576662540436, "learning_rate": 5.4049787640536915e-08, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2506 }, { "completion_length": 180.42857360839844, "epoch": 2.4059500959692897, "grad_norm": 0.011468841694295406, "kl": 0.2982240617275238, "learning_rate": 5.388212023170535e-08, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2507 }, { "completion_length": 133.21429443359375, "epoch": 2.4069097888675626, "grad_norm": 1.4219629764556885, "kl": 0.42223307490348816, "learning_rate": 5.3714681871045275e-08, "loss": 0.0004, "reward": 1.8571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2508 }, { "completion_length": 168.57144165039062, "epoch": 2.407869481765835, "grad_norm": 0.9337486624717712, "kl": 0.3354059159755707, "learning_rate": 5.3547472754109675e-08, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2509 }, { "completion_length": 164.2857208251953, "epoch": 2.4088291746641075, "grad_norm": 1.5079271793365479, "kl": 0.29671311378479004, "learning_rate": 5.338049307618389e-08, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2510 }, { "completion_length": 181.92857360839844, "epoch": 2.40978886756238, "grad_norm": 1.142136573791504, "kl": 0.45739269256591797, "learning_rate": 5.321374303228529e-08, "loss": 0.0005, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2511 }, { "completion_length": 211.35714721679688, "epoch": 2.4107485604606524, "grad_norm": 0.9684876203536987, "kl": 0.3248114585876465, "learning_rate": 5.304722281716292e-08, "loss": 0.0003, "reward": 1.8928571939468384, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2512 }, { "completion_length": 164.7857208251953, "epoch": 2.411708253358925, "grad_norm": 0.017805851995944977, "kl": 0.38499024510383606, "learning_rate": 5.2880932625297695e-08, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2513 }, { "completion_length": 165.85714721679688, "epoch": 2.412667946257198, "grad_norm": 1.2988173961639404, "kl": 0.30097731947898865, "learning_rate": 5.271487265090163e-08, "loss": 0.0003, "reward": 1.7857143878936768, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2514 }, { "completion_length": 157.85714721679688, "epoch": 2.4136276391554703, "grad_norm": 0.014436465688049793, "kl": 0.3697296679019928, "learning_rate": 5.2549043087917884e-08, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2515 }, { "completion_length": 201.6428680419922, "epoch": 2.4145873320537428, "grad_norm": 1.1842402219772339, "kl": 0.3851720690727234, "learning_rate": 5.238344413002072e-08, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2516 }, { "completion_length": 180.7857208251953, "epoch": 2.4155470249520152, "grad_norm": 1.3720343112945557, "kl": 0.3850937783718109, "learning_rate": 5.221807597061484e-08, "loss": 0.0004, "reward": 1.821428656578064, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2517 }, { "completion_length": 178.35714721679688, "epoch": 2.4165067178502877, "grad_norm": 0.008544509299099445, "kl": 0.30204105377197266, "learning_rate": 5.205293880283551e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2518 }, { "completion_length": 206.2857208251953, "epoch": 2.4174664107485606, "grad_norm": 0.00887549202889204, "kl": 0.29443424940109253, "learning_rate": 5.1888032819548175e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2519 }, { "completion_length": 198.6428680419922, "epoch": 2.418426103646833, "grad_norm": 1.3894703388214111, "kl": 0.3084889352321625, "learning_rate": 5.172335821334828e-08, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2520 }, { "completion_length": 212.85714721679688, "epoch": 2.4193857965451055, "grad_norm": 0.007832453586161137, "kl": 0.26815077662467957, "learning_rate": 5.1558915176561134e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2521 }, { "completion_length": 204.42857360839844, "epoch": 2.420345489443378, "grad_norm": 0.009603457525372505, "kl": 0.267591655254364, "learning_rate": 5.1394703901241507e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2522 }, { "completion_length": 253.1428680419922, "epoch": 2.4213051823416505, "grad_norm": 1.617789387702942, "kl": 0.24804186820983887, "learning_rate": 5.1230724579173404e-08, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2523 }, { "completion_length": 199.2857208251953, "epoch": 2.4222648752399234, "grad_norm": 1.3822736740112305, "kl": 0.3070124685764313, "learning_rate": 5.106697740187024e-08, "loss": 0.0003, "reward": 1.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2524 }, { "completion_length": 185.00001525878906, "epoch": 2.423224568138196, "grad_norm": 0.016446253284811974, "kl": 0.3532412350177765, "learning_rate": 5.090346256057382e-08, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2525 }, { "completion_length": 207.57144165039062, "epoch": 2.4241842610364683, "grad_norm": 1.4391915798187256, "kl": 0.28391167521476746, "learning_rate": 5.074018024625509e-08, "loss": 0.0003, "reward": 1.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2526 }, { "completion_length": 163.0, "epoch": 2.425143953934741, "grad_norm": 1.2671030759811401, "kl": 0.3602886497974396, "learning_rate": 5.0577130649613164e-08, "loss": 0.0004, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2527 }, { "completion_length": 194.71429443359375, "epoch": 2.4261036468330133, "grad_norm": 0.7667418718338013, "kl": 0.3223041892051697, "learning_rate": 5.0414313961075314e-08, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2528 }, { "completion_length": 200.35714721679688, "epoch": 2.427063339731286, "grad_norm": 1.4501936435699463, "kl": 0.2662181854248047, "learning_rate": 5.025173037079702e-08, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2529 }, { "completion_length": 184.7857208251953, "epoch": 2.4280230326295587, "grad_norm": 1.1450773477554321, "kl": 0.294231653213501, "learning_rate": 5.0089380068661366e-08, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2530 }, { "completion_length": 145.2857208251953, "epoch": 2.428982725527831, "grad_norm": 0.013263463973999023, "kl": 0.3533251881599426, "learning_rate": 4.992726324427901e-08, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2531 }, { "completion_length": 224.21429443359375, "epoch": 2.4299424184261036, "grad_norm": 0.6540523767471313, "kl": 0.25836992263793945, "learning_rate": 4.9765380086987886e-08, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2532 }, { "completion_length": 202.7857208251953, "epoch": 2.430902111324376, "grad_norm": 0.01716936007142067, "kl": 0.28131067752838135, "learning_rate": 4.960373078585303e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2533 }, { "completion_length": 175.1428680419922, "epoch": 2.431861804222649, "grad_norm": 0.5919933915138245, "kl": 0.34212809801101685, "learning_rate": 4.944231552966649e-08, "loss": 0.0003, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2534 }, { "completion_length": 186.07144165039062, "epoch": 2.4328214971209214, "grad_norm": 1.6164283752441406, "kl": 0.3255561590194702, "learning_rate": 4.9281134506946876e-08, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2535 }, { "completion_length": 172.50001525878906, "epoch": 2.433781190019194, "grad_norm": 0.007440268062055111, "kl": 0.35900118947029114, "learning_rate": 4.912018790593908e-08, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2536 }, { "completion_length": 169.07144165039062, "epoch": 2.4347408829174664, "grad_norm": 0.02076583355665207, "kl": 0.40811023116111755, "learning_rate": 4.8959475914614554e-08, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2537 }, { "completion_length": 190.35714721679688, "epoch": 2.435700575815739, "grad_norm": 0.008888263255357742, "kl": 0.2847234904766083, "learning_rate": 4.8798998720670446e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2538 }, { "completion_length": 238.57144165039062, "epoch": 2.4366602687140118, "grad_norm": 0.008178909309208393, "kl": 0.22666871547698975, "learning_rate": 4.863875651152985e-08, "loss": 0.0002, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2539 }, { "completion_length": 176.21429443359375, "epoch": 2.4376199616122842, "grad_norm": 0.9682276248931885, "kl": 0.31594330072402954, "learning_rate": 4.847874947434127e-08, "loss": 0.0003, "reward": 1.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2540 }, { "completion_length": 208.92857360839844, "epoch": 2.4385796545105567, "grad_norm": 0.017746377736330032, "kl": 0.28644412755966187, "learning_rate": 4.831897779597865e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2541 }, { "completion_length": 206.35714721679688, "epoch": 2.439539347408829, "grad_norm": 0.05255948007106781, "kl": 0.4349256455898285, "learning_rate": 4.815944166304112e-08, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2542 }, { "completion_length": 177.92857360839844, "epoch": 2.4404990403071016, "grad_norm": 0.01231843326240778, "kl": 0.3469539284706116, "learning_rate": 4.8000141261852614e-08, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2543 }, { "completion_length": 154.1428680419922, "epoch": 2.441458733205374, "grad_norm": 0.9333773851394653, "kl": 0.4332161247730255, "learning_rate": 4.784107677846175e-08, "loss": 0.0004, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2544 }, { "completion_length": 283.0714416503906, "epoch": 2.442418426103647, "grad_norm": 0.02984721027314663, "kl": 0.34129223227500916, "learning_rate": 4.768224839864165e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2545 }, { "completion_length": 149.71429443359375, "epoch": 2.4433781190019195, "grad_norm": 1.7360296249389648, "kl": 0.4202519953250885, "learning_rate": 4.75236563078896e-08, "loss": 0.0004, "reward": 1.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2546 }, { "completion_length": 176.21429443359375, "epoch": 2.444337811900192, "grad_norm": 0.6649692058563232, "kl": 0.35296911001205444, "learning_rate": 4.736530069142716e-08, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2547 }, { "completion_length": 145.42857360839844, "epoch": 2.4452975047984644, "grad_norm": 1.1773654222488403, "kl": 0.35725268721580505, "learning_rate": 4.7207181734199465e-08, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2548 }, { "completion_length": 182.71429443359375, "epoch": 2.446257197696737, "grad_norm": 1.913022518157959, "kl": 0.32783243060112, "learning_rate": 4.7049299620875256e-08, "loss": 0.0003, "reward": 1.821428656578064, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2549 }, { "completion_length": 186.00001525878906, "epoch": 2.4472168905950094, "grad_norm": 0.008643184788525105, "kl": 0.3464217483997345, "learning_rate": 4.689165453584692e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2550 }, { "completion_length": 202.85714721679688, "epoch": 2.4481765834932823, "grad_norm": 0.0436628982424736, "kl": 0.3985244333744049, "learning_rate": 4.673424666322973e-08, "loss": 0.0004, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2551 }, { "completion_length": 161.5, "epoch": 2.4491362763915547, "grad_norm": 1.5638095140457153, "kl": 0.389150470495224, "learning_rate": 4.6577076186862044e-08, "loss": 0.0004, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2552 }, { "completion_length": 203.92857360839844, "epoch": 2.450095969289827, "grad_norm": 0.013795660808682442, "kl": 0.3229508399963379, "learning_rate": 4.642014329030497e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2553 }, { "completion_length": 169.21429443359375, "epoch": 2.4510556621880997, "grad_norm": 0.009433752857148647, "kl": 0.33564096689224243, "learning_rate": 4.626344815684199e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2554 }, { "completion_length": 133.71429443359375, "epoch": 2.452015355086372, "grad_norm": 1.8000882863998413, "kl": 0.42570745944976807, "learning_rate": 4.610699096947923e-08, "loss": 0.0004, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 2555 }, { "completion_length": 208.57144165039062, "epoch": 2.452975047984645, "grad_norm": 0.709203839302063, "kl": 0.2600846588611603, "learning_rate": 4.5950771910944596e-08, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2556 }, { "completion_length": 187.71429443359375, "epoch": 2.4539347408829175, "grad_norm": 1.4143450260162354, "kl": 0.34470903873443604, "learning_rate": 4.579479116368795e-08, "loss": 0.0003, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2557 }, { "completion_length": 145.57144165039062, "epoch": 2.45489443378119, "grad_norm": 0.012939107604324818, "kl": 0.4307747185230255, "learning_rate": 4.563904890988107e-08, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2558 }, { "completion_length": 144.07144165039062, "epoch": 2.4558541266794625, "grad_norm": 1.4454097747802734, "kl": 0.43345677852630615, "learning_rate": 4.5483545331416764e-08, "loss": 0.0004, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2559 }, { "completion_length": 204.00001525878906, "epoch": 2.456813819577735, "grad_norm": 1.3234285116195679, "kl": 0.32398882508277893, "learning_rate": 4.532828060990948e-08, "loss": 0.0003, "reward": 1.7142858505249023, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2560 }, { "completion_length": 152.6428680419922, "epoch": 2.457773512476008, "grad_norm": 0.01966056413948536, "kl": 0.3959442675113678, "learning_rate": 4.517325492669452e-08, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2561 }, { "completion_length": 216.6428680419922, "epoch": 2.4587332053742803, "grad_norm": 0.009683655574917793, "kl": 0.2773955464363098, "learning_rate": 4.5018468462827945e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2562 }, { "completion_length": 108.85714721679688, "epoch": 2.4596928982725528, "grad_norm": 1.5183258056640625, "kl": 0.5153037309646606, "learning_rate": 4.486392139908668e-08, "loss": 0.0005, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2563 }, { "completion_length": 161.85714721679688, "epoch": 2.4606525911708252, "grad_norm": 0.010527819395065308, "kl": 0.32673755288124084, "learning_rate": 4.470961391596784e-08, "loss": 0.0003, "reward": 1.7857143878936768, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2564 }, { "completion_length": 178.35714721679688, "epoch": 2.4616122840690977, "grad_norm": 1.573642373085022, "kl": 0.3905397057533264, "learning_rate": 4.4555546193688734e-08, "loss": 0.0004, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2565 }, { "completion_length": 181.1428680419922, "epoch": 2.4625719769673706, "grad_norm": 1.6299779415130615, "kl": 0.3721598982810974, "learning_rate": 4.440171841218673e-08, "loss": 0.0004, "reward": 1.8928571939468384, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2566 }, { "completion_length": 202.35714721679688, "epoch": 2.463531669865643, "grad_norm": 0.08195416629314423, "kl": 0.4410248100757599, "learning_rate": 4.424813075111891e-08, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2567 }, { "completion_length": 194.57144165039062, "epoch": 2.4644913627639156, "grad_norm": 0.011896919459104538, "kl": 0.32372570037841797, "learning_rate": 4.409478338986203e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2568 }, { "completion_length": 174.07144165039062, "epoch": 2.465451055662188, "grad_norm": 0.010131977498531342, "kl": 0.35007891058921814, "learning_rate": 4.394167650751207e-08, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2569 }, { "completion_length": 155.6428680419922, "epoch": 2.4664107485604605, "grad_norm": 0.01002774853259325, "kl": 0.38772502541542053, "learning_rate": 4.3788810282884144e-08, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2570 }, { "completion_length": 215.00001525878906, "epoch": 2.4673704414587334, "grad_norm": 0.013083051890134811, "kl": 0.3020890951156616, "learning_rate": 4.363618489451246e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2571 }, { "completion_length": 156.5, "epoch": 2.468330134357006, "grad_norm": 1.1489495038986206, "kl": 0.35990655422210693, "learning_rate": 4.348380052064979e-08, "loss": 0.0004, "reward": 1.7142858505249023, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2572 }, { "completion_length": 205.35714721679688, "epoch": 2.4692898272552783, "grad_norm": 0.6850758194923401, "kl": 0.2423323392868042, "learning_rate": 4.333165733926747e-08, "loss": 0.0002, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2573 }, { "completion_length": 162.92857360839844, "epoch": 2.470249520153551, "grad_norm": 0.012041708454489708, "kl": 0.3714865446090698, "learning_rate": 4.317975552805517e-08, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2574 }, { "completion_length": 193.2857208251953, "epoch": 2.4712092130518233, "grad_norm": 0.010310503654181957, "kl": 0.30476319789886475, "learning_rate": 4.3028095264420525e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2575 }, { "completion_length": 186.07144165039062, "epoch": 2.472168905950096, "grad_norm": 0.022063761949539185, "kl": 0.33225807547569275, "learning_rate": 4.287667672548936e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2576 }, { "completion_length": 191.7857208251953, "epoch": 2.4731285988483687, "grad_norm": 0.008844816125929356, "kl": 0.31990641355514526, "learning_rate": 4.272550008810494e-08, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2577 }, { "completion_length": 221.71429443359375, "epoch": 2.474088291746641, "grad_norm": 0.011114122346043587, "kl": 0.27252328395843506, "learning_rate": 4.257456552882807e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2578 }, { "completion_length": 254.6428680419922, "epoch": 2.4750479846449136, "grad_norm": 0.9988675713539124, "kl": 0.22727017104625702, "learning_rate": 4.242387322393681e-08, "loss": 0.0002, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2579 }, { "completion_length": 150.2857208251953, "epoch": 2.476007677543186, "grad_norm": 0.024524079635739326, "kl": 0.45339760184288025, "learning_rate": 4.227342334942629e-08, "loss": 0.0005, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2580 }, { "completion_length": 188.2857208251953, "epoch": 2.476967370441459, "grad_norm": 1.217787742614746, "kl": 0.2847225069999695, "learning_rate": 4.212321608100866e-08, "loss": 0.0003, "reward": 1.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2581 }, { "completion_length": 180.1428680419922, "epoch": 2.4779270633397315, "grad_norm": 1.4544703960418701, "kl": 0.3414210081100464, "learning_rate": 4.1973251594112495e-08, "loss": 0.0003, "reward": 1.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2582 }, { "completion_length": 201.35714721679688, "epoch": 2.478886756238004, "grad_norm": 1.2553447484970093, "kl": 0.34090176224708557, "learning_rate": 4.1823530063882894e-08, "loss": 0.0003, "reward": 1.7500001192092896, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.392857164144516, "step": 2583 }, { "completion_length": 164.7857208251953, "epoch": 2.4798464491362764, "grad_norm": 1.812833547592163, "kl": 0.3626802861690521, "learning_rate": 4.167405166518137e-08, "loss": 0.0004, "reward": 1.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2584 }, { "completion_length": 191.42857360839844, "epoch": 2.480806142034549, "grad_norm": 1.3257713317871094, "kl": 0.3168545961380005, "learning_rate": 4.152481657258522e-08, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2585 }, { "completion_length": 141.85714721679688, "epoch": 2.4817658349328213, "grad_norm": 1.257641077041626, "kl": 0.4772416055202484, "learning_rate": 4.1375824960387776e-08, "loss": 0.0005, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 2586 }, { "completion_length": 137.5, "epoch": 2.4827255278310942, "grad_norm": 0.01182861253619194, "kl": 0.4108017086982727, "learning_rate": 4.122707700259792e-08, "loss": 0.0004, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2587 }, { "completion_length": 207.92857360839844, "epoch": 2.4836852207293667, "grad_norm": 0.007865730673074722, "kl": 0.2774807810783386, "learning_rate": 4.1078572872939896e-08, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2588 }, { "completion_length": 207.35714721679688, "epoch": 2.484644913627639, "grad_norm": 0.056328605860471725, "kl": 0.40787678956985474, "learning_rate": 4.0930312744853394e-08, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2589 }, { "completion_length": 159.85714721679688, "epoch": 2.4856046065259116, "grad_norm": 3.1386539936065674, "kl": 0.4377638101577759, "learning_rate": 4.078229679149297e-08, "loss": 0.0004, "reward": 1.7857143878936768, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2590 }, { "completion_length": 201.50001525878906, "epoch": 2.486564299424184, "grad_norm": 0.054844170808792114, "kl": 0.33993664383888245, "learning_rate": 4.0634525185727965e-08, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2591 }, { "completion_length": 185.92857360839844, "epoch": 2.4875239923224566, "grad_norm": 0.008563022129237652, "kl": 0.34217146039009094, "learning_rate": 4.048699810014261e-08, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2592 }, { "completion_length": 154.6428680419922, "epoch": 2.4884836852207295, "grad_norm": 0.010772163048386574, "kl": 0.3875456750392914, "learning_rate": 4.0339715707035124e-08, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2593 }, { "completion_length": 169.71429443359375, "epoch": 2.489443378119002, "grad_norm": 0.04059404507279396, "kl": 0.3734409213066101, "learning_rate": 4.019267817841834e-08, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2594 }, { "completion_length": 152.35714721679688, "epoch": 2.4904030710172744, "grad_norm": 0.01469828374683857, "kl": 0.3615351915359497, "learning_rate": 4.0045885686018965e-08, "loss": 0.0004, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2595 }, { "completion_length": 176.2857208251953, "epoch": 2.491362763915547, "grad_norm": 0.01391321886330843, "kl": 0.3318660855293274, "learning_rate": 3.9899338401277406e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2596 }, { "completion_length": 174.00001525878906, "epoch": 2.4923224568138194, "grad_norm": 1.2524551153182983, "kl": 0.33603137731552124, "learning_rate": 3.975303649534792e-08, "loss": 0.0003, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2597 }, { "completion_length": 199.21429443359375, "epoch": 2.4932821497120923, "grad_norm": 0.017924239858984947, "kl": 0.3652800917625427, "learning_rate": 3.9606980139098075e-08, "loss": 0.0004, "reward": 1.7857143878936768, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2598 }, { "completion_length": 176.7857208251953, "epoch": 2.4942418426103647, "grad_norm": 1.1573584079742432, "kl": 0.34444543719291687, "learning_rate": 3.94611695031086e-08, "loss": 0.0003, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2599 }, { "completion_length": 188.00001525878906, "epoch": 2.495201535508637, "grad_norm": 0.016575125977396965, "kl": 0.33797687292099, "learning_rate": 3.9315604757673286e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2600 }, { "completion_length": 173.92857360839844, "epoch": 2.4961612284069097, "grad_norm": 0.0835808590054512, "kl": 0.45115911960601807, "learning_rate": 3.9170286072798734e-08, "loss": 0.0005, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2601 }, { "completion_length": 182.50001525878906, "epoch": 2.497120921305182, "grad_norm": 1.1358191967010498, "kl": 0.3655143678188324, "learning_rate": 3.902521361820432e-08, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2602 }, { "completion_length": 194.7857208251953, "epoch": 2.498080614203455, "grad_norm": 0.009847207926213741, "kl": 0.28937435150146484, "learning_rate": 3.8880387563321626e-08, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2603 }, { "completion_length": 191.42857360839844, "epoch": 2.4990403071017275, "grad_norm": 0.009310148656368256, "kl": 0.29927799105644226, "learning_rate": 3.873580807729451e-08, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2604 }, { "completion_length": 232.21429443359375, "epoch": 2.5, "grad_norm": 0.028602181002497673, "kl": 0.3052334189414978, "learning_rate": 3.8591475328978995e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2605 }, { "completion_length": 176.42857360839844, "epoch": 2.5009596928982725, "grad_norm": 1.924063801765442, "kl": 0.3052733540534973, "learning_rate": 3.844738948694282e-08, "loss": 0.0003, "reward": 1.6785714626312256, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2606 }, { "completion_length": 166.21429443359375, "epoch": 2.501919385796545, "grad_norm": 0.0906614363193512, "kl": 0.4806661903858185, "learning_rate": 3.830355071946534e-08, "loss": 0.0005, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2607 }, { "completion_length": 189.2857208251953, "epoch": 2.502879078694818, "grad_norm": 0.014875533990561962, "kl": 0.35977765917778015, "learning_rate": 3.815995919453743e-08, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2608 }, { "completion_length": 166.7857208251953, "epoch": 2.5038387715930903, "grad_norm": 0.007758957799524069, "kl": 0.35883742570877075, "learning_rate": 3.801661507986112e-08, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2609 }, { "completion_length": 183.2857208251953, "epoch": 2.504798464491363, "grad_norm": 0.009560727514326572, "kl": 0.3465821146965027, "learning_rate": 3.787351854284959e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2610 }, { "completion_length": 197.85714721679688, "epoch": 2.5057581573896353, "grad_norm": 1.1955640316009521, "kl": 0.3857085406780243, "learning_rate": 3.773066975062683e-08, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2611 }, { "completion_length": 210.92857360839844, "epoch": 2.5067178502879077, "grad_norm": 0.02440878376364708, "kl": 0.30946052074432373, "learning_rate": 3.758806887002744e-08, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2612 }, { "completion_length": 152.35714721679688, "epoch": 2.5076775431861806, "grad_norm": 1.9535598754882812, "kl": 0.45201992988586426, "learning_rate": 3.74457160675965e-08, "loss": 0.0005, "reward": 1.7857143878936768, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2613 }, { "completion_length": 186.35714721679688, "epoch": 2.508637236084453, "grad_norm": 1.3088961839675903, "kl": 0.2597268223762512, "learning_rate": 3.7303611509589354e-08, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2614 }, { "completion_length": 192.71429443359375, "epoch": 2.5095969289827256, "grad_norm": 0.00647449865937233, "kl": 0.3100576400756836, "learning_rate": 3.71617553619715e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2615 }, { "completion_length": 194.85714721679688, "epoch": 2.510556621880998, "grad_norm": 1.6439014673233032, "kl": 0.2643916606903076, "learning_rate": 3.702014779041826e-08, "loss": 0.0003, "reward": 1.7857143878936768, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2616 }, { "completion_length": 218.2857208251953, "epoch": 2.5115163147792705, "grad_norm": 0.015093004330992699, "kl": 0.31051167845726013, "learning_rate": 3.687878896031452e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2617 }, { "completion_length": 187.21429443359375, "epoch": 2.5124760076775434, "grad_norm": 1.3180139064788818, "kl": 0.33560994267463684, "learning_rate": 3.67376790367549e-08, "loss": 0.0003, "reward": 1.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2618 }, { "completion_length": 201.85714721679688, "epoch": 2.513435700575816, "grad_norm": 0.006989854387938976, "kl": 0.27068960666656494, "learning_rate": 3.659681818454316e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2619 }, { "completion_length": 194.7857208251953, "epoch": 2.5143953934740884, "grad_norm": 0.9051700830459595, "kl": 0.2815491855144501, "learning_rate": 3.645620656819215e-08, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2620 }, { "completion_length": 207.71429443359375, "epoch": 2.515355086372361, "grad_norm": 0.9090889096260071, "kl": 0.26127201318740845, "learning_rate": 3.631584435192372e-08, "loss": 0.0003, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2621 }, { "completion_length": 203.71429443359375, "epoch": 2.5163147792706333, "grad_norm": 0.015369345434010029, "kl": 0.2922550141811371, "learning_rate": 3.617573169966837e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2622 }, { "completion_length": 237.35714721679688, "epoch": 2.517274472168906, "grad_norm": 0.016375890001654625, "kl": 0.25179097056388855, "learning_rate": 3.6035868775065207e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2623 }, { "completion_length": 186.57144165039062, "epoch": 2.5182341650671782, "grad_norm": 0.011692018248140812, "kl": 0.28586941957473755, "learning_rate": 3.589625574146163e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2624 }, { "completion_length": 148.57144165039062, "epoch": 2.519193857965451, "grad_norm": 2.019493579864502, "kl": 0.4195590317249298, "learning_rate": 3.575689276191313e-08, "loss": 0.0004, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2625 }, { "completion_length": 158.1428680419922, "epoch": 2.5201535508637236, "grad_norm": 0.01424933597445488, "kl": 0.3663739264011383, "learning_rate": 3.561777999918339e-08, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2626 }, { "completion_length": 184.35714721679688, "epoch": 2.521113243761996, "grad_norm": 1.4428977966308594, "kl": 0.2877374589443207, "learning_rate": 3.547891761574348e-08, "loss": 0.0003, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2627 }, { "completion_length": 158.57144165039062, "epoch": 2.5220729366602685, "grad_norm": 1.6193922758102417, "kl": 0.35909679532051086, "learning_rate": 3.5340305773772425e-08, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2628 }, { "completion_length": 203.7857208251953, "epoch": 2.523032629558541, "grad_norm": 1.66182541847229, "kl": 0.37331023812294006, "learning_rate": 3.520194463515641e-08, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2629 }, { "completion_length": 164.5, "epoch": 2.523992322456814, "grad_norm": 0.008839497342705727, "kl": 0.36207202076911926, "learning_rate": 3.5063834361488855e-08, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2630 }, { "completion_length": 214.35714721679688, "epoch": 2.5249520153550864, "grad_norm": 1.7942403554916382, "kl": 0.28823214769363403, "learning_rate": 3.492597511407033e-08, "loss": 0.0003, "reward": 1.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2631 }, { "completion_length": 185.07144165039062, "epoch": 2.525911708253359, "grad_norm": 1.1461976766586304, "kl": 0.4695564806461334, "learning_rate": 3.478836705390808e-08, "loss": 0.0005, "reward": 1.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2632 }, { "completion_length": 163.85714721679688, "epoch": 2.5268714011516313, "grad_norm": 1.5815956592559814, "kl": 0.4054277539253235, "learning_rate": 3.465101034171603e-08, "loss": 0.0004, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2633 }, { "completion_length": 193.21429443359375, "epoch": 2.527831094049904, "grad_norm": 1.0786861181259155, "kl": 0.29217827320098877, "learning_rate": 3.451390513791452e-08, "loss": 0.0003, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2634 }, { "completion_length": 208.71429443359375, "epoch": 2.5287907869481767, "grad_norm": 1.5751498937606812, "kl": 0.30083411931991577, "learning_rate": 3.437705160263016e-08, "loss": 0.0003, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2635 }, { "completion_length": 172.35714721679688, "epoch": 2.529750479846449, "grad_norm": 1.2634210586547852, "kl": 0.3183860182762146, "learning_rate": 3.42404498956958e-08, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2636 }, { "completion_length": 175.92857360839844, "epoch": 2.5307101727447217, "grad_norm": 0.016136271879076958, "kl": 0.30917564034461975, "learning_rate": 3.4104100176649917e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2637 }, { "completion_length": 189.35714721679688, "epoch": 2.531669865642994, "grad_norm": 0.7458392381668091, "kl": 0.3866008520126343, "learning_rate": 3.396800260473678e-08, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2638 }, { "completion_length": 206.6428680419922, "epoch": 2.5326295585412666, "grad_norm": 0.032528456300497055, "kl": 0.32214924693107605, "learning_rate": 3.383215733890635e-08, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2639 }, { "completion_length": 112.64286041259766, "epoch": 2.5335892514395395, "grad_norm": 0.010660351254045963, "kl": 0.47657716274261475, "learning_rate": 3.3696564537813664e-08, "loss": 0.0005, "reward": 1.7857143878936768, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2640 }, { "completion_length": 191.7857208251953, "epoch": 2.534548944337812, "grad_norm": 1.1510628461837769, "kl": 0.3123399317264557, "learning_rate": 3.3561224359819004e-08, "loss": 0.0003, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2641 }, { "completion_length": 189.85714721679688, "epoch": 2.5355086372360844, "grad_norm": 1.3260492086410522, "kl": 0.36465984582901, "learning_rate": 3.342613696298763e-08, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2642 }, { "completion_length": 178.92857360839844, "epoch": 2.536468330134357, "grad_norm": 1.1749379634857178, "kl": 0.29855844378471375, "learning_rate": 3.329130250508952e-08, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2643 }, { "completion_length": 187.21429443359375, "epoch": 2.5374280230326294, "grad_norm": 1.3729122877120972, "kl": 0.39023053646087646, "learning_rate": 3.315672114359935e-08, "loss": 0.0004, "reward": 1.8928571939468384, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2644 }, { "completion_length": 151.42857360839844, "epoch": 2.5383877159309023, "grad_norm": 1.5778801441192627, "kl": 0.4100602865219116, "learning_rate": 3.302239303569609e-08, "loss": 0.0004, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2645 }, { "completion_length": 208.07144165039062, "epoch": 2.5393474088291748, "grad_norm": 0.01889113150537014, "kl": 0.32169240713119507, "learning_rate": 3.288831833826297e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2646 }, { "completion_length": 152.35714721679688, "epoch": 2.5403071017274472, "grad_norm": 1.6577115058898926, "kl": 0.3403840959072113, "learning_rate": 3.275449720788737e-08, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2647 }, { "completion_length": 194.07144165039062, "epoch": 2.5412667946257197, "grad_norm": 2.0230166912078857, "kl": 0.3524166941642761, "learning_rate": 3.26209298008602e-08, "loss": 0.0004, "reward": 1.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2648 }, { "completion_length": 186.85714721679688, "epoch": 2.542226487523992, "grad_norm": 0.01864808425307274, "kl": 0.3706800043582916, "learning_rate": 3.248761627317648e-08, "loss": 0.0004, "reward": 1.7142858505249023, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2649 }, { "completion_length": 177.7857208251953, "epoch": 2.543186180422265, "grad_norm": 0.06406549364328384, "kl": 0.41549065709114075, "learning_rate": 3.235455678053442e-08, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2650 }, { "completion_length": 199.21429443359375, "epoch": 2.5441458733205375, "grad_norm": 0.020024849101901054, "kl": 0.3513204753398895, "learning_rate": 3.222175147833556e-08, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2651 }, { "completion_length": 163.71429443359375, "epoch": 2.54510556621881, "grad_norm": 0.5335690379142761, "kl": 0.3755672574043274, "learning_rate": 3.208920052168476e-08, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2652 }, { "completion_length": 171.7857208251953, "epoch": 2.5460652591170825, "grad_norm": 0.008239270187914371, "kl": 0.3418560326099396, "learning_rate": 3.195690406538973e-08, "loss": 0.0003, "reward": 1.7142858505249023, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2653 }, { "completion_length": 172.85714721679688, "epoch": 2.547024952015355, "grad_norm": 1.3064333200454712, "kl": 0.3267929255962372, "learning_rate": 3.182486226396067e-08, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2654 }, { "completion_length": 214.57144165039062, "epoch": 2.547984644913628, "grad_norm": 0.011193578131496906, "kl": 0.254708468914032, "learning_rate": 3.169307527161086e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2655 }, { "completion_length": 171.50001525878906, "epoch": 2.5489443378119003, "grad_norm": 0.0372210256755352, "kl": 0.3971518278121948, "learning_rate": 3.1561543242255603e-08, "loss": 0.0004, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2656 }, { "completion_length": 238.71429443359375, "epoch": 2.549904030710173, "grad_norm": 0.0375409796833992, "kl": 0.27347293496131897, "learning_rate": 3.143026632951265e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2657 }, { "completion_length": 217.42857360839844, "epoch": 2.5508637236084453, "grad_norm": 0.8128922581672668, "kl": 0.30345970392227173, "learning_rate": 3.12992446867017e-08, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2658 }, { "completion_length": 155.1428680419922, "epoch": 2.5518234165067177, "grad_norm": 1.5112611055374146, "kl": 0.3244291841983795, "learning_rate": 3.1168478466844226e-08, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2659 }, { "completion_length": 176.35714721679688, "epoch": 2.5527831094049906, "grad_norm": 1.7325489521026611, "kl": 0.33145007491111755, "learning_rate": 3.103796782266371e-08, "loss": 0.0003, "reward": 1.821428656578064, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2660 }, { "completion_length": 106.50000762939453, "epoch": 2.553742802303263, "grad_norm": 2.4604361057281494, "kl": 0.5321465134620667, "learning_rate": 3.09077129065847e-08, "loss": 0.0005, "reward": 1.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 2661 }, { "completion_length": 121.5714340209961, "epoch": 2.5547024952015356, "grad_norm": 0.009615978226065636, "kl": 0.48091310262680054, "learning_rate": 3.077771387073347e-08, "loss": 0.0005, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2662 }, { "completion_length": 206.57144165039062, "epoch": 2.555662188099808, "grad_norm": 0.009162578731775284, "kl": 0.3251745402812958, "learning_rate": 3.064797086693721e-08, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 2663 }, { "completion_length": 154.6428680419922, "epoch": 2.5566218809980805, "grad_norm": 0.834216296672821, "kl": 0.453585684299469, "learning_rate": 3.051848404672411e-08, "loss": 0.0005, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2664 }, { "completion_length": 162.42857360839844, "epoch": 2.5575815738963534, "grad_norm": 0.014734955504536629, "kl": 0.3392491042613983, "learning_rate": 3.0389253561323354e-08, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2665 }, { "completion_length": 219.21429443359375, "epoch": 2.5585412667946255, "grad_norm": 1.840009331703186, "kl": 0.2957611083984375, "learning_rate": 3.026027956166452e-08, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2666 }, { "completion_length": 144.07144165039062, "epoch": 2.5595009596928984, "grad_norm": 1.6640069484710693, "kl": 0.3898094892501831, "learning_rate": 3.013156219837776e-08, "loss": 0.0004, "reward": 1.6428571939468384, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2667 }, { "completion_length": 139.0, "epoch": 2.560460652591171, "grad_norm": 0.011775943450629711, "kl": 0.38298600912094116, "learning_rate": 3.000310162179342e-08, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2668 }, { "completion_length": 187.07144165039062, "epoch": 2.5614203454894433, "grad_norm": 1.0242512226104736, "kl": 0.2971632480621338, "learning_rate": 2.9874897981941954e-08, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2669 }, { "completion_length": 215.7857208251953, "epoch": 2.5623800383877158, "grad_norm": 0.008639813400804996, "kl": 0.258413702249527, "learning_rate": 2.974695142855388e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2670 }, { "completion_length": 152.07144165039062, "epoch": 2.5633397312859882, "grad_norm": 0.9869286417961121, "kl": 0.3279154896736145, "learning_rate": 2.961926211105928e-08, "loss": 0.0003, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2671 }, { "completion_length": 144.57144165039062, "epoch": 2.564299424184261, "grad_norm": 1.0590990781784058, "kl": 0.3352360129356384, "learning_rate": 2.9491830178587807e-08, "loss": 0.0003, "reward": 1.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2672 }, { "completion_length": 190.6428680419922, "epoch": 2.5652591170825336, "grad_norm": 0.010669614188373089, "kl": 0.32751330733299255, "learning_rate": 2.9364655779968718e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2673 }, { "completion_length": 277.0714416503906, "epoch": 2.566218809980806, "grad_norm": 0.6659300923347473, "kl": 0.20930321514606476, "learning_rate": 2.923773906373034e-08, "loss": 0.0002, "reward": 1.8571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2674 }, { "completion_length": 202.35714721679688, "epoch": 2.5671785028790786, "grad_norm": 0.00774071691557765, "kl": 0.27286285161972046, "learning_rate": 2.9111080178099878e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2675 }, { "completion_length": 173.6428680419922, "epoch": 2.568138195777351, "grad_norm": 0.008659768849611282, "kl": 0.34695693850517273, "learning_rate": 2.8984679271003815e-08, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2676 }, { "completion_length": 191.21429443359375, "epoch": 2.569097888675624, "grad_norm": 1.7126470804214478, "kl": 0.3031390905380249, "learning_rate": 2.8858536490066977e-08, "loss": 0.0003, "reward": 1.821428656578064, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2677 }, { "completion_length": 210.6428680419922, "epoch": 2.5700575815738964, "grad_norm": 1.2109603881835938, "kl": 0.42963117361068726, "learning_rate": 2.8732651982612986e-08, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2678 }, { "completion_length": 195.92857360839844, "epoch": 2.571017274472169, "grad_norm": 1.4593415260314941, "kl": 0.2652169167995453, "learning_rate": 2.860702589566366e-08, "loss": 0.0003, "reward": 1.8928571939468384, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2679 }, { "completion_length": 197.1428680419922, "epoch": 2.5719769673704413, "grad_norm": 0.9965642094612122, "kl": 0.3722916543483734, "learning_rate": 2.8481658375938994e-08, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2680 }, { "completion_length": 157.92857360839844, "epoch": 2.572936660268714, "grad_norm": 1.2441680431365967, "kl": 0.3728683888912201, "learning_rate": 2.8356549569857197e-08, "loss": 0.0004, "reward": 1.821428656578064, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2681 }, { "completion_length": 129.71429443359375, "epoch": 2.5738963531669867, "grad_norm": 0.01492282934486866, "kl": 0.4146052598953247, "learning_rate": 2.8231699623534e-08, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2682 }, { "completion_length": 223.6428680419922, "epoch": 2.574856046065259, "grad_norm": 1.1472474336624146, "kl": 0.2534049153327942, "learning_rate": 2.8107108682783127e-08, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2683 }, { "completion_length": 178.85714721679688, "epoch": 2.5758157389635317, "grad_norm": 0.010303694754838943, "kl": 0.2955412268638611, "learning_rate": 2.7982776893115624e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2684 }, { "completion_length": 178.2857208251953, "epoch": 2.576775431861804, "grad_norm": 1.3746154308319092, "kl": 0.3125919997692108, "learning_rate": 2.785870439973989e-08, "loss": 0.0003, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2685 }, { "completion_length": 158.5, "epoch": 2.5777351247600766, "grad_norm": 0.04593809321522713, "kl": 0.5031841993331909, "learning_rate": 2.77348913475616e-08, "loss": 0.0005, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2686 }, { "completion_length": 215.7857208251953, "epoch": 2.5786948176583495, "grad_norm": 1.5128505229949951, "kl": 0.3560214340686798, "learning_rate": 2.7611337881183334e-08, "loss": 0.0004, "reward": 1.8571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2687 }, { "completion_length": 161.1428680419922, "epoch": 2.579654510556622, "grad_norm": 0.87610924243927, "kl": 0.35037389397621155, "learning_rate": 2.7488044144904383e-08, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2688 }, { "completion_length": 186.35714721679688, "epoch": 2.5806142034548945, "grad_norm": 1.1056838035583496, "kl": 0.3585208058357239, "learning_rate": 2.736501028272095e-08, "loss": 0.0004, "reward": 1.8571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2689 }, { "completion_length": 193.07144165039062, "epoch": 2.581573896353167, "grad_norm": 0.01842082478106022, "kl": 0.2691999077796936, "learning_rate": 2.7242236438325517e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2690 }, { "completion_length": 152.07144165039062, "epoch": 2.5825335892514394, "grad_norm": 0.010248672217130661, "kl": 0.36947983503341675, "learning_rate": 2.7119722755107044e-08, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2691 }, { "completion_length": 166.42857360839844, "epoch": 2.5834932821497123, "grad_norm": 1.6823863983154297, "kl": 0.3587406873703003, "learning_rate": 2.6997469376150577e-08, "loss": 0.0004, "reward": 1.821428656578064, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2692 }, { "completion_length": 155.92857360839844, "epoch": 2.5844529750479848, "grad_norm": 1.883918046951294, "kl": 0.35178273916244507, "learning_rate": 2.6875476444237017e-08, "loss": 0.0004, "reward": 1.7857143878936768, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2693 }, { "completion_length": 219.71429443359375, "epoch": 2.5854126679462572, "grad_norm": 0.7025629281997681, "kl": 0.2653576135635376, "learning_rate": 2.6753744101843443e-08, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2694 }, { "completion_length": 175.1428680419922, "epoch": 2.5863723608445297, "grad_norm": 0.015056794509291649, "kl": 0.30350685119628906, "learning_rate": 2.6632272491142165e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2695 }, { "completion_length": 172.07144165039062, "epoch": 2.587332053742802, "grad_norm": 0.013272776268422604, "kl": 0.367918998003006, "learning_rate": 2.6511061754001196e-08, "loss": 0.0004, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2696 }, { "completion_length": 228.1428680419922, "epoch": 2.588291746641075, "grad_norm": 0.010306659154593945, "kl": 0.2581914961338043, "learning_rate": 2.6390112031983937e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2697 }, { "completion_length": 169.57144165039062, "epoch": 2.5892514395393476, "grad_norm": 2.0714383125305176, "kl": 0.4178679287433624, "learning_rate": 2.6269423466348777e-08, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2698 }, { "completion_length": 151.6428680419922, "epoch": 2.59021113243762, "grad_norm": 0.013564744032919407, "kl": 0.36601242423057556, "learning_rate": 2.6148996198049277e-08, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2699 }, { "completion_length": 220.4285888671875, "epoch": 2.5911708253358925, "grad_norm": 0.6665008068084717, "kl": 0.23615705966949463, "learning_rate": 2.6028830367733706e-08, "loss": 0.0002, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2700 }, { "completion_length": 170.57144165039062, "epoch": 2.592130518234165, "grad_norm": 0.6749823093414307, "kl": 0.32991451025009155, "learning_rate": 2.5908926115744994e-08, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2701 }, { "completion_length": 158.6428680419922, "epoch": 2.593090211132438, "grad_norm": 1.1779839992523193, "kl": 0.32799381017684937, "learning_rate": 2.5789283582120598e-08, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2702 }, { "completion_length": 174.50001525878906, "epoch": 2.59404990403071, "grad_norm": 0.9615773558616638, "kl": 0.3082179129123688, "learning_rate": 2.5669902906592293e-08, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2703 }, { "completion_length": 137.92857360839844, "epoch": 2.595009596928983, "grad_norm": 0.010010981932282448, "kl": 0.4597902297973633, "learning_rate": 2.5550784228586114e-08, "loss": 0.0005, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2704 }, { "completion_length": 178.42857360839844, "epoch": 2.5959692898272553, "grad_norm": 0.01243535615503788, "kl": 0.37844938039779663, "learning_rate": 2.5431927687222022e-08, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2705 }, { "completion_length": 195.85714721679688, "epoch": 2.5969289827255277, "grad_norm": 0.007530194707214832, "kl": 0.2539064288139343, "learning_rate": 2.531333342131378e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2706 }, { "completion_length": 188.57144165039062, "epoch": 2.5978886756238007, "grad_norm": 0.015292883850634098, "kl": 0.2903260290622711, "learning_rate": 2.519500156936899e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2707 }, { "completion_length": 140.2857208251953, "epoch": 2.5988483685220727, "grad_norm": 0.029481103643774986, "kl": 0.4219878315925598, "learning_rate": 2.5076932269588708e-08, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2708 }, { "completion_length": 207.92857360839844, "epoch": 2.5998080614203456, "grad_norm": 0.007209181785583496, "kl": 0.30908700823783875, "learning_rate": 2.4959125659867186e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2709 }, { "completion_length": 201.07144165039062, "epoch": 2.600767754318618, "grad_norm": 0.008378923870623112, "kl": 0.28945791721343994, "learning_rate": 2.4841581877792145e-08, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2710 }, { "completion_length": 177.50001525878906, "epoch": 2.6017274472168905, "grad_norm": 1.1739884614944458, "kl": 0.30577486753463745, "learning_rate": 2.4724301060644158e-08, "loss": 0.0003, "reward": 1.7500001192092896, "reward_std": 0.15152287483215332, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2711 }, { "completion_length": 201.42857360839844, "epoch": 2.602687140115163, "grad_norm": 1.4408459663391113, "kl": 0.32086658477783203, "learning_rate": 2.460728334539683e-08, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2712 }, { "completion_length": 177.50001525878906, "epoch": 2.6036468330134355, "grad_norm": 0.00667488481849432, "kl": 0.3142622709274292, "learning_rate": 2.4490528868716358e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2713 }, { "completion_length": 140.2857208251953, "epoch": 2.6046065259117084, "grad_norm": 1.1107723712921143, "kl": 0.38381683826446533, "learning_rate": 2.437403776696151e-08, "loss": 0.0004, "reward": 1.8571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2714 }, { "completion_length": 164.2857208251953, "epoch": 2.605566218809981, "grad_norm": 1.6583809852600098, "kl": 0.3728514015674591, "learning_rate": 2.4257810176183614e-08, "loss": 0.0004, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2715 }, { "completion_length": 191.85714721679688, "epoch": 2.6065259117082533, "grad_norm": 0.6363492608070374, "kl": 0.31865549087524414, "learning_rate": 2.4141846232125977e-08, "loss": 0.0003, "reward": 1.8928571939468384, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2716 }, { "completion_length": 172.35714721679688, "epoch": 2.607485604606526, "grad_norm": 0.013156338594853878, "kl": 0.3892957270145416, "learning_rate": 2.402614607022413e-08, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2717 }, { "completion_length": 197.2857208251953, "epoch": 2.6084452975047983, "grad_norm": 2.150883913040161, "kl": 0.3580634295940399, "learning_rate": 2.3910709825605642e-08, "loss": 0.0004, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2718 }, { "completion_length": 191.50001525878906, "epoch": 2.609404990403071, "grad_norm": 0.007110409438610077, "kl": 0.2726563811302185, "learning_rate": 2.379553763308964e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2719 }, { "completion_length": 185.07144165039062, "epoch": 2.6103646833013436, "grad_norm": 0.008187614381313324, "kl": 0.2998386323451996, "learning_rate": 2.368062962718703e-08, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.3571428656578064, "step": 2720 }, { "completion_length": 145.1428680419922, "epoch": 2.611324376199616, "grad_norm": 0.0220030564814806, "kl": 0.3853217661380768, "learning_rate": 2.356598594210013e-08, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2721 }, { "completion_length": 149.2857208251953, "epoch": 2.6122840690978886, "grad_norm": 0.017689932137727737, "kl": 0.3459993600845337, "learning_rate": 2.345160671172239e-08, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2722 }, { "completion_length": 198.6428680419922, "epoch": 2.613243761996161, "grad_norm": 0.01272562425583601, "kl": 0.3112486004829407, "learning_rate": 2.3337492069638642e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2723 }, { "completion_length": 179.07144165039062, "epoch": 2.614203454894434, "grad_norm": 1.6902916431427002, "kl": 0.34884870052337646, "learning_rate": 2.3223642149124563e-08, "loss": 0.0003, "reward": 1.821428656578064, "reward_std": 0.25253814458847046, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2724 }, { "completion_length": 188.57144165039062, "epoch": 2.6151631477927064, "grad_norm": 1.4165195226669312, "kl": 0.29914721846580505, "learning_rate": 2.3110057083146728e-08, "loss": 0.0003, "reward": 1.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2725 }, { "completion_length": 211.42857360839844, "epoch": 2.616122840690979, "grad_norm": 0.011637835763394833, "kl": 0.2784305214881897, "learning_rate": 2.2996737004362332e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2726 }, { "completion_length": 217.7857208251953, "epoch": 2.6170825335892514, "grad_norm": 0.010680891573429108, "kl": 0.2684333920478821, "learning_rate": 2.2883682045119062e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2727 }, { "completion_length": 179.92857360839844, "epoch": 2.618042226487524, "grad_norm": 0.717244565486908, "kl": 0.274018257856369, "learning_rate": 2.277089233745513e-08, "loss": 0.0003, "reward": 1.7857143878936768, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2728 }, { "completion_length": 198.1428680419922, "epoch": 2.6190019193857967, "grad_norm": 0.8890498280525208, "kl": 0.27930063009262085, "learning_rate": 2.2658368013098754e-08, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2729 }, { "completion_length": 181.7857208251953, "epoch": 2.619961612284069, "grad_norm": 1.3140106201171875, "kl": 0.3572506308555603, "learning_rate": 2.2546109203468276e-08, "loss": 0.0004, "reward": 1.821428656578064, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 0.8571429252624512, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2730 }, { "completion_length": 238.07144165039062, "epoch": 2.6209213051823417, "grad_norm": 0.031388603150844574, "kl": 0.3244033753871918, "learning_rate": 2.2434116039672047e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2731 }, { "completion_length": 150.6428680419922, "epoch": 2.621880998080614, "grad_norm": 1.0729055404663086, "kl": 0.3923097550868988, "learning_rate": 2.232238865250796e-08, "loss": 0.0004, "reward": 1.8571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2732 }, { "completion_length": 181.1428680419922, "epoch": 2.6228406909788866, "grad_norm": 0.027822518721222878, "kl": 0.34478509426116943, "learning_rate": 2.2210927172463782e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2733 }, { "completion_length": 190.7857208251953, "epoch": 2.6238003838771595, "grad_norm": 1.540054440498352, "kl": 0.3555593192577362, "learning_rate": 2.2099731729716465e-08, "loss": 0.0004, "reward": 1.7142858505249023, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2734 }, { "completion_length": 181.85714721679688, "epoch": 2.624760076775432, "grad_norm": 0.02935117483139038, "kl": 0.3427380323410034, "learning_rate": 2.19888024541324e-08, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2735 }, { "completion_length": 181.2857208251953, "epoch": 2.6257197696737045, "grad_norm": 0.016292758285999298, "kl": 0.3430865705013275, "learning_rate": 2.1878139475267037e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2736 }, { "completion_length": 160.07144165039062, "epoch": 2.626679462571977, "grad_norm": 0.012545167468488216, "kl": 0.30597978830337524, "learning_rate": 2.1767742922364885e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2737 }, { "completion_length": 166.0, "epoch": 2.6276391554702494, "grad_norm": 0.029167795553803444, "kl": 0.38646796345710754, "learning_rate": 2.165761292435922e-08, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2738 }, { "completion_length": 196.07144165039062, "epoch": 2.6285988483685223, "grad_norm": 0.008547723293304443, "kl": 0.2921566367149353, "learning_rate": 2.15477496098721e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2739 }, { "completion_length": 176.1428680419922, "epoch": 2.629558541266795, "grad_norm": 0.015302461571991444, "kl": 0.33093976974487305, "learning_rate": 2.1438153107214e-08, "loss": 0.0003, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2740 }, { "completion_length": 201.85714721679688, "epoch": 2.6305182341650672, "grad_norm": 0.02324710041284561, "kl": 0.36292764544487, "learning_rate": 2.132882354438395e-08, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2741 }, { "completion_length": 214.1428680419922, "epoch": 2.6314779270633397, "grad_norm": 0.7569425106048584, "kl": 0.295857310295105, "learning_rate": 2.1219761049069086e-08, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2742 }, { "completion_length": 201.57144165039062, "epoch": 2.632437619961612, "grad_norm": 0.009626991115510464, "kl": 0.2679879069328308, "learning_rate": 2.1110965748644543e-08, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2743 }, { "completion_length": 187.35714721679688, "epoch": 2.633397312859885, "grad_norm": 0.01325311604887247, "kl": 0.30805984139442444, "learning_rate": 2.100243777017366e-08, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2744 }, { "completion_length": 171.00001525878906, "epoch": 2.634357005758157, "grad_norm": 0.9774345755577087, "kl": 0.35467758774757385, "learning_rate": 2.0894177240407345e-08, "loss": 0.0004, "reward": 1.8571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2745 }, { "completion_length": 177.50001525878906, "epoch": 2.63531669865643, "grad_norm": 1.2547019720077515, "kl": 0.3998193144798279, "learning_rate": 2.0786184285784298e-08, "loss": 0.0004, "reward": 1.8571429252624512, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4285714626312256, "step": 2746 }, { "completion_length": 188.21429443359375, "epoch": 2.6362763915547025, "grad_norm": 1.3295782804489136, "kl": 0.29780516028404236, "learning_rate": 2.0678459032430594e-08, "loss": 0.0003, "reward": 1.9642858505249023, "reward_std": 0.05050762742757797, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.4642857313156128, "step": 2747 }, { "completion_length": 193.7857208251953, "epoch": 2.637236084452975, "grad_norm": 1.3309779167175293, "kl": 0.3038172721862793, "learning_rate": 2.0571001606159683e-08, "loss": 0.0003, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2748 }, { "completion_length": 159.21429443359375, "epoch": 2.6381957773512474, "grad_norm": 0.6974461674690247, "kl": 0.36143991351127625, "learning_rate": 2.0463812132472363e-08, "loss": 0.0004, "reward": 1.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2749 }, { "completion_length": 152.57144165039062, "epoch": 2.63915547024952, "grad_norm": 0.026023628190159798, "kl": 0.3641926944255829, "learning_rate": 2.035689073655625e-08, "loss": 0.0004, "reward": 2.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "rewards/check_language_reward_func": 0.5, "rewards/check_similarity_func": 0.5, "step": 2750 }, { "completion_length": 199.21429443359375, "epoch": 2.640115163147793, "grad_norm": 0.015044273808598518, "kl": 0.3076804578304291, "learning_rate": 1.4753895647267907e-07, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2751 }, { "completion_length": 185.7857208251953, "epoch": 2.6410748560460653, "grad_norm": 0.009411969222128391, "kl": 0.3141585886478424, "learning_rate": 1.4735415906105417e-07, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2752 }, { "completion_length": 142.6428680419922, "epoch": 2.6420345489443378, "grad_norm": 1.374041199684143, "kl": 0.39737382531166077, "learning_rate": 1.4716942908259395e-07, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 2753 }, { "completion_length": 155.42857360839844, "epoch": 2.6429942418426102, "grad_norm": 0.03790688142180443, "kl": 0.4257901608943939, "learning_rate": 1.4698476665865678e-07, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 2754 }, { "completion_length": 199.6428680419922, "epoch": 2.6439539347408827, "grad_norm": 0.9320877194404602, "kl": 0.27288714051246643, "learning_rate": 1.468001719105565e-07, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 2755 }, { "completion_length": 153.2857208251953, "epoch": 2.6449136276391556, "grad_norm": 1.6647769212722778, "kl": 0.4356108009815216, "learning_rate": 1.4661564495956268e-07, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 2756 }, { "completion_length": 204.7857208251953, "epoch": 2.645873320537428, "grad_norm": 0.016051312908530235, "kl": 0.32741180062294006, "learning_rate": 1.464311859269003e-07, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2757 }, { "completion_length": 165.21429443359375, "epoch": 2.6468330134357005, "grad_norm": 1.659217357635498, "kl": 0.34542328119277954, "learning_rate": 1.4624679493374958e-07, "loss": 0.0003, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 2758 }, { "completion_length": 178.92857360839844, "epoch": 2.647792706333973, "grad_norm": 1.7660809755325317, "kl": 0.41311463713645935, "learning_rate": 1.4606247210124624e-07, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 2759 }, { "completion_length": 193.35714721679688, "epoch": 2.6487523992322455, "grad_norm": 0.7103281617164612, "kl": 0.3142954707145691, "learning_rate": 1.4587821755048097e-07, "loss": 0.0003, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 2760 }, { "completion_length": 174.71429443359375, "epoch": 2.6497120921305184, "grad_norm": 0.012095017358660698, "kl": 0.3066413104534149, "learning_rate": 1.4569403140249988e-07, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2761 }, { "completion_length": 239.21429443359375, "epoch": 2.650671785028791, "grad_norm": 0.006067273672670126, "kl": 0.23015964031219482, "learning_rate": 1.4550991377830423e-07, "loss": 0.0002, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2762 }, { "completion_length": 225.07144165039062, "epoch": 2.6516314779270633, "grad_norm": 0.026127375662326813, "kl": 0.2969294488430023, "learning_rate": 1.453258647988496e-07, "loss": 0.0003, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 2763 }, { "completion_length": 136.57144165039062, "epoch": 2.652591170825336, "grad_norm": 1.9809560775756836, "kl": 0.3779466450214386, "learning_rate": 1.4514188458504724e-07, "loss": 0.0004, "reward": 0.785714328289032, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.785714328289032, "step": 2764 }, { "completion_length": 155.85714721679688, "epoch": 2.6535508637236083, "grad_norm": 1.9871480464935303, "kl": 0.3493293523788452, "learning_rate": 1.4495797325776287e-07, "loss": 0.0003, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 2765 }, { "completion_length": 181.50001525878906, "epoch": 2.654510556621881, "grad_norm": 3.5679609775543213, "kl": 0.36670172214508057, "learning_rate": 1.4477413093781703e-07, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 2766 }, { "completion_length": 189.21429443359375, "epoch": 2.6554702495201536, "grad_norm": 1.3796483278274536, "kl": 0.37429648637771606, "learning_rate": 1.4459035774598472e-07, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 2767 }, { "completion_length": 203.42857360839844, "epoch": 2.656429942418426, "grad_norm": 0.015395427122712135, "kl": 0.37067773938179016, "learning_rate": 1.4440665380299593e-07, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2768 }, { "completion_length": 197.7857208251953, "epoch": 2.6573896353166986, "grad_norm": 1.2390697002410889, "kl": 0.3999790549278259, "learning_rate": 1.4422301922953473e-07, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 2769 }, { "completion_length": 206.21429443359375, "epoch": 2.658349328214971, "grad_norm": 0.030769310891628265, "kl": 0.30279478430747986, "learning_rate": 1.4403945414623988e-07, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2770 }, { "completion_length": 157.07144165039062, "epoch": 2.659309021113244, "grad_norm": 0.023457065224647522, "kl": 0.3956056833267212, "learning_rate": 1.4385595867370467e-07, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 2771 }, { "completion_length": 183.35714721679688, "epoch": 2.6602687140115164, "grad_norm": 1.3956339359283447, "kl": 0.31298741698265076, "learning_rate": 1.43672532932476e-07, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 2772 }, { "completion_length": 172.00001525878906, "epoch": 2.661228406909789, "grad_norm": 0.010316155850887299, "kl": 0.36888307332992554, "learning_rate": 1.4348917704305567e-07, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2773 }, { "completion_length": 217.35714721679688, "epoch": 2.6621880998080614, "grad_norm": 0.03503849357366562, "kl": 0.3216448724269867, "learning_rate": 1.433058911258991e-07, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2774 }, { "completion_length": 171.71429443359375, "epoch": 2.663147792706334, "grad_norm": 0.024785298854112625, "kl": 0.37765225768089294, "learning_rate": 1.43122675301416e-07, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2775 }, { "completion_length": 199.35714721679688, "epoch": 2.6641074856046068, "grad_norm": 0.010942786931991577, "kl": 0.34264907240867615, "learning_rate": 1.4293952968997022e-07, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2776 }, { "completion_length": 159.6428680419922, "epoch": 2.665067178502879, "grad_norm": 1.21259343624115, "kl": 0.4088183045387268, "learning_rate": 1.4275645441187884e-07, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 2777 }, { "completion_length": 133.07144165039062, "epoch": 2.6660268714011517, "grad_norm": 0.024352816864848137, "kl": 0.4771319627761841, "learning_rate": 1.4257344958741325e-07, "loss": 0.0005, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 2778 }, { "completion_length": 142.42857360839844, "epoch": 2.666986564299424, "grad_norm": 0.015151840634644032, "kl": 0.4321398138999939, "learning_rate": 1.4239051533679863e-07, "loss": 0.0004, "reward": 0.7142857313156128, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "step": 2779 }, { "completion_length": 249.21429443359375, "epoch": 2.6679462571976966, "grad_norm": 0.9975098371505737, "kl": 0.27214357256889343, "learning_rate": 1.4220765178021343e-07, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 2780 }, { "completion_length": 173.71429443359375, "epoch": 2.6689059500959695, "grad_norm": 0.7608722448348999, "kl": 0.316139817237854, "learning_rate": 1.4202485903778976e-07, "loss": 0.0003, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 2781 }, { "completion_length": 185.7857208251953, "epoch": 2.669865642994242, "grad_norm": 0.008742475882172585, "kl": 0.30364975333213806, "learning_rate": 1.4184213722961348e-07, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2782 }, { "completion_length": 170.5, "epoch": 2.6708253358925145, "grad_norm": 1.5333679914474487, "kl": 0.3237568438053131, "learning_rate": 1.416594864757234e-07, "loss": 0.0003, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 2783 }, { "completion_length": 155.7857208251953, "epoch": 2.671785028790787, "grad_norm": 1.4184068441390991, "kl": 0.3766843378543854, "learning_rate": 1.4147690689611215e-07, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 2784 }, { "completion_length": 162.92857360839844, "epoch": 2.6727447216890594, "grad_norm": 2.5034165382385254, "kl": 0.35641756653785706, "learning_rate": 1.412943986107252e-07, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 2785 }, { "completion_length": 199.92857360839844, "epoch": 2.6737044145873323, "grad_norm": 0.012394963763654232, "kl": 0.31480228900909424, "learning_rate": 1.4111196173946126e-07, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2786 }, { "completion_length": 141.85714721679688, "epoch": 2.6746641074856043, "grad_norm": 1.306149959564209, "kl": 0.41751086711883545, "learning_rate": 1.4092959640217243e-07, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 2787 }, { "completion_length": 241.50001525878906, "epoch": 2.6756238003838773, "grad_norm": 0.006977958604693413, "kl": 0.2350100427865982, "learning_rate": 1.407473027186633e-07, "loss": 0.0002, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2788 }, { "completion_length": 187.71429443359375, "epoch": 2.6765834932821497, "grad_norm": 0.016077551990747452, "kl": 0.31233808398246765, "learning_rate": 1.4056508080869183e-07, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2789 }, { "completion_length": 213.2857208251953, "epoch": 2.677543186180422, "grad_norm": 1.6567825078964233, "kl": 0.3044937252998352, "learning_rate": 1.403829307919689e-07, "loss": 0.0003, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 2790 }, { "completion_length": 191.1428680419922, "epoch": 2.6785028790786947, "grad_norm": 0.008753102272748947, "kl": 0.3105844259262085, "learning_rate": 1.4020085278815743e-07, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2791 }, { "completion_length": 194.07144165039062, "epoch": 2.679462571976967, "grad_norm": 1.105292558670044, "kl": 0.3728746771812439, "learning_rate": 1.400188469168738e-07, "loss": 0.0004, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 2792 }, { "completion_length": 176.1428680419922, "epoch": 2.68042226487524, "grad_norm": 1.8664442300796509, "kl": 0.3775957226753235, "learning_rate": 1.398369132976868e-07, "loss": 0.0004, "reward": 0.7142857313156128, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "step": 2793 }, { "completion_length": 179.57144165039062, "epoch": 2.6813819577735125, "grad_norm": 1.1998690366744995, "kl": 0.5238566994667053, "learning_rate": 1.3965505205011758e-07, "loss": 0.0005, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 2794 }, { "completion_length": 130.85714721679688, "epoch": 2.682341650671785, "grad_norm": 1.5882954597473145, "kl": 0.4664212167263031, "learning_rate": 1.394732632936398e-07, "loss": 0.0005, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 2795 }, { "completion_length": 182.7857208251953, "epoch": 2.6833013435700575, "grad_norm": 0.8786811232566833, "kl": 0.34253624081611633, "learning_rate": 1.3929154714767966e-07, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 2796 }, { "completion_length": 178.1428680419922, "epoch": 2.68426103646833, "grad_norm": 0.5253918170928955, "kl": 0.3588029146194458, "learning_rate": 1.3910990373161542e-07, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 2797 }, { "completion_length": 184.92857360839844, "epoch": 2.685220729366603, "grad_norm": 1.1585235595703125, "kl": 0.4599039554595947, "learning_rate": 1.3892833316477787e-07, "loss": 0.0005, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 2798 }, { "completion_length": 147.42857360839844, "epoch": 2.6861804222648753, "grad_norm": 1.171892762184143, "kl": 0.3706403076648712, "learning_rate": 1.387468355664496e-07, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 2799 }, { "completion_length": 188.71429443359375, "epoch": 2.6871401151631478, "grad_norm": 1.582874059677124, "kl": 0.38603857159614563, "learning_rate": 1.3856541105586545e-07, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 2800 }, { "completion_length": 190.00001525878906, "epoch": 2.6880998080614202, "grad_norm": 0.9906087517738342, "kl": 0.28758445382118225, "learning_rate": 1.383840597522123e-07, "loss": 0.0003, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 2801 }, { "completion_length": 231.00001525878906, "epoch": 2.6890595009596927, "grad_norm": 1.0322738885879517, "kl": 0.26884281635284424, "learning_rate": 1.3820278177462869e-07, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 2802 }, { "completion_length": 267.4285888671875, "epoch": 2.6900191938579656, "grad_norm": 0.008512506261467934, "kl": 0.23251482844352722, "learning_rate": 1.3802157724220522e-07, "loss": 0.0002, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 2803 }, { "completion_length": 162.42857360839844, "epoch": 2.690978886756238, "grad_norm": 0.843159019947052, "kl": 0.3541811406612396, "learning_rate": 1.3784044627398445e-07, "loss": 0.0004, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 2804 }, { "completion_length": 197.57144165039062, "epoch": 2.6919385796545106, "grad_norm": 0.7316362261772156, "kl": 0.3164566159248352, "learning_rate": 1.3765938898895984e-07, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 2805 }, { "completion_length": 186.1428680419922, "epoch": 2.692898272552783, "grad_norm": 0.013240745291113853, "kl": 0.3638612926006317, "learning_rate": 1.3747840550607722e-07, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2806 }, { "completion_length": 224.4285888671875, "epoch": 2.6938579654510555, "grad_norm": 0.008222626522183418, "kl": 0.25527647137641907, "learning_rate": 1.372974959442337e-07, "loss": 0.0003, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 2807 }, { "completion_length": 175.42857360839844, "epoch": 2.6948176583493284, "grad_norm": 1.354364275932312, "kl": 0.3804764449596405, "learning_rate": 1.371166604222777e-07, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 2808 }, { "completion_length": 157.0, "epoch": 2.695777351247601, "grad_norm": 1.9070168733596802, "kl": 0.3848685026168823, "learning_rate": 1.3693589905900909e-07, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 2809 }, { "completion_length": 170.92857360839844, "epoch": 2.6967370441458733, "grad_norm": 1.277032494544983, "kl": 0.7307388186454773, "learning_rate": 1.3675521197317887e-07, "loss": 0.0007, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 2810 }, { "completion_length": 201.71429443359375, "epoch": 2.697696737044146, "grad_norm": 0.014024324715137482, "kl": 0.32632651925086975, "learning_rate": 1.3657459928348953e-07, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2811 }, { "completion_length": 204.6428680419922, "epoch": 2.6986564299424183, "grad_norm": 0.7453172206878662, "kl": 0.28459763526916504, "learning_rate": 1.363940611085946e-07, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 2812 }, { "completion_length": 216.1428680419922, "epoch": 2.699616122840691, "grad_norm": 1.0269263982772827, "kl": 0.3083053529262543, "learning_rate": 1.3621359756709854e-07, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 2813 }, { "completion_length": 165.21429443359375, "epoch": 2.7005758157389637, "grad_norm": 1.4405932426452637, "kl": 0.30538150668144226, "learning_rate": 1.3603320877755674e-07, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 2814 }, { "completion_length": 146.35714721679688, "epoch": 2.701535508637236, "grad_norm": 0.028094228357076645, "kl": 0.5091022253036499, "learning_rate": 1.3585289485847573e-07, "loss": 0.0005, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2815 }, { "completion_length": 279.71429443359375, "epoch": 2.7024952015355086, "grad_norm": 0.009823258966207504, "kl": 0.2272759974002838, "learning_rate": 1.356726559283125e-07, "loss": 0.0002, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2816 }, { "completion_length": 155.57144165039062, "epoch": 2.703454894433781, "grad_norm": 1.211605191230774, "kl": 0.3743121027946472, "learning_rate": 1.3549249210547518e-07, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 2817 }, { "completion_length": 217.1428680419922, "epoch": 2.704414587332054, "grad_norm": 0.04476546496152878, "kl": 0.40244969725608826, "learning_rate": 1.3531240350832244e-07, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2818 }, { "completion_length": 186.6428680419922, "epoch": 2.7053742802303264, "grad_norm": 0.035028327256441116, "kl": 0.39952054619789124, "learning_rate": 1.351323902551631e-07, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2819 }, { "completion_length": 217.42857360839844, "epoch": 2.706333973128599, "grad_norm": 0.013666168786585331, "kl": 0.31189605593681335, "learning_rate": 1.34952452464257e-07, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2820 }, { "completion_length": 192.07144165039062, "epoch": 2.7072936660268714, "grad_norm": 1.5524163246154785, "kl": 0.32389765977859497, "learning_rate": 1.3477259025381444e-07, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 2821 }, { "completion_length": 212.71429443359375, "epoch": 2.708253358925144, "grad_norm": 1.121906042098999, "kl": 0.2961971163749695, "learning_rate": 1.3459280374199561e-07, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 2822 }, { "completion_length": 156.7857208251953, "epoch": 2.7092130518234168, "grad_norm": 0.016223153099417686, "kl": 0.3532378375530243, "learning_rate": 1.3441309304691135e-07, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2823 }, { "completion_length": 164.85714721679688, "epoch": 2.710172744721689, "grad_norm": 0.9447034597396851, "kl": 0.40392953157424927, "learning_rate": 1.3423345828662235e-07, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 2824 }, { "completion_length": 142.42857360839844, "epoch": 2.7111324376199617, "grad_norm": 0.022091619670391083, "kl": 0.45057618618011475, "learning_rate": 1.3405389957913985e-07, "loss": 0.0005, "reward": 0.7142857313156128, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "step": 2825 }, { "completion_length": 230.4285888671875, "epoch": 2.712092130518234, "grad_norm": 0.026912463828921318, "kl": 0.2738149166107178, "learning_rate": 1.3387441704242492e-07, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2826 }, { "completion_length": 194.6428680419922, "epoch": 2.7130518234165066, "grad_norm": 0.010303995572030544, "kl": 0.3136683404445648, "learning_rate": 1.336950107943885e-07, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2827 }, { "completion_length": 167.85714721679688, "epoch": 2.714011516314779, "grad_norm": 1.0813443660736084, "kl": 0.31934410333633423, "learning_rate": 1.335156809528914e-07, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 2828 }, { "completion_length": 139.21429443359375, "epoch": 2.7149712092130516, "grad_norm": 0.033810101449489594, "kl": 0.4108911454677582, "learning_rate": 1.3333642763574453e-07, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 2829 }, { "completion_length": 171.1428680419922, "epoch": 2.7159309021113245, "grad_norm": 0.7537158131599426, "kl": 0.3849257230758667, "learning_rate": 1.3315725096070812e-07, "loss": 0.0004, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 2830 }, { "completion_length": 162.07144165039062, "epoch": 2.716890595009597, "grad_norm": 0.012203626334667206, "kl": 0.45252665877342224, "learning_rate": 1.3297815104549232e-07, "loss": 0.0005, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2831 }, { "completion_length": 169.42857360839844, "epoch": 2.7178502879078694, "grad_norm": 0.012444501742720604, "kl": 0.33429625630378723, "learning_rate": 1.3279912800775703e-07, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2832 }, { "completion_length": 206.35714721679688, "epoch": 2.718809980806142, "grad_norm": 1.065972924232483, "kl": 0.3666207194328308, "learning_rate": 1.3262018196511093e-07, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 2833 }, { "completion_length": 232.00001525878906, "epoch": 2.7197696737044144, "grad_norm": 0.028709515929222107, "kl": 0.35846397280693054, "learning_rate": 1.3244131303511297e-07, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2834 }, { "completion_length": 184.1428680419922, "epoch": 2.7207293666026873, "grad_norm": 0.015335365198552608, "kl": 0.36465829610824585, "learning_rate": 1.322625213352708e-07, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2835 }, { "completion_length": 173.50001525878906, "epoch": 2.7216890595009597, "grad_norm": 0.990829348564148, "kl": 0.3824087977409363, "learning_rate": 1.320838069830418e-07, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 2836 }, { "completion_length": 167.42857360839844, "epoch": 2.722648752399232, "grad_norm": 1.244038701057434, "kl": 0.33300095796585083, "learning_rate": 1.3190517009583225e-07, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 2837 }, { "completion_length": 188.85714721679688, "epoch": 2.7236084452975047, "grad_norm": 0.014327917248010635, "kl": 0.32741647958755493, "learning_rate": 1.317266107909975e-07, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2838 }, { "completion_length": 215.6428680419922, "epoch": 2.724568138195777, "grad_norm": 1.3249722719192505, "kl": 0.34210270643234253, "learning_rate": 1.3154812918584218e-07, "loss": 0.0003, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 2839 }, { "completion_length": 225.07144165039062, "epoch": 2.72552783109405, "grad_norm": 0.010803124867379665, "kl": 0.289878785610199, "learning_rate": 1.3136972539761976e-07, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2840 }, { "completion_length": 175.85714721679688, "epoch": 2.7264875239923225, "grad_norm": 0.0155625706538558, "kl": 0.3748430609703064, "learning_rate": 1.311913995435326e-07, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2841 }, { "completion_length": 179.57144165039062, "epoch": 2.727447216890595, "grad_norm": 0.9883757829666138, "kl": 0.3017600178718567, "learning_rate": 1.310131517407316e-07, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 2842 }, { "completion_length": 173.07144165039062, "epoch": 2.7284069097888675, "grad_norm": 0.7407815456390381, "kl": 0.3180689215660095, "learning_rate": 1.3083498210631694e-07, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 2843 }, { "completion_length": 164.5, "epoch": 2.72936660268714, "grad_norm": 1.2136067152023315, "kl": 0.4109412431716919, "learning_rate": 1.3065689075733682e-07, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 2844 }, { "completion_length": 221.4285888671875, "epoch": 2.730326295585413, "grad_norm": 1.4487700462341309, "kl": 0.3713516294956207, "learning_rate": 1.3047887781078858e-07, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 2845 }, { "completion_length": 173.07144165039062, "epoch": 2.7312859884836853, "grad_norm": 1.4458485841751099, "kl": 0.36433032155036926, "learning_rate": 1.3030094338361768e-07, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 2846 }, { "completion_length": 100.71428680419922, "epoch": 2.732245681381958, "grad_norm": 0.03069925121963024, "kl": 0.5142654776573181, "learning_rate": 1.3012308759271803e-07, "loss": 0.0005, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 2847 }, { "completion_length": 160.7857208251953, "epoch": 2.7332053742802302, "grad_norm": 0.7585576772689819, "kl": 0.38983216881752014, "learning_rate": 1.2994531055493213e-07, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 2848 }, { "completion_length": 160.7857208251953, "epoch": 2.7341650671785027, "grad_norm": 0.026359910145401955, "kl": 0.4338759481906891, "learning_rate": 1.2976761238705033e-07, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2849 }, { "completion_length": 132.6428680419922, "epoch": 2.7351247600767756, "grad_norm": 0.008724709041416645, "kl": 0.3737490475177765, "learning_rate": 1.2958999320581165e-07, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2850 }, { "completion_length": 209.57144165039062, "epoch": 2.736084452975048, "grad_norm": 1.3785933256149292, "kl": 0.36285513639450073, "learning_rate": 1.294124531279029e-07, "loss": 0.0004, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 2851 }, { "completion_length": 179.35714721679688, "epoch": 2.7370441458733206, "grad_norm": 1.1348114013671875, "kl": 0.3422190248966217, "learning_rate": 1.2923499226995883e-07, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 2852 }, { "completion_length": 203.6428680419922, "epoch": 2.738003838771593, "grad_norm": 0.7373332381248474, "kl": 0.3260611593723297, "learning_rate": 1.2905761074856246e-07, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 2853 }, { "completion_length": 181.6428680419922, "epoch": 2.7389635316698655, "grad_norm": 0.011278927326202393, "kl": 0.3296167552471161, "learning_rate": 1.2888030868024467e-07, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2854 }, { "completion_length": 231.4285888671875, "epoch": 2.7399232245681384, "grad_norm": 0.014189202338457108, "kl": 0.29988545179367065, "learning_rate": 1.2870308618148386e-07, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2855 }, { "completion_length": 223.50001525878906, "epoch": 2.740882917466411, "grad_norm": 0.0116123603656888, "kl": 0.25840598344802856, "learning_rate": 1.2852594336870627e-07, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2856 }, { "completion_length": 191.1428680419922, "epoch": 2.7418426103646834, "grad_norm": 0.847985565662384, "kl": 0.3463500440120697, "learning_rate": 1.2834888035828596e-07, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 2857 }, { "completion_length": 178.1428680419922, "epoch": 2.742802303262956, "grad_norm": 0.010543475858867168, "kl": 0.37763670086860657, "learning_rate": 1.2817189726654432e-07, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2858 }, { "completion_length": 225.07144165039062, "epoch": 2.7437619961612283, "grad_norm": 0.008748432621359825, "kl": 0.2655101716518402, "learning_rate": 1.279949942097505e-07, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2859 }, { "completion_length": 163.5, "epoch": 2.744721689059501, "grad_norm": 0.012052973732352257, "kl": 0.36792001128196716, "learning_rate": 1.2781817130412088e-07, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2860 }, { "completion_length": 201.07144165039062, "epoch": 2.7456813819577737, "grad_norm": 1.771880865097046, "kl": 0.40562835335731506, "learning_rate": 1.2764142866581908e-07, "loss": 0.0004, "reward": 0.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "step": 2861 }, { "completion_length": 204.00001525878906, "epoch": 2.746641074856046, "grad_norm": 0.009436461143195629, "kl": 0.27951791882514954, "learning_rate": 1.2746476641095639e-07, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2862 }, { "completion_length": 179.35714721679688, "epoch": 2.7476007677543186, "grad_norm": 1.273500919342041, "kl": 0.40117108821868896, "learning_rate": 1.272881846555908e-07, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 2863 }, { "completion_length": 157.35714721679688, "epoch": 2.748560460652591, "grad_norm": 1.6795209646224976, "kl": 0.34980687499046326, "learning_rate": 1.2711168351572786e-07, "loss": 0.0003, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 2864 }, { "completion_length": 160.57144165039062, "epoch": 2.749520153550864, "grad_norm": 0.9225772619247437, "kl": 0.41083136200904846, "learning_rate": 1.269352631073199e-07, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 2865 }, { "completion_length": 184.92857360839844, "epoch": 2.750479846449136, "grad_norm": 0.009852183982729912, "kl": 0.31730347871780396, "learning_rate": 1.2675892354626612e-07, "loss": 0.0003, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 2866 }, { "completion_length": 162.92857360839844, "epoch": 2.751439539347409, "grad_norm": 1.1259417533874512, "kl": 0.4262811541557312, "learning_rate": 1.265826649484129e-07, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 2867 }, { "completion_length": 210.2857208251953, "epoch": 2.7523992322456814, "grad_norm": 0.01651613414287567, "kl": 0.3217787444591522, "learning_rate": 1.264064874295534e-07, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2868 }, { "completion_length": 180.71429443359375, "epoch": 2.753358925143954, "grad_norm": 0.007710406556725502, "kl": 0.2842598557472229, "learning_rate": 1.2623039110542728e-07, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2869 }, { "completion_length": 163.85714721679688, "epoch": 2.7543186180422263, "grad_norm": 1.1908659934997559, "kl": 0.3543247580528259, "learning_rate": 1.2605437609172098e-07, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 2870 }, { "completion_length": 178.6428680419922, "epoch": 2.755278310940499, "grad_norm": 0.007629552856087685, "kl": 0.3297445774078369, "learning_rate": 1.2587844250406743e-07, "loss": 0.0003, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 2871 }, { "completion_length": 161.5, "epoch": 2.7562380038387717, "grad_norm": 0.010141966864466667, "kl": 0.4424491822719574, "learning_rate": 1.2570259045804627e-07, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2872 }, { "completion_length": 153.92857360839844, "epoch": 2.757197696737044, "grad_norm": 1.3673210144042969, "kl": 0.37390437722206116, "learning_rate": 1.2552682006918358e-07, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 2873 }, { "completion_length": 208.35714721679688, "epoch": 2.7581573896353166, "grad_norm": 1.3540689945220947, "kl": 0.32165515422821045, "learning_rate": 1.2535113145295152e-07, "loss": 0.0003, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 2874 }, { "completion_length": 150.7857208251953, "epoch": 2.759117082533589, "grad_norm": 0.012430292554199696, "kl": 0.3605705797672272, "learning_rate": 1.251755247247687e-07, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2875 }, { "completion_length": 198.92857360839844, "epoch": 2.7600767754318616, "grad_norm": 1.0657495260238647, "kl": 0.34605470299720764, "learning_rate": 1.2500000000000005e-07, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 2876 }, { "completion_length": 228.71429443359375, "epoch": 2.7610364683301345, "grad_norm": 0.7161614298820496, "kl": 0.2867017686367035, "learning_rate": 1.248245573939563e-07, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 2877 }, { "completion_length": 203.85714721679688, "epoch": 2.761996161228407, "grad_norm": 0.014270815998315811, "kl": 0.31919950246810913, "learning_rate": 1.2464919702189466e-07, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2878 }, { "completion_length": 157.7857208251953, "epoch": 2.7629558541266794, "grad_norm": 1.4428349733352661, "kl": 0.3801932632923126, "learning_rate": 1.2447391899901803e-07, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 2879 }, { "completion_length": 184.1428680419922, "epoch": 2.763915547024952, "grad_norm": 1.7362333536148071, "kl": 0.3458661437034607, "learning_rate": 1.2429872344047507e-07, "loss": 0.0003, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 2880 }, { "completion_length": 163.6428680419922, "epoch": 2.7648752399232244, "grad_norm": 1.4241787195205688, "kl": 0.3834794759750366, "learning_rate": 1.2412361046136063e-07, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 2881 }, { "completion_length": 153.2857208251953, "epoch": 2.7658349328214973, "grad_norm": 1.0912269353866577, "kl": 0.41430917382240295, "learning_rate": 1.2394858017671527e-07, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 2882 }, { "completion_length": 172.71429443359375, "epoch": 2.7667946257197698, "grad_norm": 1.3538093566894531, "kl": 0.348506897687912, "learning_rate": 1.2377363270152496e-07, "loss": 0.0003, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 2883 }, { "completion_length": 151.7857208251953, "epoch": 2.767754318618042, "grad_norm": 1.3662548065185547, "kl": 0.34945937991142273, "learning_rate": 1.235987681507214e-07, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 2884 }, { "completion_length": 211.1428680419922, "epoch": 2.7687140115163147, "grad_norm": 0.013435505330562592, "kl": 0.3811187446117401, "learning_rate": 1.2342398663918177e-07, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2885 }, { "completion_length": 175.6428680419922, "epoch": 2.769673704414587, "grad_norm": 0.02518199197947979, "kl": 0.37576133012771606, "learning_rate": 1.232492882817288e-07, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 2886 }, { "completion_length": 156.35714721679688, "epoch": 2.77063339731286, "grad_norm": 1.2852084636688232, "kl": 0.3904193937778473, "learning_rate": 1.230746731931307e-07, "loss": 0.0004, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 2887 }, { "completion_length": 164.2857208251953, "epoch": 2.7715930902111325, "grad_norm": 0.016139913350343704, "kl": 0.40519973635673523, "learning_rate": 1.2290014148810062e-07, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2888 }, { "completion_length": 238.1428680419922, "epoch": 2.772552783109405, "grad_norm": 0.013858034275472164, "kl": 0.2689513564109802, "learning_rate": 1.2272569328129714e-07, "loss": 0.0003, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 2889 }, { "completion_length": 200.42857360839844, "epoch": 2.7735124760076775, "grad_norm": 0.8754991888999939, "kl": 0.2994016110897064, "learning_rate": 1.2255132868732409e-07, "loss": 0.0003, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 2890 }, { "completion_length": 165.2857208251953, "epoch": 2.77447216890595, "grad_norm": 1.438745379447937, "kl": 0.3463890254497528, "learning_rate": 1.223770478207301e-07, "loss": 0.0003, "reward": 0.7142857313156128, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "step": 2891 }, { "completion_length": 249.07144165039062, "epoch": 2.775431861804223, "grad_norm": 0.03535619378089905, "kl": 0.3166525959968567, "learning_rate": 1.2220285079600915e-07, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2892 }, { "completion_length": 224.57144165039062, "epoch": 2.7763915547024953, "grad_norm": 0.01570621132850647, "kl": 0.2685598134994507, "learning_rate": 1.220287377275998e-07, "loss": 0.0003, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 2893 }, { "completion_length": 214.92857360839844, "epoch": 2.777351247600768, "grad_norm": 1.338941216468811, "kl": 0.29187917709350586, "learning_rate": 1.2185470872988557e-07, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 2894 }, { "completion_length": 238.7857208251953, "epoch": 2.7783109404990403, "grad_norm": 0.01141204684972763, "kl": 0.25055235624313354, "learning_rate": 1.2168076391719489e-07, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2895 }, { "completion_length": 146.7857208251953, "epoch": 2.7792706333973127, "grad_norm": 1.5675116777420044, "kl": 0.38775691390037537, "learning_rate": 1.2150690340380061e-07, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 2896 }, { "completion_length": 185.57144165039062, "epoch": 2.7802303262955856, "grad_norm": 1.3368442058563232, "kl": 0.3293341398239136, "learning_rate": 1.2133312730392059e-07, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 2897 }, { "completion_length": 230.07144165039062, "epoch": 2.781190019193858, "grad_norm": 0.028839001432061195, "kl": 0.3152460753917694, "learning_rate": 1.2115943573171685e-07, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2898 }, { "completion_length": 182.57144165039062, "epoch": 2.7821497120921306, "grad_norm": 0.007672599516808987, "kl": 0.2828333079814911, "learning_rate": 1.2098582880129598e-07, "loss": 0.0003, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 2899 }, { "completion_length": 146.57144165039062, "epoch": 2.783109404990403, "grad_norm": 2.1336660385131836, "kl": 0.43448805809020996, "learning_rate": 1.2081230662670907e-07, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 2900 }, { "completion_length": 200.21429443359375, "epoch": 2.7840690978886755, "grad_norm": 0.010070801712572575, "kl": 0.32737359404563904, "learning_rate": 1.206388693219516e-07, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2901 }, { "completion_length": 167.57144165039062, "epoch": 2.7850287907869484, "grad_norm": 0.01518511213362217, "kl": 0.43747836351394653, "learning_rate": 1.204655170009631e-07, "loss": 0.0004, "reward": 0.7142857313156128, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "step": 2902 }, { "completion_length": 171.85714721679688, "epoch": 2.785988483685221, "grad_norm": 1.0976386070251465, "kl": 0.3537646234035492, "learning_rate": 1.2029224977762723e-07, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 2903 }, { "completion_length": 187.1428680419922, "epoch": 2.7869481765834934, "grad_norm": 0.010972268879413605, "kl": 0.3374401032924652, "learning_rate": 1.2011906776577202e-07, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2904 }, { "completion_length": 183.6428680419922, "epoch": 2.787907869481766, "grad_norm": 1.4228100776672363, "kl": 0.32781466841697693, "learning_rate": 1.1994597107916919e-07, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 2905 }, { "completion_length": 223.57144165039062, "epoch": 2.7888675623800383, "grad_norm": 0.6824576258659363, "kl": 0.28354254364967346, "learning_rate": 1.1977295983153477e-07, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 2906 }, { "completion_length": 207.92857360839844, "epoch": 2.789827255278311, "grad_norm": 0.024030055850744247, "kl": 0.37626704573631287, "learning_rate": 1.1960003413652832e-07, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 2907 }, { "completion_length": 177.35714721679688, "epoch": 2.7907869481765832, "grad_norm": 0.013336972333490849, "kl": 0.3621182143688202, "learning_rate": 1.1942719410775335e-07, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2908 }, { "completion_length": 243.21429443359375, "epoch": 2.791746641074856, "grad_norm": 0.01495375856757164, "kl": 0.25090324878692627, "learning_rate": 1.192544398587572e-07, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2909 }, { "completion_length": 205.7857208251953, "epoch": 2.7927063339731286, "grad_norm": 0.01394729595631361, "kl": 0.35321173071861267, "learning_rate": 1.1908177150303054e-07, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2910 }, { "completion_length": 195.21429443359375, "epoch": 2.793666026871401, "grad_norm": 1.012981653213501, "kl": 0.30397191643714905, "learning_rate": 1.1890918915400803e-07, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 2911 }, { "completion_length": 144.21429443359375, "epoch": 2.7946257197696736, "grad_norm": 0.013683449476957321, "kl": 0.453398734331131, "learning_rate": 1.1873669292506749e-07, "loss": 0.0005, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2912 }, { "completion_length": 209.00001525878906, "epoch": 2.795585412667946, "grad_norm": 1.156390905380249, "kl": 0.3288050591945648, "learning_rate": 1.1856428292953022e-07, "loss": 0.0003, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 2913 }, { "completion_length": 182.2857208251953, "epoch": 2.796545105566219, "grad_norm": 0.019002150744199753, "kl": 0.305315226316452, "learning_rate": 1.1839195928066101e-07, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2914 }, { "completion_length": 187.57144165039062, "epoch": 2.7975047984644914, "grad_norm": 1.2104520797729492, "kl": 0.35355958342552185, "learning_rate": 1.1821972209166795e-07, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 2915 }, { "completion_length": 181.07144165039062, "epoch": 2.798464491362764, "grad_norm": 0.012105610221624374, "kl": 0.34991586208343506, "learning_rate": 1.1804757147570213e-07, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2916 }, { "completion_length": 178.00001525878906, "epoch": 2.7994241842610363, "grad_norm": 0.7917937636375427, "kl": 0.3253062963485718, "learning_rate": 1.1787550754585776e-07, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 2917 }, { "completion_length": 197.92857360839844, "epoch": 2.800383877159309, "grad_norm": 1.026968240737915, "kl": 0.3589750826358795, "learning_rate": 1.1770353041517239e-07, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 2918 }, { "completion_length": 220.35714721679688, "epoch": 2.8013435700575817, "grad_norm": 0.01205106358975172, "kl": 0.2966404855251312, "learning_rate": 1.1753164019662618e-07, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2919 }, { "completion_length": 193.6428680419922, "epoch": 2.802303262955854, "grad_norm": 1.137795329093933, "kl": 0.3702102601528168, "learning_rate": 1.1735983700314256e-07, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 2920 }, { "completion_length": 253.7857208251953, "epoch": 2.8032629558541267, "grad_norm": 0.010689289309084415, "kl": 0.2661501467227936, "learning_rate": 1.1718812094758751e-07, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2921 }, { "completion_length": 150.0, "epoch": 2.804222648752399, "grad_norm": 0.014489643275737762, "kl": 0.49356964230537415, "learning_rate": 1.1701649214276979e-07, "loss": 0.0005, "reward": 0.7142857313156128, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "step": 2922 }, { "completion_length": 195.6428680419922, "epoch": 2.8051823416506716, "grad_norm": 1.1103864908218384, "kl": 0.34505218267440796, "learning_rate": 1.1684495070144104e-07, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 2923 }, { "completion_length": 198.71429443359375, "epoch": 2.8061420345489445, "grad_norm": 1.2594192028045654, "kl": 0.3698483407497406, "learning_rate": 1.1667349673629526e-07, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 2924 }, { "completion_length": 139.6428680419922, "epoch": 2.807101727447217, "grad_norm": 2.325413465499878, "kl": 0.4265380799770355, "learning_rate": 1.1650213035996923e-07, "loss": 0.0004, "reward": 0.785714328289032, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.785714328289032, "step": 2925 }, { "completion_length": 189.42857360839844, "epoch": 2.8080614203454894, "grad_norm": 0.009698837995529175, "kl": 0.3253130316734314, "learning_rate": 1.1633085168504197e-07, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2926 }, { "completion_length": 216.2857208251953, "epoch": 2.809021113243762, "grad_norm": 0.8383966684341431, "kl": 0.45212826132774353, "learning_rate": 1.1615966082403489e-07, "loss": 0.0005, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 2927 }, { "completion_length": 193.21429443359375, "epoch": 2.8099808061420344, "grad_norm": 0.016622105613350868, "kl": 0.3386085629463196, "learning_rate": 1.1598855788941189e-07, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2928 }, { "completion_length": 216.21429443359375, "epoch": 2.8109404990403073, "grad_norm": 0.05912766233086586, "kl": 0.3352900445461273, "learning_rate": 1.158175429935791e-07, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2929 }, { "completion_length": 187.00001525878906, "epoch": 2.8119001919385798, "grad_norm": 1.2862149477005005, "kl": 0.3104379177093506, "learning_rate": 1.156466162488846e-07, "loss": 0.0003, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 2930 }, { "completion_length": 201.35714721679688, "epoch": 2.8128598848368522, "grad_norm": 1.4265211820602417, "kl": 0.33637183904647827, "learning_rate": 1.1547577776761866e-07, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 2931 }, { "completion_length": 147.35714721679688, "epoch": 2.8138195777351247, "grad_norm": 0.014291906729340553, "kl": 0.49119794368743896, "learning_rate": 1.1530502766201369e-07, "loss": 0.0005, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 2932 }, { "completion_length": 181.92857360839844, "epoch": 2.814779270633397, "grad_norm": 1.1965068578720093, "kl": 0.317710280418396, "learning_rate": 1.1513436604424378e-07, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 2933 }, { "completion_length": 171.35714721679688, "epoch": 2.81573896353167, "grad_norm": 1.054664134979248, "kl": 0.4739464223384857, "learning_rate": 1.1496379302642526e-07, "loss": 0.0005, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 2934 }, { "completion_length": 175.50001525878906, "epoch": 2.8166986564299425, "grad_norm": 0.9676012992858887, "kl": 0.42971834540367126, "learning_rate": 1.1479330872061591e-07, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 2935 }, { "completion_length": 180.35714721679688, "epoch": 2.817658349328215, "grad_norm": 0.013649964705109596, "kl": 0.3624406158924103, "learning_rate": 1.1462291323881528e-07, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2936 }, { "completion_length": 208.92857360839844, "epoch": 2.8186180422264875, "grad_norm": 1.352275013923645, "kl": 0.3262595534324646, "learning_rate": 1.1445260669296483e-07, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 2937 }, { "completion_length": 193.85714721679688, "epoch": 2.81957773512476, "grad_norm": 0.015110078267753124, "kl": 0.3232300281524658, "learning_rate": 1.1428238919494727e-07, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2938 }, { "completion_length": 186.2857208251953, "epoch": 2.820537428023033, "grad_norm": 0.014699304476380348, "kl": 0.3786575496196747, "learning_rate": 1.1411226085658704e-07, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2939 }, { "completion_length": 237.4285888671875, "epoch": 2.8214971209213053, "grad_norm": 0.011644456535577774, "kl": 0.30228152871131897, "learning_rate": 1.139422217896499e-07, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2940 }, { "completion_length": 169.07144165039062, "epoch": 2.822456813819578, "grad_norm": 1.695887804031372, "kl": 0.3667924702167511, "learning_rate": 1.137722721058429e-07, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 2941 }, { "completion_length": 203.57144165039062, "epoch": 2.8234165067178503, "grad_norm": 0.02146627940237522, "kl": 0.36710622906684875, "learning_rate": 1.136024119168145e-07, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2942 }, { "completion_length": 203.50001525878906, "epoch": 2.8243761996161227, "grad_norm": 0.01381299551576376, "kl": 0.39066237211227417, "learning_rate": 1.1343264133415442e-07, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2943 }, { "completion_length": 158.42857360839844, "epoch": 2.8253358925143957, "grad_norm": 0.01642393134534359, "kl": 0.40747693181037903, "learning_rate": 1.1326296046939333e-07, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2944 }, { "completion_length": 198.92857360839844, "epoch": 2.8262955854126677, "grad_norm": 1.0526924133300781, "kl": 0.3354114890098572, "learning_rate": 1.1309336943400303e-07, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 2945 }, { "completion_length": 216.07144165039062, "epoch": 2.8272552783109406, "grad_norm": 1.0466327667236328, "kl": 0.3134979009628296, "learning_rate": 1.1292386833939621e-07, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 2946 }, { "completion_length": 185.57144165039062, "epoch": 2.828214971209213, "grad_norm": 1.2241867780685425, "kl": 0.30719614028930664, "learning_rate": 1.127544572969267e-07, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 2947 }, { "completion_length": 165.42857360839844, "epoch": 2.8291746641074855, "grad_norm": 0.012390236370265484, "kl": 0.36246517300605774, "learning_rate": 1.1258513641788913e-07, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2948 }, { "completion_length": 184.35714721679688, "epoch": 2.830134357005758, "grad_norm": 0.014333797618746758, "kl": 0.390194833278656, "learning_rate": 1.1241590581351876e-07, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2949 }, { "completion_length": 154.85714721679688, "epoch": 2.8310940499040305, "grad_norm": 0.03573988005518913, "kl": 0.44249966740608215, "learning_rate": 1.1224676559499143e-07, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2950 }, { "completion_length": 230.50001525878906, "epoch": 2.8320537428023034, "grad_norm": 0.012651651166379452, "kl": 0.3252258896827698, "learning_rate": 1.1207771587342405e-07, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2951 }, { "completion_length": 184.21429443359375, "epoch": 2.833013435700576, "grad_norm": 1.3565505743026733, "kl": 0.4129496216773987, "learning_rate": 1.1190875675987355e-07, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 2952 }, { "completion_length": 222.21429443359375, "epoch": 2.8339731285988483, "grad_norm": 1.3148397207260132, "kl": 0.28782570362091064, "learning_rate": 1.1173988836533776e-07, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 2953 }, { "completion_length": 182.7857208251953, "epoch": 2.834932821497121, "grad_norm": 0.014325723052024841, "kl": 0.33897364139556885, "learning_rate": 1.1157111080075471e-07, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2954 }, { "completion_length": 222.57144165039062, "epoch": 2.8358925143953932, "grad_norm": 0.013028165325522423, "kl": 0.32288533449172974, "learning_rate": 1.1140242417700266e-07, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2955 }, { "completion_length": 140.0, "epoch": 2.836852207293666, "grad_norm": 0.01202974934130907, "kl": 0.4353886544704437, "learning_rate": 1.1123382860490035e-07, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2956 }, { "completion_length": 162.1428680419922, "epoch": 2.8378119001919386, "grad_norm": 1.2628014087677002, "kl": 0.4267028272151947, "learning_rate": 1.110653241952067e-07, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 2957 }, { "completion_length": 170.6428680419922, "epoch": 2.838771593090211, "grad_norm": 0.01720697619020939, "kl": 0.39506304264068604, "learning_rate": 1.1089691105862053e-07, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2958 }, { "completion_length": 178.35714721679688, "epoch": 2.8397312859884836, "grad_norm": 0.012603376060724258, "kl": 0.3770376145839691, "learning_rate": 1.1072858930578086e-07, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2959 }, { "completion_length": 186.50001525878906, "epoch": 2.840690978886756, "grad_norm": 1.107194185256958, "kl": 0.4010328948497772, "learning_rate": 1.1056035904726651e-07, "loss": 0.0004, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 2960 }, { "completion_length": 193.07144165039062, "epoch": 2.841650671785029, "grad_norm": 1.5704869031906128, "kl": 0.3532956540584564, "learning_rate": 1.1039222039359644e-07, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 2961 }, { "completion_length": 157.0, "epoch": 2.8426103646833014, "grad_norm": 0.015503600239753723, "kl": 0.41296592354774475, "learning_rate": 1.1022417345522936e-07, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2962 }, { "completion_length": 191.00001525878906, "epoch": 2.843570057581574, "grad_norm": 0.012544394470751286, "kl": 0.37038299441337585, "learning_rate": 1.1005621834256359e-07, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2963 }, { "completion_length": 227.7857208251953, "epoch": 2.8445297504798464, "grad_norm": 0.012003358453512192, "kl": 0.29800185561180115, "learning_rate": 1.0988835516593712e-07, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2964 }, { "completion_length": 159.71429443359375, "epoch": 2.845489443378119, "grad_norm": 2.9737579822540283, "kl": 0.5142619609832764, "learning_rate": 1.0972058403562781e-07, "loss": 0.0005, "reward": 0.785714328289032, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.785714328289032, "step": 2965 }, { "completion_length": 180.35714721679688, "epoch": 2.8464491362763917, "grad_norm": 0.025309400632977486, "kl": 0.44446074962615967, "learning_rate": 1.0955290506185272e-07, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2966 }, { "completion_length": 162.07144165039062, "epoch": 2.847408829174664, "grad_norm": 1.2836754322052002, "kl": 0.38903599977493286, "learning_rate": 1.0938531835476863e-07, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 2967 }, { "completion_length": 202.1428680419922, "epoch": 2.8483685220729367, "grad_norm": 1.2730772495269775, "kl": 0.36504310369491577, "learning_rate": 1.0921782402447158e-07, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 2968 }, { "completion_length": 200.42857360839844, "epoch": 2.849328214971209, "grad_norm": 0.6819480657577515, "kl": 0.3333289623260498, "learning_rate": 1.0905042218099678e-07, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 2969 }, { "completion_length": 214.2857208251953, "epoch": 2.8502879078694816, "grad_norm": 0.8192780017852783, "kl": 0.31760072708129883, "learning_rate": 1.0888311293431906e-07, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 2970 }, { "completion_length": 204.35714721679688, "epoch": 2.8512476007677545, "grad_norm": 0.012399300001561642, "kl": 0.3264618515968323, "learning_rate": 1.0871589639435203e-07, "loss": 0.0003, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 2971 }, { "completion_length": 192.50001525878906, "epoch": 2.852207293666027, "grad_norm": 0.022451266646385193, "kl": 0.3697144091129303, "learning_rate": 1.085487726709487e-07, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2972 }, { "completion_length": 222.1428680419922, "epoch": 2.8531669865642995, "grad_norm": 0.9976574182510376, "kl": 0.3130507171154022, "learning_rate": 1.083817418739009e-07, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 2973 }, { "completion_length": 252.21429443359375, "epoch": 2.854126679462572, "grad_norm": 0.016002189368009567, "kl": 0.2855287492275238, "learning_rate": 1.0821480411293937e-07, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2974 }, { "completion_length": 203.00001525878906, "epoch": 2.8550863723608444, "grad_norm": 0.019420567899942398, "kl": 0.35876867175102234, "learning_rate": 1.0804795949773394e-07, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2975 }, { "completion_length": 160.57144165039062, "epoch": 2.8560460652591173, "grad_norm": 0.011698253452777863, "kl": 0.3379177451133728, "learning_rate": 1.0788120813789326e-07, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2976 }, { "completion_length": 213.35714721679688, "epoch": 2.8570057581573898, "grad_norm": 0.010289755649864674, "kl": 0.3478096127510071, "learning_rate": 1.0771455014296446e-07, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2977 }, { "completion_length": 172.1428680419922, "epoch": 2.8579654510556622, "grad_norm": 1.519068717956543, "kl": 0.3880842626094818, "learning_rate": 1.0754798562243344e-07, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 2978 }, { "completion_length": 209.1428680419922, "epoch": 2.8589251439539347, "grad_norm": 1.2655935287475586, "kl": 0.3566540479660034, "learning_rate": 1.073815146857249e-07, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 2979 }, { "completion_length": 195.85714721679688, "epoch": 2.859884836852207, "grad_norm": 1.5214775800704956, "kl": 0.3567676246166229, "learning_rate": 1.0721513744220168e-07, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 2980 }, { "completion_length": 214.6428680419922, "epoch": 2.86084452975048, "grad_norm": 0.010558107867836952, "kl": 0.3281640410423279, "learning_rate": 1.0704885400116545e-07, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2981 }, { "completion_length": 237.50001525878906, "epoch": 2.8618042226487526, "grad_norm": 0.01682334579527378, "kl": 0.29246312379837036, "learning_rate": 1.0688266447185604e-07, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2982 }, { "completion_length": 155.42857360839844, "epoch": 2.862763915547025, "grad_norm": 0.016682330518960953, "kl": 0.395276814699173, "learning_rate": 1.0671656896345152e-07, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2983 }, { "completion_length": 132.0, "epoch": 2.8637236084452975, "grad_norm": 0.03721519932150841, "kl": 0.7434507012367249, "learning_rate": 1.0655056758506845e-07, "loss": 0.0007, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2984 }, { "completion_length": 208.92857360839844, "epoch": 2.86468330134357, "grad_norm": 0.025639183819293976, "kl": 0.3572092354297638, "learning_rate": 1.0638466044576127e-07, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2985 }, { "completion_length": 169.71429443359375, "epoch": 2.865642994241843, "grad_norm": 1.0601999759674072, "kl": 0.35519689321517944, "learning_rate": 1.062188476545228e-07, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 2986 }, { "completion_length": 202.42857360839844, "epoch": 2.866602687140115, "grad_norm": 0.020012538880109787, "kl": 0.4039165675640106, "learning_rate": 1.060531293202836e-07, "loss": 0.0004, "reward": 0.7142857313156128, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "step": 2987 }, { "completion_length": 144.92857360839844, "epoch": 2.867562380038388, "grad_norm": 1.9102216958999634, "kl": 0.3965882658958435, "learning_rate": 1.0588750555191225e-07, "loss": 0.0004, "reward": 0.785714328289032, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.785714328289032, "step": 2988 }, { "completion_length": 172.57144165039062, "epoch": 2.8685220729366603, "grad_norm": 0.012336580082774162, "kl": 0.37358444929122925, "learning_rate": 1.0572197645821531e-07, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2989 }, { "completion_length": 191.07144165039062, "epoch": 2.8694817658349328, "grad_norm": 0.018313486129045486, "kl": 0.41576510667800903, "learning_rate": 1.0555654214793722e-07, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2990 }, { "completion_length": 178.07144165039062, "epoch": 2.870441458733205, "grad_norm": 0.029850337654352188, "kl": 0.41343626379966736, "learning_rate": 1.053912027297599e-07, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2991 }, { "completion_length": 188.7857208251953, "epoch": 2.8714011516314777, "grad_norm": 0.012018994428217411, "kl": 0.37427204847335815, "learning_rate": 1.0522595831230294e-07, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2992 }, { "completion_length": 226.50001525878906, "epoch": 2.8723608445297506, "grad_norm": 0.016673004254698753, "kl": 0.3521902859210968, "learning_rate": 1.0506080900412384e-07, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2993 }, { "completion_length": 154.2857208251953, "epoch": 2.873320537428023, "grad_norm": 1.1429574489593506, "kl": 0.3988453447818756, "learning_rate": 1.0489575491371719e-07, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 2994 }, { "completion_length": 192.92857360839844, "epoch": 2.8742802303262955, "grad_norm": 0.012544825673103333, "kl": 0.37280982732772827, "learning_rate": 1.0473079614951544e-07, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2995 }, { "completion_length": 181.42857360839844, "epoch": 2.875239923224568, "grad_norm": 2.8887434005737305, "kl": 0.5064612627029419, "learning_rate": 1.0456593281988815e-07, "loss": 0.0005, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 2996 }, { "completion_length": 178.1428680419922, "epoch": 2.8761996161228405, "grad_norm": 0.01910584233701229, "kl": 0.39815017580986023, "learning_rate": 1.0440116503314211e-07, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2997 }, { "completion_length": 216.71429443359375, "epoch": 2.8771593090211134, "grad_norm": 0.010795722715556622, "kl": 0.37149035930633545, "learning_rate": 1.042364928975217e-07, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2998 }, { "completion_length": 184.6428680419922, "epoch": 2.878119001919386, "grad_norm": 0.013356378301978111, "kl": 0.36433616280555725, "learning_rate": 1.0407191652120809e-07, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 2999 }, { "completion_length": 200.00001525878906, "epoch": 2.8790786948176583, "grad_norm": 0.8210156559944153, "kl": 0.35909804701805115, "learning_rate": 1.0390743601231983e-07, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3000 }, { "completion_length": 229.07144165039062, "epoch": 2.880038387715931, "grad_norm": 0.014413192868232727, "kl": 0.33391454815864563, "learning_rate": 1.0374305147891227e-07, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3001 }, { "completion_length": 170.21429443359375, "epoch": 2.8809980806142033, "grad_norm": 0.01204520370811224, "kl": 0.3985385596752167, "learning_rate": 1.0357876302897772e-07, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3002 }, { "completion_length": 214.00001525878906, "epoch": 2.881957773512476, "grad_norm": 0.012152183800935745, "kl": 0.30223798751831055, "learning_rate": 1.0341457077044554e-07, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3003 }, { "completion_length": 202.85714721679688, "epoch": 2.8829174664107486, "grad_norm": 0.016482887789607048, "kl": 0.33293795585632324, "learning_rate": 1.0325047481118191e-07, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3004 }, { "completion_length": 215.50001525878906, "epoch": 2.883877159309021, "grad_norm": 0.009537413716316223, "kl": 0.3331228196620941, "learning_rate": 1.030864752589895e-07, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3005 }, { "completion_length": 190.07144165039062, "epoch": 2.8848368522072936, "grad_norm": 1.4414201974868774, "kl": 0.36014339327812195, "learning_rate": 1.0292257222160774e-07, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3006 }, { "completion_length": 207.50001525878906, "epoch": 2.885796545105566, "grad_norm": 0.015177277848124504, "kl": 0.31922417879104614, "learning_rate": 1.0275876580671284e-07, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3007 }, { "completion_length": 243.00001525878906, "epoch": 2.886756238003839, "grad_norm": 1.204954743385315, "kl": 0.2860802710056305, "learning_rate": 1.0259505612191724e-07, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3008 }, { "completion_length": 229.00001525878906, "epoch": 2.8877159309021114, "grad_norm": 0.9681979417800903, "kl": 0.30652114748954773, "learning_rate": 1.0243144327477013e-07, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3009 }, { "completion_length": 182.1428680419922, "epoch": 2.888675623800384, "grad_norm": 0.02286895178258419, "kl": 0.4327605962753296, "learning_rate": 1.0226792737275689e-07, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3010 }, { "completion_length": 167.71429443359375, "epoch": 2.8896353166986564, "grad_norm": 0.8441108465194702, "kl": 0.3798415958881378, "learning_rate": 1.0210450852329913e-07, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3011 }, { "completion_length": 152.2857208251953, "epoch": 2.890595009596929, "grad_norm": 1.8384939432144165, "kl": 0.3709987699985504, "learning_rate": 1.0194118683375502e-07, "loss": 0.0004, "reward": 0.7142857313156128, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "step": 3012 }, { "completion_length": 157.35714721679688, "epoch": 2.8915547024952017, "grad_norm": 0.02662903442978859, "kl": 0.4621737599372864, "learning_rate": 1.0177796241141856e-07, "loss": 0.0005, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3013 }, { "completion_length": 225.7857208251953, "epoch": 2.892514395393474, "grad_norm": 0.013989144936203957, "kl": 0.27890118956565857, "learning_rate": 1.0161483536352017e-07, "loss": 0.0003, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3014 }, { "completion_length": 204.35714721679688, "epoch": 2.8934740882917467, "grad_norm": 0.0373128280043602, "kl": 0.39538028836250305, "learning_rate": 1.0145180579722602e-07, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3015 }, { "completion_length": 188.21429443359375, "epoch": 2.894433781190019, "grad_norm": 0.014340722002089024, "kl": 0.3522270619869232, "learning_rate": 1.0128887381963826e-07, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3016 }, { "completion_length": 223.85714721679688, "epoch": 2.8953934740882916, "grad_norm": 1.0798712968826294, "kl": 0.2850089967250824, "learning_rate": 1.0112603953779517e-07, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3017 }, { "completion_length": 251.85714721679688, "epoch": 2.8963531669865645, "grad_norm": 0.7885937690734863, "kl": 0.25591444969177246, "learning_rate": 1.009633030586707e-07, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3018 }, { "completion_length": 148.1428680419922, "epoch": 2.897312859884837, "grad_norm": 2.8052399158477783, "kl": 0.4487445652484894, "learning_rate": 1.0080066448917451e-07, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3019 }, { "completion_length": 183.57144165039062, "epoch": 2.8982725527831095, "grad_norm": 0.014737842604517937, "kl": 0.32495903968811035, "learning_rate": 1.0063812393615198e-07, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3020 }, { "completion_length": 233.07144165039062, "epoch": 2.899232245681382, "grad_norm": 0.018482625484466553, "kl": 0.3089079260826111, "learning_rate": 1.0047568150638397e-07, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3021 }, { "completion_length": 167.57144165039062, "epoch": 2.9001919385796544, "grad_norm": 1.0909836292266846, "kl": 0.40472277998924255, "learning_rate": 1.0031333730658712e-07, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3022 }, { "completion_length": 210.57144165039062, "epoch": 2.9011516314779273, "grad_norm": 0.010243497788906097, "kl": 0.3325020372867584, "learning_rate": 1.0015109144341352e-07, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3023 }, { "completion_length": 216.2857208251953, "epoch": 2.9021113243761993, "grad_norm": 0.023411665111780167, "kl": 0.389439195394516, "learning_rate": 9.998894402345043e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3024 }, { "completion_length": 186.57144165039062, "epoch": 2.9030710172744723, "grad_norm": 0.011477463878691196, "kl": 0.35074344277381897, "learning_rate": 9.982689515322049e-08, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3025 }, { "completion_length": 157.21429443359375, "epoch": 2.9040307101727447, "grad_norm": 0.023950813338160515, "kl": 0.4421203136444092, "learning_rate": 9.966494493918187e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3026 }, { "completion_length": 201.71429443359375, "epoch": 2.904990403071017, "grad_norm": 1.5763795375823975, "kl": 0.34466561675071716, "learning_rate": 9.950309348772751e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3027 }, { "completion_length": 178.2857208251953, "epoch": 2.90595009596929, "grad_norm": 0.017539778724312782, "kl": 0.412850946187973, "learning_rate": 9.934134090518592e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3028 }, { "completion_length": 222.71429443359375, "epoch": 2.906909788867562, "grad_norm": 0.028025154024362564, "kl": 0.32124727964401245, "learning_rate": 9.917968729782031e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3029 }, { "completion_length": 147.1428680419922, "epoch": 2.907869481765835, "grad_norm": 0.04125657305121422, "kl": 0.48698824644088745, "learning_rate": 9.901813277182891e-08, "loss": 0.0005, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3030 }, { "completion_length": 223.4285888671875, "epoch": 2.9088291746641075, "grad_norm": 0.013244584202766418, "kl": 0.3217560946941376, "learning_rate": 9.8856677433345e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3031 }, { "completion_length": 243.6428680419922, "epoch": 2.90978886756238, "grad_norm": 1.2525360584259033, "kl": 0.30292007327079773, "learning_rate": 9.869532138843672e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3032 }, { "completion_length": 189.42857360839844, "epoch": 2.9107485604606524, "grad_norm": 0.015005200169980526, "kl": 0.3784869909286499, "learning_rate": 9.853406474310683e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3033 }, { "completion_length": 217.2857208251953, "epoch": 2.911708253358925, "grad_norm": 0.013021481223404408, "kl": 0.317598819732666, "learning_rate": 9.837290760329283e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3034 }, { "completion_length": 218.7857208251953, "epoch": 2.912667946257198, "grad_norm": 1.2888264656066895, "kl": 0.326771080493927, "learning_rate": 9.821185007486682e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3035 }, { "completion_length": 176.85714721679688, "epoch": 2.9136276391554703, "grad_norm": 0.01477950718253851, "kl": 0.35335296392440796, "learning_rate": 9.805089226363553e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3036 }, { "completion_length": 250.50001525878906, "epoch": 2.9145873320537428, "grad_norm": 0.011814302764832973, "kl": 0.28811001777648926, "learning_rate": 9.789003427534029e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3037 }, { "completion_length": 175.6428680419922, "epoch": 2.9155470249520152, "grad_norm": 1.1097310781478882, "kl": 0.4268227219581604, "learning_rate": 9.772927621565666e-08, "loss": 0.0004, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 3038 }, { "completion_length": 214.42857360839844, "epoch": 2.9165067178502877, "grad_norm": 1.3596187829971313, "kl": 0.29556208848953247, "learning_rate": 9.75686181901945e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3039 }, { "completion_length": 145.21429443359375, "epoch": 2.9174664107485606, "grad_norm": 2.9511871337890625, "kl": 0.5639101266860962, "learning_rate": 9.740806030449822e-08, "loss": 0.0006, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3040 }, { "completion_length": 221.1428680419922, "epoch": 2.918426103646833, "grad_norm": 1.1717984676361084, "kl": 0.2982242703437805, "learning_rate": 9.724760266404614e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3041 }, { "completion_length": 220.71429443359375, "epoch": 2.9193857965451055, "grad_norm": 0.02172568254172802, "kl": 0.3130074143409729, "learning_rate": 9.708724537425106e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3042 }, { "completion_length": 166.57144165039062, "epoch": 2.920345489443378, "grad_norm": 1.307288408279419, "kl": 0.3705422580242157, "learning_rate": 9.692698854045958e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3043 }, { "completion_length": 196.42857360839844, "epoch": 2.9213051823416505, "grad_norm": 0.019564364105463028, "kl": 0.36473438143730164, "learning_rate": 9.676683226795229e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3044 }, { "completion_length": 199.50001525878906, "epoch": 2.9222648752399234, "grad_norm": 1.7085604667663574, "kl": 0.40800100564956665, "learning_rate": 9.6606776661944e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3045 }, { "completion_length": 195.1428680419922, "epoch": 2.923224568138196, "grad_norm": 1.0676627159118652, "kl": 0.30992960929870605, "learning_rate": 9.644682182758304e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3046 }, { "completion_length": 178.35714721679688, "epoch": 2.9241842610364683, "grad_norm": 0.013744012452661991, "kl": 0.37126579880714417, "learning_rate": 9.628696786995188e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3047 }, { "completion_length": 182.92857360839844, "epoch": 2.925143953934741, "grad_norm": 0.023103633895516396, "kl": 0.32676267623901367, "learning_rate": 9.612721489406647e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3048 }, { "completion_length": 165.42857360839844, "epoch": 2.9261036468330133, "grad_norm": 0.9076974391937256, "kl": 0.36764228343963623, "learning_rate": 9.596756300487637e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3049 }, { "completion_length": 184.7857208251953, "epoch": 2.927063339731286, "grad_norm": 0.019313234835863113, "kl": 0.3781331777572632, "learning_rate": 9.580801230726501e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3050 }, { "completion_length": 191.07144165039062, "epoch": 2.9280230326295587, "grad_norm": 0.014204350300133228, "kl": 0.4226023554801941, "learning_rate": 9.564856290604922e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3051 }, { "completion_length": 162.35714721679688, "epoch": 2.928982725527831, "grad_norm": 0.009721128270030022, "kl": 0.38412851095199585, "learning_rate": 9.548921490597916e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3052 }, { "completion_length": 236.21429443359375, "epoch": 2.9299424184261036, "grad_norm": 0.01702950708568096, "kl": 0.3182714581489563, "learning_rate": 9.532996841173844e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3053 }, { "completion_length": 185.42857360839844, "epoch": 2.930902111324376, "grad_norm": 1.0667401552200317, "kl": 0.36739641427993774, "learning_rate": 9.517082352794411e-08, "loss": 0.0004, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 3054 }, { "completion_length": 174.92857360839844, "epoch": 2.931861804222649, "grad_norm": 1.0512655973434448, "kl": 0.3425713777542114, "learning_rate": 9.501178035914626e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3055 }, { "completion_length": 159.0, "epoch": 2.9328214971209214, "grad_norm": 1.575206995010376, "kl": 0.4607039988040924, "learning_rate": 9.485283900982841e-08, "loss": 0.0005, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3056 }, { "completion_length": 177.21429443359375, "epoch": 2.933781190019194, "grad_norm": 1.7487213611602783, "kl": 0.37209072709083557, "learning_rate": 9.469399958440696e-08, "loss": 0.0004, "reward": 0.7142857313156128, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "step": 3057 }, { "completion_length": 173.21429443359375, "epoch": 2.9347408829174664, "grad_norm": 0.013805700466036797, "kl": 0.3988668620586395, "learning_rate": 9.453526218723134e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3058 }, { "completion_length": 160.0, "epoch": 2.935700575815739, "grad_norm": 0.01556351687759161, "kl": 0.3999289870262146, "learning_rate": 9.437662692258427e-08, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3059 }, { "completion_length": 181.35714721679688, "epoch": 2.9366602687140118, "grad_norm": 1.0751293897628784, "kl": 0.37928059697151184, "learning_rate": 9.421809389468097e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3060 }, { "completion_length": 247.71429443359375, "epoch": 2.9376199616122842, "grad_norm": 1.3986423015594482, "kl": 0.3331449627876282, "learning_rate": 9.405966320766981e-08, "loss": 0.0003, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3061 }, { "completion_length": 112.0714340209961, "epoch": 2.9385796545105567, "grad_norm": 1.7905770540237427, "kl": 0.5994556546211243, "learning_rate": 9.390133496563174e-08, "loss": 0.0006, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3062 }, { "completion_length": 202.00001525878906, "epoch": 2.939539347408829, "grad_norm": 1.216615915298462, "kl": 0.3583272099494934, "learning_rate": 9.374310927258038e-08, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3063 }, { "completion_length": 190.1428680419922, "epoch": 2.9404990403071016, "grad_norm": 1.6207780838012695, "kl": 0.3414060175418854, "learning_rate": 9.358498623246219e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3064 }, { "completion_length": 173.42857360839844, "epoch": 2.9414587332053745, "grad_norm": 0.010155725292861462, "kl": 0.3414193093776703, "learning_rate": 9.342696594915608e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3065 }, { "completion_length": 214.42857360839844, "epoch": 2.9424184261036466, "grad_norm": 0.013514328747987747, "kl": 0.28601449728012085, "learning_rate": 9.326904852647344e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3066 }, { "completion_length": 187.50001525878906, "epoch": 2.9433781190019195, "grad_norm": 1.2218984365463257, "kl": 0.39023640751838684, "learning_rate": 9.311123406815793e-08, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3067 }, { "completion_length": 227.71429443359375, "epoch": 2.944337811900192, "grad_norm": 1.020519495010376, "kl": 0.4239940345287323, "learning_rate": 9.295352267788592e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3068 }, { "completion_length": 180.6428680419922, "epoch": 2.9452975047984644, "grad_norm": 0.02501499652862549, "kl": 0.49872106313705444, "learning_rate": 9.279591445926574e-08, "loss": 0.0005, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3069 }, { "completion_length": 183.92857360839844, "epoch": 2.946257197696737, "grad_norm": 0.028297679498791695, "kl": 0.3946966528892517, "learning_rate": 9.263840951583821e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3070 }, { "completion_length": 139.92857360839844, "epoch": 2.9472168905950094, "grad_norm": 0.014347456395626068, "kl": 0.4559780955314636, "learning_rate": 9.24810079510761e-08, "loss": 0.0005, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3071 }, { "completion_length": 230.35714721679688, "epoch": 2.9481765834932823, "grad_norm": 0.010721570812165737, "kl": 0.3411523699760437, "learning_rate": 9.232370986838428e-08, "loss": 0.0003, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3072 }, { "completion_length": 247.57144165039062, "epoch": 2.9491362763915547, "grad_norm": 0.011366719380021095, "kl": 0.28674885630607605, "learning_rate": 9.216651537109982e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3073 }, { "completion_length": 191.92857360839844, "epoch": 2.950095969289827, "grad_norm": 1.5635122060775757, "kl": 0.3450399339199066, "learning_rate": 9.200942456249145e-08, "loss": 0.0003, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3074 }, { "completion_length": 190.92857360839844, "epoch": 2.9510556621880997, "grad_norm": 1.5486280918121338, "kl": 0.33662503957748413, "learning_rate": 9.185243754576011e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3075 }, { "completion_length": 171.35714721679688, "epoch": 2.952015355086372, "grad_norm": 1.8918460607528687, "kl": 0.44052618741989136, "learning_rate": 9.169555442403834e-08, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3076 }, { "completion_length": 245.7857208251953, "epoch": 2.952975047984645, "grad_norm": 0.011420004069805145, "kl": 0.27179673314094543, "learning_rate": 9.153877530039036e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3077 }, { "completion_length": 252.00001525878906, "epoch": 2.9539347408829175, "grad_norm": 0.015297582373023033, "kl": 0.35856595635414124, "learning_rate": 9.138210027781228e-08, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3078 }, { "completion_length": 191.21429443359375, "epoch": 2.95489443378119, "grad_norm": 0.02084558643400669, "kl": 0.4043075144290924, "learning_rate": 9.122552945923181e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3079 }, { "completion_length": 173.42857360839844, "epoch": 2.9558541266794625, "grad_norm": 0.010743150487542152, "kl": 0.362926185131073, "learning_rate": 9.106906294750804e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3080 }, { "completion_length": 171.07144165039062, "epoch": 2.956813819577735, "grad_norm": 0.987017035484314, "kl": 0.35706037282943726, "learning_rate": 9.091270084543159e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3081 }, { "completion_length": 167.85714721679688, "epoch": 2.957773512476008, "grad_norm": 2.7100768089294434, "kl": 0.4286220967769623, "learning_rate": 9.075644325572446e-08, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3082 }, { "completion_length": 197.21429443359375, "epoch": 2.9587332053742803, "grad_norm": 1.6305999755859375, "kl": 0.360527902841568, "learning_rate": 9.060029028104008e-08, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3083 }, { "completion_length": 203.71429443359375, "epoch": 2.9596928982725528, "grad_norm": 0.013608383946120739, "kl": 0.32586944103240967, "learning_rate": 9.044424202396325e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3084 }, { "completion_length": 198.7857208251953, "epoch": 2.9606525911708252, "grad_norm": 0.01170673780143261, "kl": 0.34897294640541077, "learning_rate": 9.028829858700973e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3085 }, { "completion_length": 161.2857208251953, "epoch": 2.9616122840690977, "grad_norm": 1.3862935304641724, "kl": 0.36528366804122925, "learning_rate": 9.013246007262642e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3086 }, { "completion_length": 203.35714721679688, "epoch": 2.9625719769673706, "grad_norm": 1.3102391958236694, "kl": 0.33605483174324036, "learning_rate": 8.997672658319163e-08, "loss": 0.0003, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3087 }, { "completion_length": 186.71429443359375, "epoch": 2.963531669865643, "grad_norm": 1.260646104812622, "kl": 0.7189136147499084, "learning_rate": 8.982109822101425e-08, "loss": 0.0007, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3088 }, { "completion_length": 158.21429443359375, "epoch": 2.9644913627639156, "grad_norm": 1.8141268491744995, "kl": 0.3523443639278412, "learning_rate": 8.966557508833447e-08, "loss": 0.0004, "reward": 0.7142857313156128, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "step": 3089 }, { "completion_length": 159.35714721679688, "epoch": 2.965451055662188, "grad_norm": 2.9768362045288086, "kl": 0.48932307958602905, "learning_rate": 8.951015728732309e-08, "loss": 0.0005, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3090 }, { "completion_length": 170.0, "epoch": 2.9664107485604605, "grad_norm": 1.3435002565383911, "kl": 0.3946378827095032, "learning_rate": 8.935484492008169e-08, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3091 }, { "completion_length": 186.50001525878906, "epoch": 2.9673704414587334, "grad_norm": 0.013737055473029613, "kl": 0.3799716830253601, "learning_rate": 8.919963808864283e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3092 }, { "completion_length": 162.0, "epoch": 2.968330134357006, "grad_norm": 1.3610361814498901, "kl": 0.38087907433509827, "learning_rate": 8.904453689496972e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3093 }, { "completion_length": 194.1428680419922, "epoch": 2.9692898272552783, "grad_norm": 0.017083588987588882, "kl": 0.379444420337677, "learning_rate": 8.888954144095586e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3094 }, { "completion_length": 237.57144165039062, "epoch": 2.970249520153551, "grad_norm": 0.013936476781964302, "kl": 0.25567227602005005, "learning_rate": 8.87346518284256e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3095 }, { "completion_length": 180.35714721679688, "epoch": 2.9712092130518233, "grad_norm": 0.9872697591781616, "kl": 0.4102381467819214, "learning_rate": 8.857986815913351e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3096 }, { "completion_length": 205.6428680419922, "epoch": 2.972168905950096, "grad_norm": 0.015907833352684975, "kl": 0.37473875284194946, "learning_rate": 8.842519053476475e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3097 }, { "completion_length": 180.71429443359375, "epoch": 2.9731285988483687, "grad_norm": 1.3105864524841309, "kl": 0.36287468671798706, "learning_rate": 8.827061905693486e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3098 }, { "completion_length": 263.5, "epoch": 2.974088291746641, "grad_norm": 0.008720600977540016, "kl": 0.26683560013771057, "learning_rate": 8.811615382718949e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3099 }, { "completion_length": 179.00001525878906, "epoch": 2.9750479846449136, "grad_norm": 0.71041339635849, "kl": 0.4023151695728302, "learning_rate": 8.796179494700439e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3100 }, { "completion_length": 150.5, "epoch": 2.976007677543186, "grad_norm": 2.3491504192352295, "kl": 0.5374383330345154, "learning_rate": 8.780754251778582e-08, "loss": 0.0005, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3101 }, { "completion_length": 174.6428680419922, "epoch": 2.976967370441459, "grad_norm": 0.012906413525342941, "kl": 0.4027378261089325, "learning_rate": 8.765339664086969e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3102 }, { "completion_length": 255.4285888671875, "epoch": 2.9779270633397315, "grad_norm": 0.7561459541320801, "kl": 0.3107239902019501, "learning_rate": 8.749935741752227e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3103 }, { "completion_length": 181.6428680419922, "epoch": 2.978886756238004, "grad_norm": 0.01431514322757721, "kl": 0.3749781548976898, "learning_rate": 8.734542494893954e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3104 }, { "completion_length": 208.00001525878906, "epoch": 2.9798464491362764, "grad_norm": 0.011779137887060642, "kl": 0.3208637237548828, "learning_rate": 8.719159933624726e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3105 }, { "completion_length": 212.2857208251953, "epoch": 2.980806142034549, "grad_norm": 0.009816286154091358, "kl": 0.28385937213897705, "learning_rate": 8.703788068050139e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3106 }, { "completion_length": 197.85714721679688, "epoch": 2.9817658349328218, "grad_norm": 1.7225239276885986, "kl": 0.34039515256881714, "learning_rate": 8.688426908268711e-08, "loss": 0.0003, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3107 }, { "completion_length": 219.00001525878906, "epoch": 2.982725527831094, "grad_norm": 0.011250639334321022, "kl": 0.337879478931427, "learning_rate": 8.673076464371979e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3108 }, { "completion_length": 182.2857208251953, "epoch": 2.9836852207293667, "grad_norm": 1.7264543771743774, "kl": 0.3600650727748871, "learning_rate": 8.657736746444397e-08, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3109 }, { "completion_length": 165.71429443359375, "epoch": 2.984644913627639, "grad_norm": 1.0514216423034668, "kl": 0.3786561191082001, "learning_rate": 8.642407764563387e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3110 }, { "completion_length": 160.42857360839844, "epoch": 2.9856046065259116, "grad_norm": 0.9549422860145569, "kl": 0.45911136269569397, "learning_rate": 8.627089528799325e-08, "loss": 0.0005, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3111 }, { "completion_length": 229.57144165039062, "epoch": 2.986564299424184, "grad_norm": 0.013508989475667477, "kl": 0.3161432445049286, "learning_rate": 8.611782049215532e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3112 }, { "completion_length": 159.5, "epoch": 2.9875239923224566, "grad_norm": 1.684861421585083, "kl": 0.4402216672897339, "learning_rate": 8.59648533586825e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3113 }, { "completion_length": 232.2857208251953, "epoch": 2.9884836852207295, "grad_norm": 0.015000933781266212, "kl": 0.3283337354660034, "learning_rate": 8.581199398806641e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3114 }, { "completion_length": 263.3571472167969, "epoch": 2.989443378119002, "grad_norm": 0.013124760240316391, "kl": 0.25676313042640686, "learning_rate": 8.565924248072809e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3115 }, { "completion_length": 158.0, "epoch": 2.9904030710172744, "grad_norm": 0.8667517900466919, "kl": 0.4398522675037384, "learning_rate": 8.550659893701753e-08, "loss": 0.0004, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 3116 }, { "completion_length": 170.2857208251953, "epoch": 2.991362763915547, "grad_norm": 1.6801408529281616, "kl": 0.3740893304347992, "learning_rate": 8.535406345721403e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3117 }, { "completion_length": 125.28572082519531, "epoch": 2.9923224568138194, "grad_norm": 2.8478448390960693, "kl": 0.46036818623542786, "learning_rate": 8.520163614152559e-08, "loss": 0.0005, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3118 }, { "completion_length": 157.85714721679688, "epoch": 2.9932821497120923, "grad_norm": 1.724549412727356, "kl": 0.47818106412887573, "learning_rate": 8.504931709008928e-08, "loss": 0.0005, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3119 }, { "completion_length": 164.1428680419922, "epoch": 2.9942418426103647, "grad_norm": 3.118407726287842, "kl": 0.5398954749107361, "learning_rate": 8.489710640297124e-08, "loss": 0.0005, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3120 }, { "completion_length": 151.7857208251953, "epoch": 2.995201535508637, "grad_norm": 0.015339745208621025, "kl": 0.41157811880111694, "learning_rate": 8.474500418016606e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3121 }, { "completion_length": 235.21429443359375, "epoch": 2.9961612284069097, "grad_norm": 0.01528110820800066, "kl": 0.29986968636512756, "learning_rate": 8.459301052159745e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3122 }, { "completion_length": 190.57144165039062, "epoch": 2.997120921305182, "grad_norm": 0.01505529135465622, "kl": 0.33503374457359314, "learning_rate": 8.444112552711752e-08, "loss": 0.0003, "reward": 0.7142857313156128, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "step": 3123 }, { "completion_length": 182.2857208251953, "epoch": 2.998080614203455, "grad_norm": 0.011844471096992493, "kl": 0.35308709740638733, "learning_rate": 8.4289349296507e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3124 }, { "completion_length": 176.42857360839844, "epoch": 2.9990403071017275, "grad_norm": 0.015171285718679428, "kl": 0.4162207543849945, "learning_rate": 8.413768192947532e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3125 }, { "completion_length": 173.7857208251953, "epoch": 3.0, "grad_norm": 0.9246425628662109, "kl": 0.4142411947250366, "learning_rate": 8.398612352566045e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3126 }, { "completion_length": 179.21429443359375, "epoch": 3.0009596928982725, "grad_norm": 0.013375315815210342, "kl": 0.3820241093635559, "learning_rate": 8.383467418462856e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3127 }, { "completion_length": 174.7857208251953, "epoch": 3.001919385796545, "grad_norm": 0.9600641131401062, "kl": 0.330721378326416, "learning_rate": 8.368333400587419e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3128 }, { "completion_length": 196.71429443359375, "epoch": 3.002879078694818, "grad_norm": 0.01420074887573719, "kl": 0.35420021414756775, "learning_rate": 8.353210308882036e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3129 }, { "completion_length": 176.57144165039062, "epoch": 3.0038387715930903, "grad_norm": 0.01291078981012106, "kl": 0.37698155641555786, "learning_rate": 8.338098153281806e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3130 }, { "completion_length": 214.35714721679688, "epoch": 3.004798464491363, "grad_norm": 1.119892954826355, "kl": 0.33481451869010925, "learning_rate": 8.322996943714672e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3131 }, { "completion_length": 200.57144165039062, "epoch": 3.0057581573896353, "grad_norm": 0.011689737439155579, "kl": 0.37502747774124146, "learning_rate": 8.307906690101363e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3132 }, { "completion_length": 185.07144165039062, "epoch": 3.0067178502879077, "grad_norm": 1.164272665977478, "kl": 0.3637620508670807, "learning_rate": 8.292827402355412e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3133 }, { "completion_length": 194.1428680419922, "epoch": 3.0076775431861806, "grad_norm": 1.2809052467346191, "kl": 0.37863025069236755, "learning_rate": 8.277759090383165e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3134 }, { "completion_length": 213.00001525878906, "epoch": 3.008637236084453, "grad_norm": 0.03376435860991478, "kl": 0.3874439597129822, "learning_rate": 8.262701764083735e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3135 }, { "completion_length": 234.00001525878906, "epoch": 3.0095969289827256, "grad_norm": 0.9438498616218567, "kl": 0.30935439467430115, "learning_rate": 8.247655433349046e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3136 }, { "completion_length": 148.6428680419922, "epoch": 3.010556621880998, "grad_norm": 0.017492255195975304, "kl": 0.4744341969490051, "learning_rate": 8.23262010806377e-08, "loss": 0.0005, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3137 }, { "completion_length": 170.0, "epoch": 3.0115163147792705, "grad_norm": 0.01583334244787693, "kl": 0.40789374709129333, "learning_rate": 8.217595798105357e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3138 }, { "completion_length": 192.57144165039062, "epoch": 3.012476007677543, "grad_norm": 0.011290094815194607, "kl": 0.4302457571029663, "learning_rate": 8.202582513344028e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3139 }, { "completion_length": 222.4285888671875, "epoch": 3.013435700575816, "grad_norm": 0.02333386428654194, "kl": 0.30760806798934937, "learning_rate": 8.187580263642768e-08, "loss": 0.0003, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3140 }, { "completion_length": 257.5, "epoch": 3.0143953934740884, "grad_norm": 0.9590516686439514, "kl": 0.28236159682273865, "learning_rate": 8.172589058857296e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3141 }, { "completion_length": 187.7857208251953, "epoch": 3.015355086372361, "grad_norm": 0.011522993445396423, "kl": 0.35440555214881897, "learning_rate": 8.15760890883607e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3142 }, { "completion_length": 247.7857208251953, "epoch": 3.0163147792706333, "grad_norm": 0.01228532288223505, "kl": 0.25818830728530884, "learning_rate": 8.14263982342031e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3143 }, { "completion_length": 163.21429443359375, "epoch": 3.0172744721689058, "grad_norm": 1.4115955829620361, "kl": 0.43432775139808655, "learning_rate": 8.127681812443946e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3144 }, { "completion_length": 240.35714721679688, "epoch": 3.0182341650671787, "grad_norm": 0.7173705101013184, "kl": 0.2943091094493866, "learning_rate": 8.11273488573365e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3145 }, { "completion_length": 206.92857360839844, "epoch": 3.019193857965451, "grad_norm": 0.7862855792045593, "kl": 0.35806453227996826, "learning_rate": 8.097799053108795e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3146 }, { "completion_length": 161.71429443359375, "epoch": 3.0201535508637236, "grad_norm": 1.4978530406951904, "kl": 0.4256344139575958, "learning_rate": 8.082874324381467e-08, "loss": 0.0004, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 3147 }, { "completion_length": 188.57144165039062, "epoch": 3.021113243761996, "grad_norm": 0.013035905547440052, "kl": 0.35749974846839905, "learning_rate": 8.067960709356478e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3148 }, { "completion_length": 197.7857208251953, "epoch": 3.0220729366602685, "grad_norm": 0.012192369438707829, "kl": 0.37250009179115295, "learning_rate": 8.053058217831308e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3149 }, { "completion_length": 205.2857208251953, "epoch": 3.0230326295585415, "grad_norm": 0.011126428842544556, "kl": 0.32530444860458374, "learning_rate": 8.038166859596163e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3150 }, { "completion_length": 223.50001525878906, "epoch": 3.023992322456814, "grad_norm": 0.8690300583839417, "kl": 0.321441650390625, "learning_rate": 8.023286644433913e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3151 }, { "completion_length": 174.35714721679688, "epoch": 3.0249520153550864, "grad_norm": 1.3572666645050049, "kl": 0.41078421473503113, "learning_rate": 8.008417582120097e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3152 }, { "completion_length": 198.7857208251953, "epoch": 3.025911708253359, "grad_norm": 1.7255303859710693, "kl": 0.3906296491622925, "learning_rate": 7.993559682422956e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3153 }, { "completion_length": 213.6428680419922, "epoch": 3.0268714011516313, "grad_norm": 0.012987536378204823, "kl": 0.32828855514526367, "learning_rate": 7.978712955103393e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3154 }, { "completion_length": 162.85714721679688, "epoch": 3.0278310940499042, "grad_norm": 0.010594216175377369, "kl": 0.3724607825279236, "learning_rate": 7.963877409914951e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3155 }, { "completion_length": 163.1428680419922, "epoch": 3.0287907869481767, "grad_norm": 1.1771821975708008, "kl": 0.407223105430603, "learning_rate": 7.94905305660384e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3156 }, { "completion_length": 210.1428680419922, "epoch": 3.029750479846449, "grad_norm": 1.7693651914596558, "kl": 0.35703837871551514, "learning_rate": 7.934239904908907e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3157 }, { "completion_length": 265.3571472167969, "epoch": 3.0307101727447217, "grad_norm": 0.8494130373001099, "kl": 0.29505854845046997, "learning_rate": 7.91943796456166e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3158 }, { "completion_length": 208.71429443359375, "epoch": 3.031669865642994, "grad_norm": 1.054151177406311, "kl": 0.36667102575302124, "learning_rate": 7.904647245286234e-08, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3159 }, { "completion_length": 195.2857208251953, "epoch": 3.0326295585412666, "grad_norm": 0.012983627617359161, "kl": 0.3362997770309448, "learning_rate": 7.889867756799384e-08, "loss": 0.0003, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3160 }, { "completion_length": 190.00001525878906, "epoch": 3.0335892514395395, "grad_norm": 1.3389462232589722, "kl": 0.35344722867012024, "learning_rate": 7.875099508810484e-08, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3161 }, { "completion_length": 170.6428680419922, "epoch": 3.034548944337812, "grad_norm": 2.690741777420044, "kl": 0.49938109517097473, "learning_rate": 7.86034251102155e-08, "loss": 0.0005, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3162 }, { "completion_length": 170.92857360839844, "epoch": 3.0355086372360844, "grad_norm": 1.1182419061660767, "kl": 0.4273473620414734, "learning_rate": 7.845596773127166e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3163 }, { "completion_length": 206.42857360839844, "epoch": 3.036468330134357, "grad_norm": 0.0190531425178051, "kl": 0.39190593361854553, "learning_rate": 7.830862304814564e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3164 }, { "completion_length": 197.07144165039062, "epoch": 3.0374280230326294, "grad_norm": 1.3052290678024292, "kl": 0.32132625579833984, "learning_rate": 7.81613911576354e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3165 }, { "completion_length": 212.21429443359375, "epoch": 3.0383877159309023, "grad_norm": 1.391313910484314, "kl": 0.3356729745864868, "learning_rate": 7.80142721564648e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3166 }, { "completion_length": 241.6428680419922, "epoch": 3.0393474088291748, "grad_norm": 0.011144638992846012, "kl": 0.268659770488739, "learning_rate": 7.786726614128375e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3167 }, { "completion_length": 216.21429443359375, "epoch": 3.0403071017274472, "grad_norm": 0.014909542165696621, "kl": 0.33156925439834595, "learning_rate": 7.772037320866786e-08, "loss": 0.0003, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3168 }, { "completion_length": 305.0, "epoch": 3.0412667946257197, "grad_norm": 0.02277558110654354, "kl": 0.23867346346378326, "learning_rate": 7.757359345511841e-08, "loss": 0.0002, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3169 }, { "completion_length": 224.2857208251953, "epoch": 3.042226487523992, "grad_norm": 0.018961027264595032, "kl": 0.343322217464447, "learning_rate": 7.742692697706224e-08, "loss": 0.0003, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3170 }, { "completion_length": 186.2857208251953, "epoch": 3.043186180422265, "grad_norm": 0.011709002777934074, "kl": 0.3329123556613922, "learning_rate": 7.728037387085185e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3171 }, { "completion_length": 224.50001525878906, "epoch": 3.0441458733205375, "grad_norm": 1.0372751951217651, "kl": 0.29411935806274414, "learning_rate": 7.71339342327653e-08, "loss": 0.0003, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 3172 }, { "completion_length": 217.07144165039062, "epoch": 3.04510556621881, "grad_norm": 1.0274320840835571, "kl": 0.33081290125846863, "learning_rate": 7.698760815900621e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3173 }, { "completion_length": 223.4285888671875, "epoch": 3.0460652591170825, "grad_norm": 0.012752040289342403, "kl": 0.28085407614707947, "learning_rate": 7.684139574570333e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3174 }, { "completion_length": 188.00001525878906, "epoch": 3.047024952015355, "grad_norm": 1.4224046468734741, "kl": 0.3612431287765503, "learning_rate": 7.669529708891087e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3175 }, { "completion_length": 228.1428680419922, "epoch": 3.047984644913628, "grad_norm": 0.011129043996334076, "kl": 0.316430926322937, "learning_rate": 7.65493122846084e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3176 }, { "completion_length": 175.35714721679688, "epoch": 3.0489443378119003, "grad_norm": 0.012814090587198734, "kl": 0.39916175603866577, "learning_rate": 7.64034414287005e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3177 }, { "completion_length": 141.07144165039062, "epoch": 3.049904030710173, "grad_norm": 0.017047438770532608, "kl": 0.4787253737449646, "learning_rate": 7.625768461701713e-08, "loss": 0.0005, "reward": 0.7142857313156128, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "step": 3178 }, { "completion_length": 177.7857208251953, "epoch": 3.0508637236084453, "grad_norm": 2.30554461479187, "kl": 0.38223955035209656, "learning_rate": 7.611204194531315e-08, "loss": 0.0004, "reward": 0.785714328289032, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.785714328289032, "step": 3179 }, { "completion_length": 146.2857208251953, "epoch": 3.0518234165067177, "grad_norm": 0.015557337552309036, "kl": 0.4534946382045746, "learning_rate": 7.596651350926836e-08, "loss": 0.0005, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3180 }, { "completion_length": 155.1428680419922, "epoch": 3.05278310940499, "grad_norm": 1.029187798500061, "kl": 0.36585062742233276, "learning_rate": 7.582109940448783e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3181 }, { "completion_length": 223.9285888671875, "epoch": 3.053742802303263, "grad_norm": 0.026628779247403145, "kl": 0.35300004482269287, "learning_rate": 7.567579972650115e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3182 }, { "completion_length": 183.1428680419922, "epoch": 3.0547024952015356, "grad_norm": 1.1390408277511597, "kl": 0.3847799599170685, "learning_rate": 7.553061457076307e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3183 }, { "completion_length": 211.92857360839844, "epoch": 3.055662188099808, "grad_norm": 0.01038399338722229, "kl": 0.3036349415779114, "learning_rate": 7.53855440326529e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3184 }, { "completion_length": 170.6428680419922, "epoch": 3.0566218809980805, "grad_norm": 1.5181105136871338, "kl": 0.4100647270679474, "learning_rate": 7.524058820747459e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3185 }, { "completion_length": 186.42857360839844, "epoch": 3.057581573896353, "grad_norm": 1.7648792266845703, "kl": 0.3992255926132202, "learning_rate": 7.509574719045694e-08, "loss": 0.0004, "reward": 0.7142857313156128, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "step": 3186 }, { "completion_length": 155.92857360839844, "epoch": 3.058541266794626, "grad_norm": 0.024417052045464516, "kl": 0.5221456289291382, "learning_rate": 7.49510210767533e-08, "loss": 0.0005, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3187 }, { "completion_length": 165.57144165039062, "epoch": 3.0595009596928984, "grad_norm": 0.9102832674980164, "kl": 0.3727390170097351, "learning_rate": 7.480640996144136e-08, "loss": 0.0004, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 3188 }, { "completion_length": 205.6428680419922, "epoch": 3.060460652591171, "grad_norm": 0.7313973307609558, "kl": 0.34202754497528076, "learning_rate": 7.466191393952332e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3189 }, { "completion_length": 169.7857208251953, "epoch": 3.0614203454894433, "grad_norm": 1.8236647844314575, "kl": 0.40185675024986267, "learning_rate": 7.451753310592595e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3190 }, { "completion_length": 183.21429443359375, "epoch": 3.0623800383877158, "grad_norm": 0.018645452335476875, "kl": 0.3661656975746155, "learning_rate": 7.437326755550005e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3191 }, { "completion_length": 187.7857208251953, "epoch": 3.0633397312859887, "grad_norm": 0.01655874215066433, "kl": 0.36571744084358215, "learning_rate": 7.422911738302104e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3192 }, { "completion_length": 218.50001525878906, "epoch": 3.064299424184261, "grad_norm": 0.011212405748665333, "kl": 0.333224892616272, "learning_rate": 7.40850826831882e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3193 }, { "completion_length": 217.42857360839844, "epoch": 3.0652591170825336, "grad_norm": 0.8916187882423401, "kl": 0.29705899953842163, "learning_rate": 7.394116355062508e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3194 }, { "completion_length": 158.92857360839844, "epoch": 3.066218809980806, "grad_norm": 1.3094862699508667, "kl": 0.5334965586662292, "learning_rate": 7.379736007987943e-08, "loss": 0.0005, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 3195 }, { "completion_length": 177.57144165039062, "epoch": 3.0671785028790786, "grad_norm": 0.011198594234883785, "kl": 0.37654101848602295, "learning_rate": 7.365367236542283e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3196 }, { "completion_length": 222.57144165039062, "epoch": 3.068138195777351, "grad_norm": 1.5140315294265747, "kl": 0.308833509683609, "learning_rate": 7.351010050165097e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3197 }, { "completion_length": 214.2857208251953, "epoch": 3.069097888675624, "grad_norm": 0.014590680599212646, "kl": 0.2867349684238434, "learning_rate": 7.336664458288333e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3198 }, { "completion_length": 205.07144165039062, "epoch": 3.0700575815738964, "grad_norm": 0.010341638699173927, "kl": 0.32199016213417053, "learning_rate": 7.322330470336313e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3199 }, { "completion_length": 211.07144165039062, "epoch": 3.071017274472169, "grad_norm": 0.019280212000012398, "kl": 0.3294016420841217, "learning_rate": 7.30800809572576e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3200 }, { "completion_length": 175.00001525878906, "epoch": 3.0719769673704413, "grad_norm": 1.0212233066558838, "kl": 0.3396153748035431, "learning_rate": 7.293697343865759e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3201 }, { "completion_length": 210.7857208251953, "epoch": 3.072936660268714, "grad_norm": 1.017210841178894, "kl": 0.35917991399765015, "learning_rate": 7.279398224157751e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3202 }, { "completion_length": 199.57144165039062, "epoch": 3.0738963531669867, "grad_norm": 1.3304895162582397, "kl": 0.3648189902305603, "learning_rate": 7.265110745995529e-08, "loss": 0.0004, "reward": 0.785714328289032, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.785714328289032, "step": 3203 }, { "completion_length": 177.7857208251953, "epoch": 3.074856046065259, "grad_norm": 1.7079029083251953, "kl": 0.5306779742240906, "learning_rate": 7.250834918765267e-08, "loss": 0.0005, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3204 }, { "completion_length": 164.07144165039062, "epoch": 3.0758157389635317, "grad_norm": 1.2583949565887451, "kl": 0.4143732190132141, "learning_rate": 7.236570751845448e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3205 }, { "completion_length": 152.7857208251953, "epoch": 3.076775431861804, "grad_norm": 0.017018122598528862, "kl": 0.39520809054374695, "learning_rate": 7.222318254606933e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3206 }, { "completion_length": 174.21429443359375, "epoch": 3.0777351247600766, "grad_norm": 0.8615924715995789, "kl": 0.4376147985458374, "learning_rate": 7.208077436412887e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3207 }, { "completion_length": 156.2857208251953, "epoch": 3.0786948176583495, "grad_norm": 1.7529144287109375, "kl": 0.4409613013267517, "learning_rate": 7.193848306618805e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3208 }, { "completion_length": 217.00001525878906, "epoch": 3.079654510556622, "grad_norm": 0.9170036315917969, "kl": 0.310176283121109, "learning_rate": 7.179630874572529e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3209 }, { "completion_length": 173.85714721679688, "epoch": 3.0806142034548945, "grad_norm": 0.012356528080999851, "kl": 0.3861270844936371, "learning_rate": 7.165425149614179e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3210 }, { "completion_length": 171.57144165039062, "epoch": 3.081573896353167, "grad_norm": 1.147538423538208, "kl": 0.4387372136116028, "learning_rate": 7.151231141076222e-08, "loss": 0.0004, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 3211 }, { "completion_length": 211.42857360839844, "epoch": 3.0825335892514394, "grad_norm": 1.2537318468093872, "kl": 0.3108188807964325, "learning_rate": 7.137048858283401e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3212 }, { "completion_length": 186.1428680419922, "epoch": 3.0834932821497123, "grad_norm": 1.0754293203353882, "kl": 0.3617730736732483, "learning_rate": 7.122878310552752e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3213 }, { "completion_length": 201.92857360839844, "epoch": 3.0844529750479848, "grad_norm": 0.012040199711918831, "kl": 0.3847430646419525, "learning_rate": 7.108719507193626e-08, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3214 }, { "completion_length": 154.1428680419922, "epoch": 3.0854126679462572, "grad_norm": 0.016659405082464218, "kl": 0.39245933294296265, "learning_rate": 7.09457245750765e-08, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3215 }, { "completion_length": 199.6428680419922, "epoch": 3.0863723608445297, "grad_norm": 0.010680862702429295, "kl": 0.32895123958587646, "learning_rate": 7.080437170788722e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3216 }, { "completion_length": 203.35714721679688, "epoch": 3.087332053742802, "grad_norm": 0.011829365976154804, "kl": 0.3504783809185028, "learning_rate": 7.066313656323002e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3217 }, { "completion_length": 163.21429443359375, "epoch": 3.0882917466410746, "grad_norm": 0.9591008424758911, "kl": 0.41642025113105774, "learning_rate": 7.052201923388953e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3218 }, { "completion_length": 207.1428680419922, "epoch": 3.0892514395393476, "grad_norm": 0.014707069844007492, "kl": 0.36625897884368896, "learning_rate": 7.038101981257255e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3219 }, { "completion_length": 230.50001525878906, "epoch": 3.09021113243762, "grad_norm": 1.0515559911727905, "kl": 0.3211449384689331, "learning_rate": 7.024013839190879e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3220 }, { "completion_length": 194.07144165039062, "epoch": 3.0911708253358925, "grad_norm": 0.010913439095020294, "kl": 0.3854312300682068, "learning_rate": 7.009937506445019e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3221 }, { "completion_length": 149.35714721679688, "epoch": 3.092130518234165, "grad_norm": 0.011647527106106281, "kl": 0.4637737572193146, "learning_rate": 6.995872992267108e-08, "loss": 0.0005, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3222 }, { "completion_length": 157.35714721679688, "epoch": 3.0930902111324374, "grad_norm": 1.1415817737579346, "kl": 0.4305313229560852, "learning_rate": 6.981820305896847e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3223 }, { "completion_length": 202.00001525878906, "epoch": 3.0940499040307103, "grad_norm": 0.011401083320379257, "kl": 0.31972038745880127, "learning_rate": 6.967779456566125e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3224 }, { "completion_length": 212.50001525878906, "epoch": 3.095009596928983, "grad_norm": 0.012825015932321548, "kl": 0.289950430393219, "learning_rate": 6.953750453499097e-08, "loss": 0.0003, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3225 }, { "completion_length": 224.85714721679688, "epoch": 3.0959692898272553, "grad_norm": 0.010815553367137909, "kl": 0.31526273488998413, "learning_rate": 6.9397333059121e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3226 }, { "completion_length": 222.07144165039062, "epoch": 3.0969289827255277, "grad_norm": 1.5233439207077026, "kl": 0.368097722530365, "learning_rate": 6.925728023013689e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3227 }, { "completion_length": 253.1428680419922, "epoch": 3.0978886756238, "grad_norm": 0.011075167916715145, "kl": 0.2525060176849365, "learning_rate": 6.911734614004641e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3228 }, { "completion_length": 199.2857208251953, "epoch": 3.098848368522073, "grad_norm": 1.302308201789856, "kl": 0.3423072099685669, "learning_rate": 6.897753088077932e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3229 }, { "completion_length": 203.1428680419922, "epoch": 3.0998080614203456, "grad_norm": 0.024573080241680145, "kl": 0.3901473581790924, "learning_rate": 6.883783454418715e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3230 }, { "completion_length": 214.21429443359375, "epoch": 3.100767754318618, "grad_norm": 1.6031233072280884, "kl": 0.3195124566555023, "learning_rate": 6.869825722204336e-08, "loss": 0.0003, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3231 }, { "completion_length": 178.21429443359375, "epoch": 3.1017274472168905, "grad_norm": 0.940491795539856, "kl": 0.3876490294933319, "learning_rate": 6.85587990060432e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3232 }, { "completion_length": 207.71429443359375, "epoch": 3.102687140115163, "grad_norm": 1.605186104774475, "kl": 0.30715247988700867, "learning_rate": 6.841945998780374e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3233 }, { "completion_length": 214.2857208251953, "epoch": 3.103646833013436, "grad_norm": 0.010417484678328037, "kl": 0.30429235100746155, "learning_rate": 6.828024025886387e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3234 }, { "completion_length": 196.7857208251953, "epoch": 3.1046065259117084, "grad_norm": 1.033278226852417, "kl": 0.38815394043922424, "learning_rate": 6.81411399106838e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3235 }, { "completion_length": 212.00001525878906, "epoch": 3.105566218809981, "grad_norm": 0.018614383414387703, "kl": 0.3206575810909271, "learning_rate": 6.800215903464546e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3236 }, { "completion_length": 201.6428680419922, "epoch": 3.1065259117082533, "grad_norm": 0.013693994842469692, "kl": 0.36813682317733765, "learning_rate": 6.786329772205246e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3237 }, { "completion_length": 206.71429443359375, "epoch": 3.107485604606526, "grad_norm": 0.013490852899849415, "kl": 0.3927471339702606, "learning_rate": 6.772455606412952e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3238 }, { "completion_length": 159.2857208251953, "epoch": 3.1084452975047983, "grad_norm": 0.013366395607590675, "kl": 0.3596626818180084, "learning_rate": 6.758593415202315e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3239 }, { "completion_length": 229.71429443359375, "epoch": 3.109404990403071, "grad_norm": 1.1937180757522583, "kl": 0.289135217666626, "learning_rate": 6.744743207680087e-08, "loss": 0.0003, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3240 }, { "completion_length": 188.57144165039062, "epoch": 3.1103646833013436, "grad_norm": 0.011554762721061707, "kl": 0.362158864736557, "learning_rate": 6.730904992945152e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3241 }, { "completion_length": 202.1428680419922, "epoch": 3.111324376199616, "grad_norm": 1.433790922164917, "kl": 0.3979402482509613, "learning_rate": 6.71707878008853e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3242 }, { "completion_length": 174.07144165039062, "epoch": 3.1122840690978886, "grad_norm": 1.2263247966766357, "kl": 0.4460993707180023, "learning_rate": 6.70326457819336e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3243 }, { "completion_length": 211.21429443359375, "epoch": 3.113243761996161, "grad_norm": 0.01651371642947197, "kl": 0.3719140887260437, "learning_rate": 6.689462396334869e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3244 }, { "completion_length": 194.2857208251953, "epoch": 3.114203454894434, "grad_norm": 0.06550788134336472, "kl": 0.5531383752822876, "learning_rate": 6.675672243580393e-08, "loss": 0.0006, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3245 }, { "completion_length": 167.85714721679688, "epoch": 3.1151631477927064, "grad_norm": 0.067532978951931, "kl": 0.5570069551467896, "learning_rate": 6.66189412898937e-08, "loss": 0.0006, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3246 }, { "completion_length": 178.85714721679688, "epoch": 3.116122840690979, "grad_norm": 0.012102663516998291, "kl": 0.34227463603019714, "learning_rate": 6.648128061613334e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3247 }, { "completion_length": 169.07144165039062, "epoch": 3.1170825335892514, "grad_norm": 0.013635868206620216, "kl": 0.42651599645614624, "learning_rate": 6.634374050495909e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3248 }, { "completion_length": 173.50001525878906, "epoch": 3.118042226487524, "grad_norm": 0.014884823933243752, "kl": 0.3858499825000763, "learning_rate": 6.62063210467278e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3249 }, { "completion_length": 113.28572082519531, "epoch": 3.1190019193857967, "grad_norm": 2.3744280338287354, "kl": 0.5535122156143188, "learning_rate": 6.60690223317171e-08, "loss": 0.0006, "reward": 0.7142857313156128, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "step": 3250 }, { "completion_length": 138.5, "epoch": 3.119961612284069, "grad_norm": 0.03976985067129135, "kl": 0.5601146221160889, "learning_rate": 6.593184445012549e-08, "loss": 0.0006, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3251 }, { "completion_length": 202.6428680419922, "epoch": 3.1209213051823417, "grad_norm": 0.01609739288687706, "kl": 0.35000482201576233, "learning_rate": 6.57947874920718e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3252 }, { "completion_length": 218.42857360839844, "epoch": 3.121880998080614, "grad_norm": 0.010052323341369629, "kl": 0.2894258499145508, "learning_rate": 6.56578515475957e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3253 }, { "completion_length": 189.07144165039062, "epoch": 3.1228406909788866, "grad_norm": 0.012643915601074696, "kl": 0.3496303856372833, "learning_rate": 6.552103670665724e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3254 }, { "completion_length": 192.2857208251953, "epoch": 3.1238003838771595, "grad_norm": 0.7650982141494751, "kl": 0.37328919768333435, "learning_rate": 6.538434305913673e-08, "loss": 0.0004, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 3255 }, { "completion_length": 189.50001525878906, "epoch": 3.124760076775432, "grad_norm": 0.009309687651693821, "kl": 0.3135465383529663, "learning_rate": 6.524777069483525e-08, "loss": 0.0003, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3256 }, { "completion_length": 237.21429443359375, "epoch": 3.1257197696737045, "grad_norm": 1.40904700756073, "kl": 0.30275121331214905, "learning_rate": 6.51113197034738e-08, "loss": 0.0003, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3257 }, { "completion_length": 189.7857208251953, "epoch": 3.126679462571977, "grad_norm": 1.4170109033584595, "kl": 0.3804667592048645, "learning_rate": 6.497499017469402e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3258 }, { "completion_length": 212.6428680419922, "epoch": 3.1276391554702494, "grad_norm": 0.834389865398407, "kl": 0.31828200817108154, "learning_rate": 6.483878219805744e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3259 }, { "completion_length": 184.42857360839844, "epoch": 3.128598848368522, "grad_norm": 0.010125229135155678, "kl": 0.35754939913749695, "learning_rate": 6.470269586304583e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3260 }, { "completion_length": 175.7857208251953, "epoch": 3.129558541266795, "grad_norm": 1.0564863681793213, "kl": 0.37276193499565125, "learning_rate": 6.456673125906115e-08, "loss": 0.0004, "reward": 0.6428571939468384, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.6428571939468384, "step": 3261 }, { "completion_length": 191.1428680419922, "epoch": 3.1305182341650672, "grad_norm": 0.01662151701748371, "kl": 0.3501315414905548, "learning_rate": 6.443088847542539e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3262 }, { "completion_length": 176.50001525878906, "epoch": 3.1314779270633397, "grad_norm": 1.3577299118041992, "kl": 0.37986281514167786, "learning_rate": 6.429516760138032e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3263 }, { "completion_length": 138.21429443359375, "epoch": 3.132437619961612, "grad_norm": 1.0452367067337036, "kl": 0.5119882822036743, "learning_rate": 6.415956872608774e-08, "loss": 0.0005, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 3264 }, { "completion_length": 183.6428680419922, "epoch": 3.1333973128598847, "grad_norm": 0.013867233879864216, "kl": 0.3379751145839691, "learning_rate": 6.40240919386294e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3265 }, { "completion_length": 184.92857360839844, "epoch": 3.1343570057581576, "grad_norm": 0.8295740485191345, "kl": 0.3672301173210144, "learning_rate": 6.388873732800665e-08, "loss": 0.0004, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 3266 }, { "completion_length": 155.07144165039062, "epoch": 3.13531669865643, "grad_norm": 1.4818837642669678, "kl": 0.3994984030723572, "learning_rate": 6.375350498314074e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3267 }, { "completion_length": 198.6428680419922, "epoch": 3.1362763915547025, "grad_norm": 0.015415636822581291, "kl": 0.3858235776424408, "learning_rate": 6.361839499287256e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3268 }, { "completion_length": 220.71429443359375, "epoch": 3.137236084452975, "grad_norm": 1.7877284288406372, "kl": 0.2991194725036621, "learning_rate": 6.348340744596244e-08, "loss": 0.0003, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3269 }, { "completion_length": 213.00001525878906, "epoch": 3.1381957773512474, "grad_norm": 0.011368916369974613, "kl": 0.33720338344573975, "learning_rate": 6.334854243109062e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3270 }, { "completion_length": 192.42857360839844, "epoch": 3.1391554702495204, "grad_norm": 0.015750087797641754, "kl": 0.331881046295166, "learning_rate": 6.321380003685644e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3271 }, { "completion_length": 181.57144165039062, "epoch": 3.140115163147793, "grad_norm": 1.246639370918274, "kl": 0.3972340524196625, "learning_rate": 6.307918035177912e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3272 }, { "completion_length": 196.85714721679688, "epoch": 3.1410748560460653, "grad_norm": 0.013500741682946682, "kl": 0.347392737865448, "learning_rate": 6.294468346429688e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3273 }, { "completion_length": 167.2857208251953, "epoch": 3.1420345489443378, "grad_norm": 0.01586068980395794, "kl": 0.38262465596199036, "learning_rate": 6.281030946276736e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3274 }, { "completion_length": 241.07144165039062, "epoch": 3.1429942418426102, "grad_norm": 0.00894144643098116, "kl": 0.29374417662620544, "learning_rate": 6.267605843546767e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3275 }, { "completion_length": 176.35714721679688, "epoch": 3.1439539347408827, "grad_norm": 0.012923833914101124, "kl": 0.3395077884197235, "learning_rate": 6.2541930470594e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3276 }, { "completion_length": 176.71429443359375, "epoch": 3.1449136276391556, "grad_norm": 1.3753021955490112, "kl": 0.37234464287757874, "learning_rate": 6.240792565626165e-08, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3277 }, { "completion_length": 184.35714721679688, "epoch": 3.145873320537428, "grad_norm": 0.012239434756338596, "kl": 0.35548892617225647, "learning_rate": 6.227404408050496e-08, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3278 }, { "completion_length": 182.85714721679688, "epoch": 3.1468330134357005, "grad_norm": 0.02295767515897751, "kl": 0.397670716047287, "learning_rate": 6.214028583127759e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3279 }, { "completion_length": 157.71429443359375, "epoch": 3.147792706333973, "grad_norm": 1.4747235774993896, "kl": 0.3777551054954529, "learning_rate": 6.200665099645183e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3280 }, { "completion_length": 214.2857208251953, "epoch": 3.1487523992322455, "grad_norm": 0.02534777857363224, "kl": 0.3620508909225464, "learning_rate": 6.187313966381918e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3281 }, { "completion_length": 211.1428680419922, "epoch": 3.1497120921305184, "grad_norm": 1.5759228467941284, "kl": 0.37833207845687866, "learning_rate": 6.173975192108985e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3282 }, { "completion_length": 206.71429443359375, "epoch": 3.150671785028791, "grad_norm": 0.011701121926307678, "kl": 0.2946940064430237, "learning_rate": 6.160648785589279e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3283 }, { "completion_length": 204.00001525878906, "epoch": 3.1516314779270633, "grad_norm": 1.4995911121368408, "kl": 0.3488054871559143, "learning_rate": 6.147334755577596e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3284 }, { "completion_length": 238.7857208251953, "epoch": 3.152591170825336, "grad_norm": 1.258062720298767, "kl": 0.31851083040237427, "learning_rate": 6.134033110820572e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3285 }, { "completion_length": 161.71429443359375, "epoch": 3.1535508637236083, "grad_norm": 0.0677875205874443, "kl": 0.4533923268318176, "learning_rate": 6.120743860056728e-08, "loss": 0.0005, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3286 }, { "completion_length": 222.00001525878906, "epoch": 3.154510556621881, "grad_norm": 1.1223641633987427, "kl": 0.33832427859306335, "learning_rate": 6.107467012016437e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3287 }, { "completion_length": 164.71429443359375, "epoch": 3.1554702495201536, "grad_norm": 1.390771508216858, "kl": 0.42296406626701355, "learning_rate": 6.094202575421906e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3288 }, { "completion_length": 172.35714721679688, "epoch": 3.156429942418426, "grad_norm": 2.0102648735046387, "kl": 0.473021924495697, "learning_rate": 6.080950558987216e-08, "loss": 0.0005, "reward": 0.7142857313156128, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "step": 3289 }, { "completion_length": 175.7857208251953, "epoch": 3.1573896353166986, "grad_norm": 1.9969011545181274, "kl": 0.34987056255340576, "learning_rate": 6.067710971418282e-08, "loss": 0.0003, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3290 }, { "completion_length": 197.2857208251953, "epoch": 3.158349328214971, "grad_norm": 1.2596945762634277, "kl": 0.3449448347091675, "learning_rate": 6.054483821412842e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3291 }, { "completion_length": 273.5, "epoch": 3.159309021113244, "grad_norm": 0.8685394525527954, "kl": 0.6227135062217712, "learning_rate": 6.041269117660464e-08, "loss": 0.0006, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3292 }, { "completion_length": 158.1428680419922, "epoch": 3.1602687140115164, "grad_norm": 0.011639004573225975, "kl": 0.39528411626815796, "learning_rate": 6.028066868842555e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3293 }, { "completion_length": 172.35714721679688, "epoch": 3.161228406909789, "grad_norm": 0.856852650642395, "kl": 0.3571203351020813, "learning_rate": 6.01487708363232e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3294 }, { "completion_length": 159.0, "epoch": 3.1621880998080614, "grad_norm": 0.7024944424629211, "kl": 0.4329487681388855, "learning_rate": 6.001699770694795e-08, "loss": 0.0004, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 3295 }, { "completion_length": 171.92857360839844, "epoch": 3.163147792706334, "grad_norm": 0.018558232113718987, "kl": 0.4169312119483948, "learning_rate": 5.988534938686812e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3296 }, { "completion_length": 175.00001525878906, "epoch": 3.1641074856046068, "grad_norm": 0.018692022189497948, "kl": 0.35579460859298706, "learning_rate": 5.975382596256992e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3297 }, { "completion_length": 165.5, "epoch": 3.165067178502879, "grad_norm": 0.8993119597434998, "kl": 0.40539640188217163, "learning_rate": 5.962242752045782e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3298 }, { "completion_length": 203.50001525878906, "epoch": 3.1660268714011517, "grad_norm": 0.011064593680202961, "kl": 0.3287200331687927, "learning_rate": 5.9491154146853824e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3299 }, { "completion_length": 201.71429443359375, "epoch": 3.166986564299424, "grad_norm": 0.020504171028733253, "kl": 0.37509074807167053, "learning_rate": 5.9360005927998115e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3300 }, { "completion_length": 249.1428680419922, "epoch": 3.1679462571976966, "grad_norm": 0.00835102517157793, "kl": 0.26825061440467834, "learning_rate": 5.9228982950048414e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3301 }, { "completion_length": 176.7857208251953, "epoch": 3.168905950095969, "grad_norm": 1.0283799171447754, "kl": 0.3627311885356903, "learning_rate": 5.909808529908017e-08, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3302 }, { "completion_length": 209.00001525878906, "epoch": 3.169865642994242, "grad_norm": 0.9281685948371887, "kl": 0.404068261384964, "learning_rate": 5.896731306108665e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3303 }, { "completion_length": 147.35714721679688, "epoch": 3.1708253358925145, "grad_norm": 0.012753641232848167, "kl": 0.4635902941226959, "learning_rate": 5.8836666321978676e-08, "loss": 0.0005, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3304 }, { "completion_length": 215.57144165039062, "epoch": 3.171785028790787, "grad_norm": 0.8025734424591064, "kl": 0.2740035057067871, "learning_rate": 5.8706145167584595e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3305 }, { "completion_length": 183.71429443359375, "epoch": 3.1727447216890594, "grad_norm": 0.07552378624677658, "kl": 0.4167235493659973, "learning_rate": 5.85757496836502e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3306 }, { "completion_length": 217.92857360839844, "epoch": 3.173704414587332, "grad_norm": 1.0014965534210205, "kl": 0.31454142928123474, "learning_rate": 5.8445479955838706e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3307 }, { "completion_length": 200.71429443359375, "epoch": 3.174664107485605, "grad_norm": 0.015626052394509315, "kl": 0.404713898897171, "learning_rate": 5.831533606973088e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3308 }, { "completion_length": 161.07144165039062, "epoch": 3.1756238003838773, "grad_norm": 1.9329293966293335, "kl": 0.44917336106300354, "learning_rate": 5.81853181108248e-08, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3309 }, { "completion_length": 185.07144165039062, "epoch": 3.1765834932821497, "grad_norm": 1.5030832290649414, "kl": 0.36222925782203674, "learning_rate": 5.8055426164535657e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3310 }, { "completion_length": 189.50001525878906, "epoch": 3.177543186180422, "grad_norm": 1.7614330053329468, "kl": 0.38336145877838135, "learning_rate": 5.7925660316195834e-08, "loss": 0.0004, "reward": 0.785714328289032, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.785714328289032, "step": 3311 }, { "completion_length": 144.92857360839844, "epoch": 3.1785028790786947, "grad_norm": 1.380660891532898, "kl": 0.47725534439086914, "learning_rate": 5.779602065105518e-08, "loss": 0.0005, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3312 }, { "completion_length": 156.85714721679688, "epoch": 3.1794625719769676, "grad_norm": 1.0145905017852783, "kl": 0.43140077590942383, "learning_rate": 5.7666507254280265e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3313 }, { "completion_length": 179.57144165039062, "epoch": 3.18042226487524, "grad_norm": 1.4719619750976562, "kl": 0.46611180901527405, "learning_rate": 5.7537120210955024e-08, "loss": 0.0005, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3314 }, { "completion_length": 186.71429443359375, "epoch": 3.1813819577735125, "grad_norm": 0.010235416702926159, "kl": 0.3104409873485565, "learning_rate": 5.7407859606080216e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3315 }, { "completion_length": 189.07144165039062, "epoch": 3.182341650671785, "grad_norm": 0.0238230861723423, "kl": 0.3473142683506012, "learning_rate": 5.7278725524573494e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3316 }, { "completion_length": 181.42857360839844, "epoch": 3.1833013435700575, "grad_norm": 0.018068652600049973, "kl": 0.3825089633464813, "learning_rate": 5.71497180512695e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3317 }, { "completion_length": 163.0, "epoch": 3.18426103646833, "grad_norm": 0.010886655189096928, "kl": 0.380034476518631, "learning_rate": 5.702083727091977e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3318 }, { "completion_length": 160.07144165039062, "epoch": 3.185220729366603, "grad_norm": 0.01047478150576353, "kl": 0.4389522075653076, "learning_rate": 5.689208326819242e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3319 }, { "completion_length": 125.78572082519531, "epoch": 3.1861804222648753, "grad_norm": 1.288930058479309, "kl": 0.5259394645690918, "learning_rate": 5.6763456127672386e-08, "loss": 0.0005, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 3320 }, { "completion_length": 161.85714721679688, "epoch": 3.1871401151631478, "grad_norm": 1.0340356826782227, "kl": 0.348454087972641, "learning_rate": 5.663495593386117e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3321 }, { "completion_length": 278.5, "epoch": 3.1880998080614202, "grad_norm": 0.009571658447384834, "kl": 0.23941639065742493, "learning_rate": 5.650658277117698e-08, "loss": 0.0002, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3322 }, { "completion_length": 169.71429443359375, "epoch": 3.1890595009596927, "grad_norm": 1.040770411491394, "kl": 0.3494597375392914, "learning_rate": 5.6378336723954644e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3323 }, { "completion_length": 210.07144165039062, "epoch": 3.1900191938579656, "grad_norm": 21.885583877563477, "kl": 22.721033096313477, "learning_rate": 5.625021787644532e-08, "loss": 0.0227, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3324 }, { "completion_length": 138.85714721679688, "epoch": 3.190978886756238, "grad_norm": 2.281054735183716, "kl": 0.49783605337142944, "learning_rate": 5.612222631281657e-08, "loss": 0.0005, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3325 }, { "completion_length": 127.92857360839844, "epoch": 3.1919385796545106, "grad_norm": 0.015160685405135155, "kl": 0.4697612524032593, "learning_rate": 5.599436211715255e-08, "loss": 0.0005, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3326 }, { "completion_length": 152.71429443359375, "epoch": 3.192898272552783, "grad_norm": 1.2447741031646729, "kl": 0.40268003940582275, "learning_rate": 5.586662537345352e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3327 }, { "completion_length": 243.2857208251953, "epoch": 3.1938579654510555, "grad_norm": 0.03274143114686012, "kl": 0.33778464794158936, "learning_rate": 5.57390161656362e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3328 }, { "completion_length": 190.42857360839844, "epoch": 3.1948176583493284, "grad_norm": 0.9807112812995911, "kl": 0.4110477566719055, "learning_rate": 5.56115345775334e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3329 }, { "completion_length": 242.50001525878906, "epoch": 3.195777351247601, "grad_norm": 0.6694984436035156, "kl": 0.24634245038032532, "learning_rate": 5.5484180692894014e-08, "loss": 0.0002, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3330 }, { "completion_length": 229.07144165039062, "epoch": 3.1967370441458733, "grad_norm": 1.0057905912399292, "kl": 0.3431151211261749, "learning_rate": 5.535695459538334e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3331 }, { "completion_length": 229.07144165039062, "epoch": 3.197696737044146, "grad_norm": 1.7698012590408325, "kl": 0.3416958749294281, "learning_rate": 5.5229856368582376e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3332 }, { "completion_length": 226.35714721679688, "epoch": 3.1986564299424183, "grad_norm": 0.5762227773666382, "kl": 0.3215577304363251, "learning_rate": 5.5102886095988424e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3333 }, { "completion_length": 173.00001525878906, "epoch": 3.199616122840691, "grad_norm": 0.0117300134152174, "kl": 0.38186517357826233, "learning_rate": 5.497604386101448e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3334 }, { "completion_length": 207.00001525878906, "epoch": 3.2005758157389637, "grad_norm": 0.01127717550843954, "kl": 0.3127664625644684, "learning_rate": 5.4849329746989495e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3335 }, { "completion_length": 180.71429443359375, "epoch": 3.201535508637236, "grad_norm": 1.9052832126617432, "kl": 0.4313408434391022, "learning_rate": 5.472274383715833e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3336 }, { "completion_length": 227.1428680419922, "epoch": 3.2024952015355086, "grad_norm": 0.012763082049787045, "kl": 0.30750006437301636, "learning_rate": 5.4596286214681625e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3337 }, { "completion_length": 167.57144165039062, "epoch": 3.203454894433781, "grad_norm": 0.03683196380734444, "kl": 0.40918371081352234, "learning_rate": 5.446995696263565e-08, "loss": 0.0004, "reward": 0.7142857313156128, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "step": 3338 }, { "completion_length": 197.00001525878906, "epoch": 3.204414587332054, "grad_norm": 0.011135982349514961, "kl": 0.30903443694114685, "learning_rate": 5.434375616401232e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3339 }, { "completion_length": 208.42857360839844, "epoch": 3.2053742802303264, "grad_norm": 0.009430833160877228, "kl": 0.32380732893943787, "learning_rate": 5.4217683901719324e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3340 }, { "completion_length": 184.1428680419922, "epoch": 3.206333973128599, "grad_norm": 0.012059883214533329, "kl": 0.35318228602409363, "learning_rate": 5.409174025857968e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3341 }, { "completion_length": 182.57144165039062, "epoch": 3.2072936660268714, "grad_norm": 0.9545724391937256, "kl": 0.3773929476737976, "learning_rate": 5.396592531733221e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3342 }, { "completion_length": 196.42857360839844, "epoch": 3.208253358925144, "grad_norm": 0.016322264447808266, "kl": 0.38155224919319153, "learning_rate": 5.3840239160630935e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3343 }, { "completion_length": 214.71429443359375, "epoch": 3.2092130518234163, "grad_norm": 0.016473570838570595, "kl": 0.3352981507778168, "learning_rate": 5.3714681871045275e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3344 }, { "completion_length": 185.21429443359375, "epoch": 3.2101727447216892, "grad_norm": 1.3636727333068848, "kl": 0.34415942430496216, "learning_rate": 5.3589253531060166e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3345 }, { "completion_length": 188.50001525878906, "epoch": 3.2111324376199617, "grad_norm": 0.023836283013224602, "kl": 0.37153035402297974, "learning_rate": 5.346395422307565e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3346 }, { "completion_length": 152.7857208251953, "epoch": 3.212092130518234, "grad_norm": 0.017176540568470955, "kl": 0.4184848368167877, "learning_rate": 5.33387840294072e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3347 }, { "completion_length": 188.92857360839844, "epoch": 3.2130518234165066, "grad_norm": 1.0322413444519043, "kl": 0.38361984491348267, "learning_rate": 5.321374303228529e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3348 }, { "completion_length": 236.7857208251953, "epoch": 3.214011516314779, "grad_norm": 0.009414398111402988, "kl": 0.2393866330385208, "learning_rate": 5.308883131385547e-08, "loss": 0.0002, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3349 }, { "completion_length": 193.1428680419922, "epoch": 3.214971209213052, "grad_norm": 1.9371919631958008, "kl": 0.34483829140663147, "learning_rate": 5.2964048956178594e-08, "loss": 0.0003, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3350 }, { "completion_length": 276.0714416503906, "epoch": 3.2159309021113245, "grad_norm": 0.00987221673130989, "kl": 0.24397392570972443, "learning_rate": 5.2839396041230415e-08, "loss": 0.0002, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3351 }, { "completion_length": 194.7857208251953, "epoch": 3.216890595009597, "grad_norm": 2.0238819122314453, "kl": 0.40830662846565247, "learning_rate": 5.271487265090163e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3352 }, { "completion_length": 179.85714721679688, "epoch": 3.2178502879078694, "grad_norm": 1.066960096359253, "kl": 0.33431294560432434, "learning_rate": 5.259047886699774e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3353 }, { "completion_length": 200.50001525878906, "epoch": 3.218809980806142, "grad_norm": 1.109337329864502, "kl": 0.3581647276878357, "learning_rate": 5.2466214771239384e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3354 }, { "completion_length": 169.35714721679688, "epoch": 3.2197696737044144, "grad_norm": 0.008783265016973019, "kl": 0.3030022382736206, "learning_rate": 5.23420804452617e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3355 }, { "completion_length": 159.85714721679688, "epoch": 3.2207293666026873, "grad_norm": 0.010829407721757889, "kl": 0.3936082124710083, "learning_rate": 5.221807597061484e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3356 }, { "completion_length": 190.2857208251953, "epoch": 3.2216890595009597, "grad_norm": 2.0148253440856934, "kl": 0.36267992854118347, "learning_rate": 5.209420142876342e-08, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3357 }, { "completion_length": 189.7857208251953, "epoch": 3.222648752399232, "grad_norm": 1.1759957075119019, "kl": 0.3606126606464386, "learning_rate": 5.197045690108676e-08, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3358 }, { "completion_length": 163.92857360839844, "epoch": 3.2236084452975047, "grad_norm": 1.6637550592422485, "kl": 0.49936699867248535, "learning_rate": 5.1846842468878936e-08, "loss": 0.0005, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3359 }, { "completion_length": 213.21429443359375, "epoch": 3.224568138195777, "grad_norm": 0.014765589497983456, "kl": 0.31290626525878906, "learning_rate": 5.172335821334828e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3360 }, { "completion_length": 209.57144165039062, "epoch": 3.22552783109405, "grad_norm": 0.01348440907895565, "kl": 0.3688772916793823, "learning_rate": 5.160000421561789e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3361 }, { "completion_length": 209.07144165039062, "epoch": 3.2264875239923225, "grad_norm": 1.683835744857788, "kl": 0.31827887892723083, "learning_rate": 5.147678055672508e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3362 }, { "completion_length": 155.92857360839844, "epoch": 3.227447216890595, "grad_norm": 1.527917742729187, "kl": 0.4388948976993561, "learning_rate": 5.135368731762152e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3363 }, { "completion_length": 190.00001525878906, "epoch": 3.2284069097888675, "grad_norm": 0.7954938411712646, "kl": 0.3491056561470032, "learning_rate": 5.1230724579173404e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3364 }, { "completion_length": 261.14288330078125, "epoch": 3.22936660268714, "grad_norm": 1.435257911682129, "kl": 0.2776748836040497, "learning_rate": 5.11078924221611e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3365 }, { "completion_length": 163.07144165039062, "epoch": 3.230326295585413, "grad_norm": 0.012228921987116337, "kl": 0.4246727526187897, "learning_rate": 5.0985190927279125e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3366 }, { "completion_length": 197.07144165039062, "epoch": 3.2312859884836853, "grad_norm": 0.025284217670559883, "kl": 0.35293814539909363, "learning_rate": 5.0862620175136206e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3367 }, { "completion_length": 190.35714721679688, "epoch": 3.232245681381958, "grad_norm": 0.9877499938011169, "kl": 0.38733476400375366, "learning_rate": 5.074018024625509e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3368 }, { "completion_length": 181.21429443359375, "epoch": 3.2332053742802302, "grad_norm": 1.018999457359314, "kl": 0.39336317777633667, "learning_rate": 5.061787122107275e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3369 }, { "completion_length": 176.35714721679688, "epoch": 3.2341650671785027, "grad_norm": 1.1380707025527954, "kl": 0.36552694439888, "learning_rate": 5.049569317994012e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3370 }, { "completion_length": 214.42857360839844, "epoch": 3.2351247600767756, "grad_norm": 0.011264149099588394, "kl": 0.2623967230319977, "learning_rate": 5.037364620312201e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3371 }, { "completion_length": 267.5, "epoch": 3.236084452975048, "grad_norm": 0.8458433747291565, "kl": 0.24401359260082245, "learning_rate": 5.025173037079702e-08, "loss": 0.0002, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3372 }, { "completion_length": 185.00001525878906, "epoch": 3.2370441458733206, "grad_norm": 0.020338455215096474, "kl": 0.45628800988197327, "learning_rate": 5.0129945763057954e-08, "loss": 0.0005, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3373 }, { "completion_length": 241.4285888671875, "epoch": 3.238003838771593, "grad_norm": 0.015560341067612171, "kl": 0.27458176016807556, "learning_rate": 5.000829245991098e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3374 }, { "completion_length": 178.85714721679688, "epoch": 3.2389635316698655, "grad_norm": 1.778012990951538, "kl": 0.353439062833786, "learning_rate": 4.988677054127638e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3375 }, { "completion_length": 209.57144165039062, "epoch": 3.2399232245681384, "grad_norm": 0.014991042204201221, "kl": 0.4124859869480133, "learning_rate": 4.9765380086987886e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3376 }, { "completion_length": 145.21429443359375, "epoch": 3.240882917466411, "grad_norm": 1.5760999917984009, "kl": 0.4221232235431671, "learning_rate": 4.964412117679284e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3377 }, { "completion_length": 188.57144165039062, "epoch": 3.2418426103646834, "grad_norm": 0.009926333092153072, "kl": 0.35325679183006287, "learning_rate": 4.9522993890352363e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3378 }, { "completion_length": 236.35714721679688, "epoch": 3.242802303262956, "grad_norm": 0.007710924372076988, "kl": 0.26857250928878784, "learning_rate": 4.9401998307241065e-08, "loss": 0.0003, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3379 }, { "completion_length": 191.07144165039062, "epoch": 3.2437619961612283, "grad_norm": 1.5073280334472656, "kl": 0.396075576543808, "learning_rate": 4.9281134506946876e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3380 }, { "completion_length": 225.07144165039062, "epoch": 3.2447216890595008, "grad_norm": 0.02607421949505806, "kl": 0.36661723256111145, "learning_rate": 4.9160402568871245e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3381 }, { "completion_length": 177.00001525878906, "epoch": 3.2456813819577737, "grad_norm": 1.2676953077316284, "kl": 0.3621281385421753, "learning_rate": 4.903980257232895e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3382 }, { "completion_length": 197.07144165039062, "epoch": 3.246641074856046, "grad_norm": 1.324533462524414, "kl": 0.38248008489608765, "learning_rate": 4.891933459654821e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3383 }, { "completion_length": 166.35714721679688, "epoch": 3.2476007677543186, "grad_norm": 0.03589216619729996, "kl": 0.4506855010986328, "learning_rate": 4.8798998720670446e-08, "loss": 0.0005, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3384 }, { "completion_length": 177.7857208251953, "epoch": 3.248560460652591, "grad_norm": 0.7942749261856079, "kl": 0.3620820939540863, "learning_rate": 4.86787950237503e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3385 }, { "completion_length": 202.85714721679688, "epoch": 3.2495201535508635, "grad_norm": 0.864313006401062, "kl": 0.3239313066005707, "learning_rate": 4.855872358475546e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3386 }, { "completion_length": 176.50001525878906, "epoch": 3.2504798464491365, "grad_norm": 0.07560214400291443, "kl": 0.43096816539764404, "learning_rate": 4.843878448256697e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3387 }, { "completion_length": 204.71429443359375, "epoch": 3.251439539347409, "grad_norm": 3.105755567550659, "kl": 0.3973780572414398, "learning_rate": 4.831897779597865e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3388 }, { "completion_length": 154.0, "epoch": 3.2523992322456814, "grad_norm": 1.596825361251831, "kl": 0.43700236082077026, "learning_rate": 4.8199303603697614e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3389 }, { "completion_length": 165.57144165039062, "epoch": 3.253358925143954, "grad_norm": 0.021168002858757973, "kl": 0.4412745535373688, "learning_rate": 4.807976198434377e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3390 }, { "completion_length": 164.2857208251953, "epoch": 3.2543186180422263, "grad_norm": 1.4892268180847168, "kl": 0.37972673773765564, "learning_rate": 4.796035301644988e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3391 }, { "completion_length": 192.92857360839844, "epoch": 3.255278310940499, "grad_norm": 0.014700510539114475, "kl": 0.34066101908683777, "learning_rate": 4.784107677846175e-08, "loss": 0.0003, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3392 }, { "completion_length": 191.7857208251953, "epoch": 3.2562380038387717, "grad_norm": 0.9780909419059753, "kl": 0.3544670641422272, "learning_rate": 4.7721933348737755e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3393 }, { "completion_length": 181.2857208251953, "epoch": 3.257197696737044, "grad_norm": 0.014376808889210224, "kl": 0.4493424892425537, "learning_rate": 4.760292280554931e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3394 }, { "completion_length": 231.2857208251953, "epoch": 3.2581573896353166, "grad_norm": 0.009084256365895271, "kl": 0.30635976791381836, "learning_rate": 4.7484045227080244e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3395 }, { "completion_length": 186.50001525878906, "epoch": 3.259117082533589, "grad_norm": 0.027494993060827255, "kl": 0.4301186501979828, "learning_rate": 4.736530069142716e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3396 }, { "completion_length": 266.71429443359375, "epoch": 3.2600767754318616, "grad_norm": 0.012563078664243221, "kl": 0.26544713973999023, "learning_rate": 4.724668927659928e-08, "loss": 0.0003, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3397 }, { "completion_length": 162.21429443359375, "epoch": 3.2610364683301345, "grad_norm": 0.015381628647446632, "kl": 0.34842464327812195, "learning_rate": 4.712821106051843e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3398 }, { "completion_length": 181.42857360839844, "epoch": 3.261996161228407, "grad_norm": 0.03388863056898117, "kl": 0.42477813363075256, "learning_rate": 4.700986612101876e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3399 }, { "completion_length": 141.1428680419922, "epoch": 3.2629558541266794, "grad_norm": 0.049124155193567276, "kl": 0.5042252540588379, "learning_rate": 4.689165453584692e-08, "loss": 0.0005, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3400 }, { "completion_length": 148.5, "epoch": 3.263915547024952, "grad_norm": 0.012917077168822289, "kl": 0.3763970136642456, "learning_rate": 4.6773576382662106e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3401 }, { "completion_length": 188.7857208251953, "epoch": 3.2648752399232244, "grad_norm": 0.01302521675825119, "kl": 0.3749796748161316, "learning_rate": 4.6655631739035614e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3402 }, { "completion_length": 184.92857360839844, "epoch": 3.2658349328214973, "grad_norm": 0.01382335927337408, "kl": 0.3884154260158539, "learning_rate": 4.653782068245127e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3403 }, { "completion_length": 196.21429443359375, "epoch": 3.2667946257197698, "grad_norm": 1.4667209386825562, "kl": 0.45993733406066895, "learning_rate": 4.642014329030497e-08, "loss": 0.0005, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3404 }, { "completion_length": 181.85714721679688, "epoch": 3.267754318618042, "grad_norm": 0.016833361238241196, "kl": 0.3438555598258972, "learning_rate": 4.6302599639904793e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3405 }, { "completion_length": 207.50001525878906, "epoch": 3.2687140115163147, "grad_norm": 0.009096283465623856, "kl": 0.28622642159461975, "learning_rate": 4.618518980847111e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3406 }, { "completion_length": 170.6428680419922, "epoch": 3.269673704414587, "grad_norm": 0.615847110748291, "kl": 0.39211103320121765, "learning_rate": 4.6067913873136214e-08, "loss": 0.0004, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 3407 }, { "completion_length": 180.07144165039062, "epoch": 3.27063339731286, "grad_norm": 0.011606439016759396, "kl": 0.3546611964702606, "learning_rate": 4.5950771910944596e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3408 }, { "completion_length": 189.92857360839844, "epoch": 3.2715930902111325, "grad_norm": 0.010753788985311985, "kl": 0.3334066569805145, "learning_rate": 4.5833763998852584e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3409 }, { "completion_length": 210.92857360839844, "epoch": 3.272552783109405, "grad_norm": 0.011522466316819191, "kl": 0.30710601806640625, "learning_rate": 4.571689021372846e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3410 }, { "completion_length": 232.9285888671875, "epoch": 3.2735124760076775, "grad_norm": 0.009419118985533714, "kl": 0.2555704116821289, "learning_rate": 4.560015063235248e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3411 }, { "completion_length": 210.00001525878906, "epoch": 3.27447216890595, "grad_norm": 0.009950729086995125, "kl": 0.31307870149612427, "learning_rate": 4.5483545331416764e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3412 }, { "completion_length": 183.21429443359375, "epoch": 3.275431861804223, "grad_norm": 1.8999823331832886, "kl": 0.4154227674007416, "learning_rate": 4.536707438752505e-08, "loss": 0.0004, "reward": 0.785714328289032, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.785714328289032, "step": 3413 }, { "completion_length": 172.71429443359375, "epoch": 3.2763915547024953, "grad_norm": 0.017176976427435875, "kl": 0.3425859808921814, "learning_rate": 4.5250737877192884e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3414 }, { "completion_length": 169.6428680419922, "epoch": 3.277351247600768, "grad_norm": 0.02063732035458088, "kl": 0.398538202047348, "learning_rate": 4.5134535876847595e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3415 }, { "completion_length": 175.42857360839844, "epoch": 3.2783109404990403, "grad_norm": 0.8186850547790527, "kl": 0.39481881260871887, "learning_rate": 4.5018468462827945e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3416 }, { "completion_length": 286.2857360839844, "epoch": 3.2792706333973127, "grad_norm": 0.012696942314505577, "kl": 0.24703258275985718, "learning_rate": 4.4902535711384554e-08, "loss": 0.0002, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3417 }, { "completion_length": 169.42857360839844, "epoch": 3.2802303262955856, "grad_norm": 1.6427967548370361, "kl": 0.6834100484848022, "learning_rate": 4.4786737698679357e-08, "loss": 0.0007, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3418 }, { "completion_length": 180.92857360839844, "epoch": 3.281190019193858, "grad_norm": 0.010866389609873295, "kl": 0.3442158102989197, "learning_rate": 4.4671074500785725e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3419 }, { "completion_length": 208.2857208251953, "epoch": 3.2821497120921306, "grad_norm": 0.04551377147436142, "kl": 0.3415791392326355, "learning_rate": 4.4555546193688734e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3420 }, { "completion_length": 123.71429443359375, "epoch": 3.283109404990403, "grad_norm": 1.1490848064422607, "kl": 0.46857041120529175, "learning_rate": 4.444015285328456e-08, "loss": 0.0005, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3421 }, { "completion_length": 151.92857360839844, "epoch": 3.2840690978886755, "grad_norm": 0.008249355480074883, "kl": 0.35586628317832947, "learning_rate": 4.4324894555380936e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3422 }, { "completion_length": 183.57144165039062, "epoch": 3.285028790786948, "grad_norm": 0.01571015641093254, "kl": 0.3601297438144684, "learning_rate": 4.420977137569673e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3423 }, { "completion_length": 171.92857360839844, "epoch": 3.285988483685221, "grad_norm": 0.03406418859958649, "kl": 0.49472054839134216, "learning_rate": 4.409478338986203e-08, "loss": 0.0005, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3424 }, { "completion_length": 233.57144165039062, "epoch": 3.2869481765834934, "grad_norm": 0.010034061037003994, "kl": 0.28153151273727417, "learning_rate": 4.397993067341824e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3425 }, { "completion_length": 179.6428680419922, "epoch": 3.287907869481766, "grad_norm": 1.6778273582458496, "kl": 0.4802468419075012, "learning_rate": 4.386521330181789e-08, "loss": 0.0005, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3426 }, { "completion_length": 147.71429443359375, "epoch": 3.2888675623800383, "grad_norm": 0.009868972934782505, "kl": 0.42882341146469116, "learning_rate": 4.375063135042445e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3427 }, { "completion_length": 211.42857360839844, "epoch": 3.2898272552783108, "grad_norm": 0.011690856888890266, "kl": 0.322753369808197, "learning_rate": 4.363618489451246e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3428 }, { "completion_length": 209.07144165039062, "epoch": 3.2907869481765837, "grad_norm": 0.051517046988010406, "kl": 0.4149901568889618, "learning_rate": 4.352187400926763e-08, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3429 }, { "completion_length": 244.7857208251953, "epoch": 3.291746641074856, "grad_norm": 0.035140931606292725, "kl": 0.3029818534851074, "learning_rate": 4.340769876978634e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3430 }, { "completion_length": 156.0, "epoch": 3.2927063339731286, "grad_norm": 0.01717093214392662, "kl": 0.44637468457221985, "learning_rate": 4.3293659251076114e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3431 }, { "completion_length": 194.2857208251953, "epoch": 3.293666026871401, "grad_norm": 1.2549142837524414, "kl": 0.42467519640922546, "learning_rate": 4.317975552805517e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3432 }, { "completion_length": 181.07144165039062, "epoch": 3.2946257197696736, "grad_norm": 1.0562002658843994, "kl": 0.34791770577430725, "learning_rate": 4.306598767555239e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3433 }, { "completion_length": 124.5714340209961, "epoch": 3.295585412667946, "grad_norm": 1.192312240600586, "kl": 0.4336768388748169, "learning_rate": 4.295235576830772e-08, "loss": 0.0004, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 3434 }, { "completion_length": 143.92857360839844, "epoch": 3.296545105566219, "grad_norm": 1.4584773778915405, "kl": 0.41416558623313904, "learning_rate": 4.2838859880971495e-08, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3435 }, { "completion_length": 190.1428680419922, "epoch": 3.2975047984644914, "grad_norm": 1.3999353647232056, "kl": 0.3190488815307617, "learning_rate": 4.272550008810494e-08, "loss": 0.0003, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3436 }, { "completion_length": 177.92857360839844, "epoch": 3.298464491362764, "grad_norm": 0.014770539477467537, "kl": 0.3327805995941162, "learning_rate": 4.2612276464179667e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3437 }, { "completion_length": 169.7857208251953, "epoch": 3.2994241842610363, "grad_norm": 1.6301257610321045, "kl": 0.3760818839073181, "learning_rate": 4.2499189083577885e-08, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3438 }, { "completion_length": 164.35714721679688, "epoch": 3.300383877159309, "grad_norm": 1.3726352453231812, "kl": 0.4204122722148895, "learning_rate": 4.238623802059235e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3439 }, { "completion_length": 204.92857360839844, "epoch": 3.3013435700575817, "grad_norm": 1.391489028930664, "kl": 0.301832377910614, "learning_rate": 4.227342334942629e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3440 }, { "completion_length": 164.71429443359375, "epoch": 3.302303262955854, "grad_norm": 1.8983862400054932, "kl": 0.39371827244758606, "learning_rate": 4.2160745144193295e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3441 }, { "completion_length": 177.92857360839844, "epoch": 3.3032629558541267, "grad_norm": 1.649993896484375, "kl": 0.4967503547668457, "learning_rate": 4.2048203478917215e-08, "loss": 0.0005, "reward": 0.785714328289032, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.785714328289032, "step": 3442 }, { "completion_length": 158.35714721679688, "epoch": 3.304222648752399, "grad_norm": 2.366122245788574, "kl": 0.3756968677043915, "learning_rate": 4.1935798427532225e-08, "loss": 0.0004, "reward": 0.6428571939468384, "reward_std": 0.5050762891769409, "rewards/check_gptzero_func": 0.6428571939468384, "step": 3443 }, { "completion_length": 170.92857360839844, "epoch": 3.3051823416506716, "grad_norm": 0.01575535535812378, "kl": 0.363324373960495, "learning_rate": 4.1823530063882894e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3444 }, { "completion_length": 213.2857208251953, "epoch": 3.3061420345489445, "grad_norm": 1.2458341121673584, "kl": 0.26636409759521484, "learning_rate": 4.171139846172394e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3445 }, { "completion_length": 171.50001525878906, "epoch": 3.307101727447217, "grad_norm": 0.00902417954057455, "kl": 0.32353994250297546, "learning_rate": 4.1599403694720145e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3446 }, { "completion_length": 179.7857208251953, "epoch": 3.3080614203454894, "grad_norm": 0.011754798702895641, "kl": 0.37324121594429016, "learning_rate": 4.1487545836446397e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3447 }, { "completion_length": 148.7857208251953, "epoch": 3.309021113243762, "grad_norm": 0.018088221549987793, "kl": 0.4430065453052521, "learning_rate": 4.1375824960387776e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3448 }, { "completion_length": 190.85714721679688, "epoch": 3.3099808061420344, "grad_norm": 0.016070270910859108, "kl": 0.38539591431617737, "learning_rate": 4.1264241139939205e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3449 }, { "completion_length": 170.7857208251953, "epoch": 3.3109404990403073, "grad_norm": 0.9163668751716614, "kl": 0.36102813482284546, "learning_rate": 4.115279444840575e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3450 }, { "completion_length": 177.42857360839844, "epoch": 3.3119001919385798, "grad_norm": 1.591328501701355, "kl": 0.41486915946006775, "learning_rate": 4.104148495900225e-08, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3451 }, { "completion_length": 209.35714721679688, "epoch": 3.3128598848368522, "grad_norm": 0.01806134358048439, "kl": 0.2932356595993042, "learning_rate": 4.0930312744853394e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3452 }, { "completion_length": 208.7857208251953, "epoch": 3.3138195777351247, "grad_norm": 1.442160725593567, "kl": 0.33678096532821655, "learning_rate": 4.0819277878993805e-08, "loss": 0.0003, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3453 }, { "completion_length": 223.21429443359375, "epoch": 3.314779270633397, "grad_norm": 0.945232629776001, "kl": 0.27760571241378784, "learning_rate": 4.0708380434367864e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3454 }, { "completion_length": 242.50001525878906, "epoch": 3.31573896353167, "grad_norm": 0.009179554879665375, "kl": 0.2578478753566742, "learning_rate": 4.0597620483829556e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3455 }, { "completion_length": 156.92857360839844, "epoch": 3.3166986564299425, "grad_norm": 0.016846805810928345, "kl": 0.42970719933509827, "learning_rate": 4.048699810014261e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3456 }, { "completion_length": 188.07144165039062, "epoch": 3.317658349328215, "grad_norm": 0.010223242454230785, "kl": 0.40999236702919006, "learning_rate": 4.037651335598036e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3457 }, { "completion_length": 195.42857360839844, "epoch": 3.3186180422264875, "grad_norm": 0.010634861886501312, "kl": 0.3144984841346741, "learning_rate": 4.026616632392574e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3458 }, { "completion_length": 169.6428680419922, "epoch": 3.31957773512476, "grad_norm": 0.01474884431809187, "kl": 0.43432173132896423, "learning_rate": 4.015595707647132e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3459 }, { "completion_length": 201.35714721679688, "epoch": 3.320537428023033, "grad_norm": 1.1451202630996704, "kl": 0.30586186051368713, "learning_rate": 4.0045885686018965e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3460 }, { "completion_length": 186.35714721679688, "epoch": 3.3214971209213053, "grad_norm": 0.9426366090774536, "kl": 0.30488622188568115, "learning_rate": 3.9935952224879974e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3461 }, { "completion_length": 217.71429443359375, "epoch": 3.322456813819578, "grad_norm": 0.849443793296814, "kl": 0.295035719871521, "learning_rate": 3.982615676527526e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3462 }, { "completion_length": 185.21429443359375, "epoch": 3.3234165067178503, "grad_norm": 0.010776092298328876, "kl": 0.34221282601356506, "learning_rate": 3.971649937933477e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3463 }, { "completion_length": 196.6428680419922, "epoch": 3.3243761996161227, "grad_norm": 0.011545795015990734, "kl": 0.3736788332462311, "learning_rate": 3.9606980139098075e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3464 }, { "completion_length": 182.71429443359375, "epoch": 3.325335892514395, "grad_norm": 0.009226403199136257, "kl": 0.3325958251953125, "learning_rate": 3.9497599116513705e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3465 }, { "completion_length": 180.1428680419922, "epoch": 3.326295585412668, "grad_norm": 1.5834730863571167, "kl": 0.3561933636665344, "learning_rate": 3.938835638343948e-08, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3466 }, { "completion_length": 164.7857208251953, "epoch": 3.3272552783109406, "grad_norm": 0.012034890241920948, "kl": 0.37589699029922485, "learning_rate": 3.9279252011642506e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3467 }, { "completion_length": 166.57144165039062, "epoch": 3.328214971209213, "grad_norm": 1.2235965728759766, "kl": 0.3908167779445648, "learning_rate": 3.9170286072798734e-08, "loss": 0.0004, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 3468 }, { "completion_length": 205.57144165039062, "epoch": 3.3291746641074855, "grad_norm": 0.012631581164896488, "kl": 0.32849299907684326, "learning_rate": 3.906145863849347e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3469 }, { "completion_length": 163.5, "epoch": 3.330134357005758, "grad_norm": 0.01324083749204874, "kl": 0.447258323431015, "learning_rate": 3.89527697802208e-08, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3470 }, { "completion_length": 168.7857208251953, "epoch": 3.3310940499040305, "grad_norm": 1.2409863471984863, "kl": 0.39796027541160583, "learning_rate": 3.884421956938377e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3471 }, { "completion_length": 146.71429443359375, "epoch": 3.3320537428023034, "grad_norm": 0.9746910929679871, "kl": 0.4094788730144501, "learning_rate": 3.873580807729451e-08, "loss": 0.0004, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 3472 }, { "completion_length": 170.42857360839844, "epoch": 3.333013435700576, "grad_norm": 0.018223507329821587, "kl": 0.41776877641677856, "learning_rate": 3.862753537517396e-08, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3473 }, { "completion_length": 218.50001525878906, "epoch": 3.3339731285988483, "grad_norm": 0.01123346108943224, "kl": 0.2889052927494049, "learning_rate": 3.851940153415178e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3474 }, { "completion_length": 162.5, "epoch": 3.334932821497121, "grad_norm": 0.01344522088766098, "kl": 0.43888819217681885, "learning_rate": 3.841140662526648e-08, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3475 }, { "completion_length": 163.35714721679688, "epoch": 3.3358925143953932, "grad_norm": 0.016909053549170494, "kl": 0.3627292513847351, "learning_rate": 3.830355071946534e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3476 }, { "completion_length": 213.50001525878906, "epoch": 3.336852207293666, "grad_norm": 0.5900587439537048, "kl": 0.39239853620529175, "learning_rate": 3.8195833887604194e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3477 }, { "completion_length": 166.0, "epoch": 3.3378119001919386, "grad_norm": 2.67940354347229, "kl": 0.40208375453948975, "learning_rate": 3.8088256200447725e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3478 }, { "completion_length": 186.50001525878906, "epoch": 3.338771593090211, "grad_norm": 0.014138191007077694, "kl": 0.3845728039741516, "learning_rate": 3.7980817728668986e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3479 }, { "completion_length": 186.35714721679688, "epoch": 3.3397312859884836, "grad_norm": 1.2862921953201294, "kl": 0.3379290997982025, "learning_rate": 3.787351854284959e-08, "loss": 0.0003, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3480 }, { "completion_length": 181.92857360839844, "epoch": 3.340690978886756, "grad_norm": 0.844231903553009, "kl": 0.3033373951911926, "learning_rate": 3.776635871347988e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3481 }, { "completion_length": 166.0, "epoch": 3.341650671785029, "grad_norm": 1.693925380706787, "kl": 0.4040074646472931, "learning_rate": 3.765933831095833e-08, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3482 }, { "completion_length": 140.57144165039062, "epoch": 3.3426103646833014, "grad_norm": 1.9970760345458984, "kl": 0.5173060297966003, "learning_rate": 3.75524574055921e-08, "loss": 0.0005, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3483 }, { "completion_length": 179.71429443359375, "epoch": 3.343570057581574, "grad_norm": 1.4512434005737305, "kl": 0.3946675956249237, "learning_rate": 3.74457160675965e-08, "loss": 0.0004, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 3484 }, { "completion_length": 190.71429443359375, "epoch": 3.3445297504798464, "grad_norm": 1.3862700462341309, "kl": 0.3311796188354492, "learning_rate": 3.733911436709522e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3485 }, { "completion_length": 218.7857208251953, "epoch": 3.345489443378119, "grad_norm": 0.02548537403345108, "kl": 0.42843687534332275, "learning_rate": 3.723265237412024e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3486 }, { "completion_length": 165.7857208251953, "epoch": 3.3464491362763917, "grad_norm": 1.3749775886535645, "kl": 0.47644028067588806, "learning_rate": 3.71263301586118e-08, "loss": 0.0005, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3487 }, { "completion_length": 197.57144165039062, "epoch": 3.347408829174664, "grad_norm": 1.4539092779159546, "kl": 0.33015474677085876, "learning_rate": 3.702014779041826e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3488 }, { "completion_length": 182.71429443359375, "epoch": 3.3483685220729367, "grad_norm": 0.013727733865380287, "kl": 0.42727717757225037, "learning_rate": 3.691410533929598e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3489 }, { "completion_length": 238.50001525878906, "epoch": 3.349328214971209, "grad_norm": 0.010401175357401371, "kl": 0.26322871446609497, "learning_rate": 3.680820287490968e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3490 }, { "completion_length": 185.07144165039062, "epoch": 3.3502879078694816, "grad_norm": 1.2743200063705444, "kl": 0.35213416814804077, "learning_rate": 3.670244046683183e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3491 }, { "completion_length": 213.57144165039062, "epoch": 3.3512476007677545, "grad_norm": 1.1205096244812012, "kl": 0.4030877351760864, "learning_rate": 3.659681818454316e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3492 }, { "completion_length": 173.50001525878906, "epoch": 3.352207293666027, "grad_norm": 0.013551228679716587, "kl": 0.3889549672603607, "learning_rate": 3.649133609743213e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3493 }, { "completion_length": 193.50001525878906, "epoch": 3.3531669865642995, "grad_norm": 3.35969877243042, "kl": 0.4414280354976654, "learning_rate": 3.638599427479511e-08, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3494 }, { "completion_length": 197.6428680419922, "epoch": 3.354126679462572, "grad_norm": 1.0118309259414673, "kl": 0.3083381652832031, "learning_rate": 3.6280792785836536e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3495 }, { "completion_length": 170.5, "epoch": 3.3550863723608444, "grad_norm": 0.008563642390072346, "kl": 0.34847232699394226, "learning_rate": 3.617573169966837e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3496 }, { "completion_length": 216.07144165039062, "epoch": 3.3560460652591173, "grad_norm": 0.008057001046836376, "kl": 0.27900534868240356, "learning_rate": 3.607081108531057e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3497 }, { "completion_length": 229.50001525878906, "epoch": 3.3570057581573898, "grad_norm": 0.009823578409850597, "kl": 0.2779157757759094, "learning_rate": 3.596603101169068e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3498 }, { "completion_length": 206.6428680419922, "epoch": 3.3579654510556622, "grad_norm": 0.01464476715773344, "kl": 0.3721863627433777, "learning_rate": 3.586139154764389e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3499 }, { "completion_length": 211.71429443359375, "epoch": 3.3589251439539347, "grad_norm": 1.2841593027114868, "kl": 0.3782767653465271, "learning_rate": 3.575689276191313e-08, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3500 }, { "completion_length": 180.6428680419922, "epoch": 3.359884836852207, "grad_norm": 1.0116389989852905, "kl": 0.3576003313064575, "learning_rate": 3.565253472314891e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3501 }, { "completion_length": 205.21429443359375, "epoch": 3.36084452975048, "grad_norm": 0.7739043831825256, "kl": 0.3639542758464813, "learning_rate": 3.5548317499909164e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3502 }, { "completion_length": 170.0, "epoch": 3.3618042226487526, "grad_norm": 0.008773081935942173, "kl": 0.3700421452522278, "learning_rate": 3.5444241160659304e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3503 }, { "completion_length": 243.1428680419922, "epoch": 3.362763915547025, "grad_norm": 0.008053085766732693, "kl": 0.24490244686603546, "learning_rate": 3.5340305773772425e-08, "loss": 0.0002, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3504 }, { "completion_length": 203.2857208251953, "epoch": 3.3637236084452975, "grad_norm": 0.019142137840390205, "kl": 0.3846403956413269, "learning_rate": 3.523651140752867e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3505 }, { "completion_length": 258.4285888671875, "epoch": 3.36468330134357, "grad_norm": 1.0064114332199097, "kl": 0.247976616024971, "learning_rate": 3.51328581301159e-08, "loss": 0.0002, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3506 }, { "completion_length": 194.42857360839844, "epoch": 3.3656429942418424, "grad_norm": 0.8226612210273743, "kl": 0.4613643288612366, "learning_rate": 3.5029346009629064e-08, "loss": 0.0005, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3507 }, { "completion_length": 221.6428680419922, "epoch": 3.3666026871401153, "grad_norm": 1.6233549118041992, "kl": 0.34335950016975403, "learning_rate": 3.492597511407033e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3508 }, { "completion_length": 235.57144165039062, "epoch": 3.367562380038388, "grad_norm": 0.010987227782607079, "kl": 0.27353766560554504, "learning_rate": 3.482274551134931e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3509 }, { "completion_length": 154.42857360839844, "epoch": 3.3685220729366603, "grad_norm": 1.6858254671096802, "kl": 0.4574267864227295, "learning_rate": 3.471965726928258e-08, "loss": 0.0005, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 3510 }, { "completion_length": 171.6428680419922, "epoch": 3.3694817658349328, "grad_norm": 0.013889474794268608, "kl": 0.379250168800354, "learning_rate": 3.4616710455594027e-08, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3511 }, { "completion_length": 182.85714721679688, "epoch": 3.370441458733205, "grad_norm": 1.3322975635528564, "kl": 0.3553725481033325, "learning_rate": 3.451390513791452e-08, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3512 }, { "completion_length": 187.6428680419922, "epoch": 3.3714011516314777, "grad_norm": 0.01125421840697527, "kl": 0.3262549042701721, "learning_rate": 3.4411241383781904e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3513 }, { "completion_length": 216.2857208251953, "epoch": 3.3723608445297506, "grad_norm": 0.015436407178640366, "kl": 0.3355487287044525, "learning_rate": 3.430871926064116e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3514 }, { "completion_length": 180.21429443359375, "epoch": 3.373320537428023, "grad_norm": 0.013312231749296188, "kl": 0.3641715943813324, "learning_rate": 3.4206338835844255e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3515 }, { "completion_length": 212.21429443359375, "epoch": 3.3742802303262955, "grad_norm": 0.02265801653265953, "kl": 0.3397293984889984, "learning_rate": 3.4104100176649917e-08, "loss": 0.0003, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3516 }, { "completion_length": 157.5, "epoch": 3.375239923224568, "grad_norm": 0.011261946521699429, "kl": 0.3876422345638275, "learning_rate": 3.4002003350223824e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3517 }, { "completion_length": 168.71429443359375, "epoch": 3.3761996161228405, "grad_norm": 1.0766446590423584, "kl": 0.3962523639202118, "learning_rate": 3.3900048423638415e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3518 }, { "completion_length": 172.71429443359375, "epoch": 3.3771593090211134, "grad_norm": 1.7269827127456665, "kl": 0.34912630915641785, "learning_rate": 3.3798235463873e-08, "loss": 0.0003, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3519 }, { "completion_length": 214.00001525878906, "epoch": 3.378119001919386, "grad_norm": 0.0092119500041008, "kl": 0.275715708732605, "learning_rate": 3.3696564537813664e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3520 }, { "completion_length": 259.14288330078125, "epoch": 3.3790786948176583, "grad_norm": 1.4100468158721924, "kl": 0.29890769720077515, "learning_rate": 3.3595035712253e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3521 }, { "completion_length": 115.00000762939453, "epoch": 3.380038387715931, "grad_norm": 0.03250687196850777, "kl": 0.5332241058349609, "learning_rate": 3.349364905389032e-08, "loss": 0.0005, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3522 }, { "completion_length": 189.50001525878906, "epoch": 3.3809980806142033, "grad_norm": 0.039191536605358124, "kl": 0.4164794087409973, "learning_rate": 3.339240462933171e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3523 }, { "completion_length": 234.6428680419922, "epoch": 3.381957773512476, "grad_norm": 0.009892325848340988, "kl": 0.2493794709444046, "learning_rate": 3.329130250508952e-08, "loss": 0.0002, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3524 }, { "completion_length": 172.50001525878906, "epoch": 3.3829174664107486, "grad_norm": 0.008528532460331917, "kl": 0.35501986742019653, "learning_rate": 3.3190342747582866e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3525 }, { "completion_length": 187.35714721679688, "epoch": 3.383877159309021, "grad_norm": 0.02111411839723587, "kl": 0.3885668218135834, "learning_rate": 3.308952542313725e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3526 }, { "completion_length": 144.85714721679688, "epoch": 3.3848368522072936, "grad_norm": 0.9858481884002686, "kl": 0.38522881269454956, "learning_rate": 3.298885059798448e-08, "loss": 0.0004, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 3527 }, { "completion_length": 203.50001525878906, "epoch": 3.385796545105566, "grad_norm": 0.014320503920316696, "kl": 0.35474342107772827, "learning_rate": 3.288831833826297e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3528 }, { "completion_length": 214.7857208251953, "epoch": 3.386756238003839, "grad_norm": 1.1640952825546265, "kl": 0.30570635199546814, "learning_rate": 3.278792871001737e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3529 }, { "completion_length": 201.42857360839844, "epoch": 3.3877159309021114, "grad_norm": 1.0884817838668823, "kl": 0.29971814155578613, "learning_rate": 3.268768177919859e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3530 }, { "completion_length": 165.1428680419922, "epoch": 3.388675623800384, "grad_norm": 0.021537166088819504, "kl": 0.3596406877040863, "learning_rate": 3.258757761166386e-08, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3531 }, { "completion_length": 198.71429443359375, "epoch": 3.3896353166986564, "grad_norm": 0.01604737527668476, "kl": 0.3550778329372406, "learning_rate": 3.248761627317648e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3532 }, { "completion_length": 157.2857208251953, "epoch": 3.390595009596929, "grad_norm": 0.022681569680571556, "kl": 0.41171592473983765, "learning_rate": 3.238779782940615e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3533 }, { "completion_length": 181.00001525878906, "epoch": 3.3915547024952017, "grad_norm": 0.012900437228381634, "kl": 0.37927955389022827, "learning_rate": 3.228812234592859e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3534 }, { "completion_length": 231.07144165039062, "epoch": 3.392514395393474, "grad_norm": 0.009158235974609852, "kl": 0.2879876494407654, "learning_rate": 3.218858988822554e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3535 }, { "completion_length": 202.50001525878906, "epoch": 3.3934740882917467, "grad_norm": 0.018289275467395782, "kl": 0.3447071611881256, "learning_rate": 3.208920052168476e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3536 }, { "completion_length": 141.07144165039062, "epoch": 3.394433781190019, "grad_norm": 1.7981233596801758, "kl": 0.4132958948612213, "learning_rate": 3.198995431160023e-08, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3537 }, { "completion_length": 203.92857360839844, "epoch": 3.3953934740882916, "grad_norm": 0.05022910609841347, "kl": 0.4298482835292816, "learning_rate": 3.18908513231716e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3538 }, { "completion_length": 178.50001525878906, "epoch": 3.3963531669865645, "grad_norm": 0.010805740021169186, "kl": 0.3536537289619446, "learning_rate": 3.179189162150464e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3539 }, { "completion_length": 175.71429443359375, "epoch": 3.397312859884837, "grad_norm": 1.6965484619140625, "kl": 0.4015365540981293, "learning_rate": 3.169307527161086e-08, "loss": 0.0004, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 3540 }, { "completion_length": 191.71429443359375, "epoch": 3.3982725527831095, "grad_norm": 0.014314915984869003, "kl": 0.4064101576805115, "learning_rate": 3.159440233840763e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3541 }, { "completion_length": 199.6428680419922, "epoch": 3.399232245681382, "grad_norm": 1.1449872255325317, "kl": 0.28758394718170166, "learning_rate": 3.149587288671815e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3542 }, { "completion_length": 241.35714721679688, "epoch": 3.4001919385796544, "grad_norm": 0.007342348340898752, "kl": 0.2602629065513611, "learning_rate": 3.139748698127129e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3543 }, { "completion_length": 240.4285888671875, "epoch": 3.401151631477927, "grad_norm": 0.01077112928032875, "kl": 0.2810918986797333, "learning_rate": 3.12992446867017e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3544 }, { "completion_length": 152.71429443359375, "epoch": 3.4021113243762, "grad_norm": 0.01395385805517435, "kl": 0.4087679982185364, "learning_rate": 3.1201146067549616e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3545 }, { "completion_length": 271.71429443359375, "epoch": 3.4030710172744723, "grad_norm": 0.014406105503439903, "kl": 0.2850496768951416, "learning_rate": 3.1103191188260824e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3546 }, { "completion_length": 200.7857208251953, "epoch": 3.4040307101727447, "grad_norm": 0.9502128958702087, "kl": 0.3038578927516937, "learning_rate": 3.100538011318685e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3547 }, { "completion_length": 189.6428680419922, "epoch": 3.404990403071017, "grad_norm": 1.2549560070037842, "kl": 0.362364798784256, "learning_rate": 3.09077129065847e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3548 }, { "completion_length": 145.2857208251953, "epoch": 3.4059500959692897, "grad_norm": 1.5802242755889893, "kl": 0.4266883432865143, "learning_rate": 3.081018963261678e-08, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3549 }, { "completion_length": 238.6428680419922, "epoch": 3.4069097888675626, "grad_norm": 0.011980446055531502, "kl": 0.2790328860282898, "learning_rate": 3.071281035535089e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3550 }, { "completion_length": 161.6428680419922, "epoch": 3.407869481765835, "grad_norm": 1.7351107597351074, "kl": 0.3549138009548187, "learning_rate": 3.061557513876051e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3551 }, { "completion_length": 199.21429443359375, "epoch": 3.4088291746641075, "grad_norm": 0.014529693871736526, "kl": 0.32785558700561523, "learning_rate": 3.051848404672411e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3552 }, { "completion_length": 219.71429443359375, "epoch": 3.40978886756238, "grad_norm": 0.01958794705569744, "kl": 0.394290566444397, "learning_rate": 3.042153714302581e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3553 }, { "completion_length": 188.2857208251953, "epoch": 3.4107485604606524, "grad_norm": 0.09442777931690216, "kl": 0.462251216173172, "learning_rate": 3.032473449135481e-08, "loss": 0.0005, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3554 }, { "completion_length": 201.00001525878906, "epoch": 3.411708253358925, "grad_norm": 0.016731340438127518, "kl": 0.34729254245758057, "learning_rate": 3.022807615530554e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3555 }, { "completion_length": 210.00001525878906, "epoch": 3.412667946257198, "grad_norm": 0.014797693118453026, "kl": 0.31002941727638245, "learning_rate": 3.013156219837776e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3556 }, { "completion_length": 195.85714721679688, "epoch": 3.4136276391554703, "grad_norm": 0.010013354010879993, "kl": 0.34455832839012146, "learning_rate": 3.00351926839762e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3557 }, { "completion_length": 198.07144165039062, "epoch": 3.4145873320537428, "grad_norm": 0.012435073964297771, "kl": 0.354082316160202, "learning_rate": 2.993896767541088e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3558 }, { "completion_length": 177.00001525878906, "epoch": 3.4155470249520152, "grad_norm": 0.8878587484359741, "kl": 0.3652498126029968, "learning_rate": 2.984288723589679e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3559 }, { "completion_length": 190.35714721679688, "epoch": 3.4165067178502877, "grad_norm": 0.011071319691836834, "kl": 0.3126908242702484, "learning_rate": 2.974695142855388e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3560 }, { "completion_length": 149.42857360839844, "epoch": 3.4174664107485606, "grad_norm": 0.014956893399357796, "kl": 0.4057186543941498, "learning_rate": 2.9651160316407215e-08, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3561 }, { "completion_length": 171.35714721679688, "epoch": 3.418426103646833, "grad_norm": 2.0897834300994873, "kl": 0.3848886787891388, "learning_rate": 2.9555513962386785e-08, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3562 }, { "completion_length": 196.1428680419922, "epoch": 3.4193857965451055, "grad_norm": 0.7326266169548035, "kl": 0.33174386620521545, "learning_rate": 2.9460012429327385e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3563 }, { "completion_length": 169.2857208251953, "epoch": 3.420345489443378, "grad_norm": 0.011582485400140285, "kl": 0.35323211550712585, "learning_rate": 2.9364655779968718e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3564 }, { "completion_length": 205.35714721679688, "epoch": 3.4213051823416505, "grad_norm": 0.01834368333220482, "kl": 0.2944609820842743, "learning_rate": 2.926944407695539e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3565 }, { "completion_length": 197.1428680419922, "epoch": 3.4222648752399234, "grad_norm": 0.013060248456895351, "kl": 0.3805580735206604, "learning_rate": 2.9174377382836596e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3566 }, { "completion_length": 142.2857208251953, "epoch": 3.423224568138196, "grad_norm": 0.013063759543001652, "kl": 0.4552658796310425, "learning_rate": 2.9079455760066567e-08, "loss": 0.0005, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3567 }, { "completion_length": 222.7857208251953, "epoch": 3.4241842610364683, "grad_norm": 0.018398510292172432, "kl": 0.3205798864364624, "learning_rate": 2.8984679271003815e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3568 }, { "completion_length": 214.1428680419922, "epoch": 3.425143953934741, "grad_norm": 0.7969011664390564, "kl": 0.2822529673576355, "learning_rate": 2.8890047977911837e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3569 }, { "completion_length": 207.57144165039062, "epoch": 3.4261036468330133, "grad_norm": 1.032339096069336, "kl": 0.37598717212677, "learning_rate": 2.8795561942958735e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3570 }, { "completion_length": 157.1428680419922, "epoch": 3.427063339731286, "grad_norm": 0.00962540041655302, "kl": 0.39727887511253357, "learning_rate": 2.8701221228216915e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3571 }, { "completion_length": 176.50001525878906, "epoch": 3.4280230326295587, "grad_norm": 0.011916808784008026, "kl": 0.3756439983844757, "learning_rate": 2.860702589566366e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3572 }, { "completion_length": 170.57144165039062, "epoch": 3.428982725527831, "grad_norm": 0.011170357465744019, "kl": 0.36464056372642517, "learning_rate": 2.85129760071805e-08, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3573 }, { "completion_length": 198.2857208251953, "epoch": 3.4299424184261036, "grad_norm": 0.011385355144739151, "kl": 0.33378446102142334, "learning_rate": 2.8419071624553425e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3574 }, { "completion_length": 208.1428680419922, "epoch": 3.430902111324376, "grad_norm": 1.4657708406448364, "kl": 0.3282209038734436, "learning_rate": 2.832531280947295e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3575 }, { "completion_length": 187.21429443359375, "epoch": 3.431861804222649, "grad_norm": 0.011502538807690144, "kl": 0.3577307462692261, "learning_rate": 2.8231699623534e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3576 }, { "completion_length": 158.57144165039062, "epoch": 3.4328214971209214, "grad_norm": 1.123162031173706, "kl": 0.44215816259384155, "learning_rate": 2.8138232128235654e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3577 }, { "completion_length": 270.2857360839844, "epoch": 3.433781190019194, "grad_norm": 0.031122252345085144, "kl": 0.29175207018852234, "learning_rate": 2.8044910384981357e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3578 }, { "completion_length": 166.92857360839844, "epoch": 3.4347408829174664, "grad_norm": 1.3943839073181152, "kl": 0.48864054679870605, "learning_rate": 2.7951734455078786e-08, "loss": 0.0005, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 3579 }, { "completion_length": 153.42857360839844, "epoch": 3.435700575815739, "grad_norm": 1.7656581401824951, "kl": 0.37175920605659485, "learning_rate": 2.785870439973989e-08, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3580 }, { "completion_length": 189.21429443359375, "epoch": 3.4366602687140118, "grad_norm": 0.021478619426488876, "kl": 0.3485143780708313, "learning_rate": 2.776582028008084e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3581 }, { "completion_length": 200.21429443359375, "epoch": 3.4376199616122842, "grad_norm": 0.01474663894623518, "kl": 0.3908478319644928, "learning_rate": 2.7673082157121662e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3582 }, { "completion_length": 168.0, "epoch": 3.4385796545105567, "grad_norm": 1.770392656326294, "kl": 0.43147239089012146, "learning_rate": 2.75804900917867e-08, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3583 }, { "completion_length": 218.07144165039062, "epoch": 3.439539347408829, "grad_norm": 0.011026518419384956, "kl": 0.2766701579093933, "learning_rate": 2.7488044144904383e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3584 }, { "completion_length": 239.71429443359375, "epoch": 3.4404990403071016, "grad_norm": 0.010935511440038681, "kl": 0.25284871459007263, "learning_rate": 2.739574437720696e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3585 }, { "completion_length": 188.42857360839844, "epoch": 3.441458733205374, "grad_norm": 0.032076362520456314, "kl": 0.3953351378440857, "learning_rate": 2.7303590849330814e-08, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3586 }, { "completion_length": 231.4285888671875, "epoch": 3.442418426103647, "grad_norm": 0.1717095673084259, "kl": 0.5800155997276306, "learning_rate": 2.7211583621816136e-08, "loss": 0.0006, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3587 }, { "completion_length": 182.35714721679688, "epoch": 3.4433781190019195, "grad_norm": 1.334232211112976, "kl": 0.331057071685791, "learning_rate": 2.7119722755107044e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3588 }, { "completion_length": 204.21429443359375, "epoch": 3.444337811900192, "grad_norm": 0.01151277869939804, "kl": 0.32054710388183594, "learning_rate": 2.702800830955157e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3589 }, { "completion_length": 205.71429443359375, "epoch": 3.4452975047984644, "grad_norm": 0.022356683388352394, "kl": 0.3384780287742615, "learning_rate": 2.693644034540149e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3590 }, { "completion_length": 203.35714721679688, "epoch": 3.446257197696737, "grad_norm": 1.437828540802002, "kl": 0.32470521330833435, "learning_rate": 2.6845018922812352e-08, "loss": 0.0003, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3591 }, { "completion_length": 186.07144165039062, "epoch": 3.4472168905950094, "grad_norm": 0.013900774531066418, "kl": 0.3201424777507782, "learning_rate": 2.6753744101843443e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3592 }, { "completion_length": 174.1428680419922, "epoch": 3.4481765834932823, "grad_norm": 0.8469977378845215, "kl": 0.39773452281951904, "learning_rate": 2.6662615942457666e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3593 }, { "completion_length": 172.50001525878906, "epoch": 3.4491362763915547, "grad_norm": 0.018807202577590942, "kl": 0.392169326543808, "learning_rate": 2.657163450452174e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3594 }, { "completion_length": 169.85714721679688, "epoch": 3.450095969289827, "grad_norm": 0.013977281749248505, "kl": 0.3735432028770447, "learning_rate": 2.6480799847805964e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3595 }, { "completion_length": 207.50001525878906, "epoch": 3.4510556621880997, "grad_norm": 1.2026817798614502, "kl": 0.3265073001384735, "learning_rate": 2.6390112031983937e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3596 }, { "completion_length": 189.71429443359375, "epoch": 3.452015355086372, "grad_norm": 0.009335832670331001, "kl": 0.36705276370048523, "learning_rate": 2.629957111663314e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3597 }, { "completion_length": 234.2857208251953, "epoch": 3.452975047984645, "grad_norm": 0.7902823686599731, "kl": 0.2507869303226471, "learning_rate": 2.6209177161234442e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3598 }, { "completion_length": 154.35714721679688, "epoch": 3.4539347408829175, "grad_norm": 1.2945319414138794, "kl": 0.4449773132801056, "learning_rate": 2.611893022517203e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3599 }, { "completion_length": 198.2857208251953, "epoch": 3.45489443378119, "grad_norm": 0.5919753313064575, "kl": 0.31817689538002014, "learning_rate": 2.6028830367733706e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3600 }, { "completion_length": 172.92857360839844, "epoch": 3.4558541266794625, "grad_norm": 1.1012285947799683, "kl": 0.4058283269405365, "learning_rate": 2.5938877648110507e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3601 }, { "completion_length": 188.57144165039062, "epoch": 3.456813819577735, "grad_norm": 0.015129867009818554, "kl": 0.3932288885116577, "learning_rate": 2.584907212539683e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3602 }, { "completion_length": 236.07144165039062, "epoch": 3.457773512476008, "grad_norm": 1.1634324789047241, "kl": 0.2745761275291443, "learning_rate": 2.5759413858590456e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3603 }, { "completion_length": 200.07144165039062, "epoch": 3.4587332053742803, "grad_norm": 0.010747634805738926, "kl": 0.31745168566703796, "learning_rate": 2.5669902906592293e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3604 }, { "completion_length": 215.07144165039062, "epoch": 3.4596928982725528, "grad_norm": 0.7336201667785645, "kl": 0.3368784487247467, "learning_rate": 2.5580539328206614e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3605 }, { "completion_length": 207.07144165039062, "epoch": 3.4606525911708252, "grad_norm": 0.9103237986564636, "kl": 0.34647542238235474, "learning_rate": 2.5491323182140784e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3606 }, { "completion_length": 144.6428680419922, "epoch": 3.4616122840690977, "grad_norm": 1.4711482524871826, "kl": 0.4363515377044678, "learning_rate": 2.5402254527005285e-08, "loss": 0.0004, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 3607 }, { "completion_length": 162.35714721679688, "epoch": 3.4625719769673706, "grad_norm": 0.01351761631667614, "kl": 0.4162861108779907, "learning_rate": 2.531333342131378e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3608 }, { "completion_length": 199.21429443359375, "epoch": 3.463531669865643, "grad_norm": 0.011149157769978046, "kl": 0.30946052074432373, "learning_rate": 2.5224559923483075e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3609 }, { "completion_length": 182.2857208251953, "epoch": 3.4644913627639156, "grad_norm": 0.008692513220012188, "kl": 0.3509489595890045, "learning_rate": 2.5135934091832705e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3610 }, { "completion_length": 165.07144165039062, "epoch": 3.465451055662188, "grad_norm": 0.01666029915213585, "kl": 0.4135626554489136, "learning_rate": 2.5047455984585518e-08, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3611 }, { "completion_length": 214.57144165039062, "epoch": 3.4664107485604605, "grad_norm": 1.4113229513168335, "kl": 0.35170263051986694, "learning_rate": 2.4959125659867186e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3612 }, { "completion_length": 186.1428680419922, "epoch": 3.4673704414587334, "grad_norm": 0.13700926303863525, "kl": 0.6029424667358398, "learning_rate": 2.487094317570623e-08, "loss": 0.0006, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3613 }, { "completion_length": 192.6428680419922, "epoch": 3.468330134357006, "grad_norm": 2.111257553100586, "kl": 0.34734877943992615, "learning_rate": 2.4782908590034235e-08, "loss": 0.0003, "reward": 0.6428571939468384, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.6428571939468384, "step": 3614 }, { "completion_length": 173.7857208251953, "epoch": 3.4692898272552783, "grad_norm": 0.9219502806663513, "kl": 0.5197234749794006, "learning_rate": 2.4695021960685403e-08, "loss": 0.0005, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3615 }, { "completion_length": 166.71429443359375, "epoch": 3.470249520153551, "grad_norm": 1.063632845878601, "kl": 0.3679032623767853, "learning_rate": 2.460728334539683e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3616 }, { "completion_length": 201.6428680419922, "epoch": 3.4712092130518233, "grad_norm": 1.1913435459136963, "kl": 0.3456050455570221, "learning_rate": 2.451969280180849e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3617 }, { "completion_length": 201.57144165039062, "epoch": 3.472168905950096, "grad_norm": 0.8354750871658325, "kl": 0.5213611125946045, "learning_rate": 2.4432250387462828e-08, "loss": 0.0005, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3618 }, { "completion_length": 140.92857360839844, "epoch": 3.4731285988483687, "grad_norm": 1.4384957551956177, "kl": 0.46148982644081116, "learning_rate": 2.4344956159805252e-08, "loss": 0.0005, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3619 }, { "completion_length": 230.2857208251953, "epoch": 3.474088291746641, "grad_norm": 0.010537712834775448, "kl": 0.27234113216400146, "learning_rate": 2.4257810176183614e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3620 }, { "completion_length": 164.1428680419922, "epoch": 3.4750479846449136, "grad_norm": 1.8151459693908691, "kl": 0.4061465859413147, "learning_rate": 2.417081249384842e-08, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3621 }, { "completion_length": 160.6428680419922, "epoch": 3.476007677543186, "grad_norm": 0.013413443230092525, "kl": 0.37447869777679443, "learning_rate": 2.4083963169952844e-08, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3622 }, { "completion_length": 168.5, "epoch": 3.476967370441459, "grad_norm": 0.009908950887620449, "kl": 0.3629602789878845, "learning_rate": 2.3997262261552604e-08, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3623 }, { "completion_length": 196.7857208251953, "epoch": 3.4779270633397315, "grad_norm": 0.014653450809419155, "kl": 0.3260068893432617, "learning_rate": 2.3910709825605642e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3624 }, { "completion_length": 195.21429443359375, "epoch": 3.478886756238004, "grad_norm": 0.011015902273356915, "kl": 0.30881673097610474, "learning_rate": 2.3824305918972693e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3625 }, { "completion_length": 235.6428680419922, "epoch": 3.4798464491362764, "grad_norm": 0.014173881150782108, "kl": 0.28704091906547546, "learning_rate": 2.3738050598416796e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3626 }, { "completion_length": 161.57144165039062, "epoch": 3.480806142034549, "grad_norm": 1.031058669090271, "kl": 0.4277101755142212, "learning_rate": 2.3651943920603325e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3627 }, { "completion_length": 183.1428680419922, "epoch": 3.4817658349328213, "grad_norm": 1.6982033252716064, "kl": 0.3648936450481415, "learning_rate": 2.356598594210013e-08, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3628 }, { "completion_length": 146.5, "epoch": 3.4827255278310942, "grad_norm": 1.4216452836990356, "kl": 0.5240235924720764, "learning_rate": 2.348017671937713e-08, "loss": 0.0005, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3629 }, { "completion_length": 234.50001525878906, "epoch": 3.4836852207293667, "grad_norm": 0.013973478227853775, "kl": 0.25067663192749023, "learning_rate": 2.3394516308806762e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3630 }, { "completion_length": 126.28572082519531, "epoch": 3.484644913627639, "grad_norm": 0.023073391988873482, "kl": 0.5161922574043274, "learning_rate": 2.330900476666367e-08, "loss": 0.0005, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3631 }, { "completion_length": 149.92857360839844, "epoch": 3.4856046065259116, "grad_norm": 0.9132435321807861, "kl": 0.3993733525276184, "learning_rate": 2.3223642149124563e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3632 }, { "completion_length": 183.92857360839844, "epoch": 3.486564299424184, "grad_norm": 0.02648351714015007, "kl": 0.37555640935897827, "learning_rate": 2.3138428512268436e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3633 }, { "completion_length": 250.71429443359375, "epoch": 3.4875239923224566, "grad_norm": 1.2690662145614624, "kl": 0.2737468481063843, "learning_rate": 2.305336391207638e-08, "loss": 0.0003, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 3634 }, { "completion_length": 128.2857208251953, "epoch": 3.4884836852207295, "grad_norm": 1.0910425186157227, "kl": 0.6102107167243958, "learning_rate": 2.296844840443149e-08, "loss": 0.0006, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 3635 }, { "completion_length": 163.57144165039062, "epoch": 3.489443378119002, "grad_norm": 1.2549883127212524, "kl": 0.4057672619819641, "learning_rate": 2.2883682045119062e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3636 }, { "completion_length": 202.71429443359375, "epoch": 3.4904030710172744, "grad_norm": 0.008043358102440834, "kl": 0.30777716636657715, "learning_rate": 2.2799064889826413e-08, "loss": 0.0003, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3637 }, { "completion_length": 217.85714721679688, "epoch": 3.491362763915547, "grad_norm": 0.7121692299842834, "kl": 0.3227200508117676, "learning_rate": 2.2714596994142578e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3638 }, { "completion_length": 171.00001525878906, "epoch": 3.4923224568138194, "grad_norm": 0.03684408590197563, "kl": 0.42019233107566833, "learning_rate": 2.2630278413558828e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3639 }, { "completion_length": 161.35714721679688, "epoch": 3.4932821497120923, "grad_norm": 1.3678263425827026, "kl": 0.36416420340538025, "learning_rate": 2.2546109203468276e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3640 }, { "completion_length": 193.1428680419922, "epoch": 3.4942418426103647, "grad_norm": 1.3177274465560913, "kl": 0.3394598364830017, "learning_rate": 2.2462089419165776e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3641 }, { "completion_length": 169.6428680419922, "epoch": 3.495201535508637, "grad_norm": 0.010548200458288193, "kl": 0.3211057186126709, "learning_rate": 2.237821911584825e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3642 }, { "completion_length": 182.85714721679688, "epoch": 3.4961612284069097, "grad_norm": 0.02307163178920746, "kl": 0.3993740379810333, "learning_rate": 2.2294498348614105e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3643 }, { "completion_length": 186.71429443359375, "epoch": 3.497120921305182, "grad_norm": 0.01870967075228691, "kl": 0.38585102558135986, "learning_rate": 2.2210927172463782e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3644 }, { "completion_length": 238.7857208251953, "epoch": 3.498080614203455, "grad_norm": 0.014878618530929089, "kl": 0.3136272430419922, "learning_rate": 2.2127505642299342e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3645 }, { "completion_length": 190.00001525878906, "epoch": 3.4990403071017275, "grad_norm": 0.9869660139083862, "kl": 0.3630474805831909, "learning_rate": 2.2044233812924502e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3646 }, { "completion_length": 152.21429443359375, "epoch": 3.5, "grad_norm": 1.334701657295227, "kl": 0.44659316539764404, "learning_rate": 2.1961111739044746e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3647 }, { "completion_length": 208.1428680419922, "epoch": 3.5009596928982725, "grad_norm": 0.016747495159506798, "kl": 0.33267322182655334, "learning_rate": 2.1878139475267037e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3648 }, { "completion_length": 171.21429443359375, "epoch": 3.501919385796545, "grad_norm": 0.840132474899292, "kl": 0.39893096685409546, "learning_rate": 2.1795317076099994e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3649 }, { "completion_length": 204.71429443359375, "epoch": 3.502879078694818, "grad_norm": 1.3711005449295044, "kl": 0.2984742522239685, "learning_rate": 2.171264459595379e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3650 }, { "completion_length": 243.71429443359375, "epoch": 3.5038387715930903, "grad_norm": 0.8202998042106628, "kl": 0.27028220891952515, "learning_rate": 2.1630122089140167e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3651 }, { "completion_length": 209.6428680419922, "epoch": 3.504798464491363, "grad_norm": 1.1325221061706543, "kl": 0.36126282811164856, "learning_rate": 2.15477496098721e-08, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3652 }, { "completion_length": 149.57144165039062, "epoch": 3.5057581573896353, "grad_norm": 0.012543094344437122, "kl": 0.42396602034568787, "learning_rate": 2.146552721226433e-08, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3653 }, { "completion_length": 169.07144165039062, "epoch": 3.5067178502879077, "grad_norm": 0.012963724322617054, "kl": 0.4198109209537506, "learning_rate": 2.138345495033275e-08, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3654 }, { "completion_length": 126.14286041259766, "epoch": 3.5076775431861806, "grad_norm": 1.996424674987793, "kl": 0.570809006690979, "learning_rate": 2.1301532877994742e-08, "loss": 0.0006, "reward": 0.7142857313156128, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "step": 3655 }, { "completion_length": 158.57144165039062, "epoch": 3.508637236084453, "grad_norm": 1.223650336265564, "kl": 0.3895556330680847, "learning_rate": 2.1219761049069086e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3656 }, { "completion_length": 163.42857360839844, "epoch": 3.5095969289827256, "grad_norm": 1.3701460361480713, "kl": 0.3939005434513092, "learning_rate": 2.113813951727561e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3657 }, { "completion_length": 209.6428680419922, "epoch": 3.510556621880998, "grad_norm": 1.4817591905593872, "kl": 0.3394921123981476, "learning_rate": 2.1056668336235623e-08, "loss": 0.0003, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3658 }, { "completion_length": 168.57144165039062, "epoch": 3.5115163147792705, "grad_norm": 1.0590994358062744, "kl": 0.4048977792263031, "learning_rate": 2.0975347559471696e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3659 }, { "completion_length": 188.92857360839844, "epoch": 3.5124760076775434, "grad_norm": 0.013629505410790443, "kl": 0.3538658320903778, "learning_rate": 2.0894177240407345e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3660 }, { "completion_length": 224.85714721679688, "epoch": 3.513435700575816, "grad_norm": 0.020392458885908127, "kl": 0.29041633009910583, "learning_rate": 2.0813157432367527e-08, "loss": 0.0003, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3661 }, { "completion_length": 153.7857208251953, "epoch": 3.5143953934740884, "grad_norm": 0.017182257026433945, "kl": 0.46942877769470215, "learning_rate": 2.073228818857811e-08, "loss": 0.0005, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3662 }, { "completion_length": 157.7857208251953, "epoch": 3.515355086372361, "grad_norm": 0.01497318409383297, "kl": 0.46288588643074036, "learning_rate": 2.0651569562166126e-08, "loss": 0.0005, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3663 }, { "completion_length": 206.57144165039062, "epoch": 3.5163147792706333, "grad_norm": 0.013065165840089321, "kl": 0.3021330237388611, "learning_rate": 2.0571001606159683e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3664 }, { "completion_length": 259.9285888671875, "epoch": 3.517274472168906, "grad_norm": 0.07877280563116074, "kl": 0.2612258791923523, "learning_rate": 2.049058437348797e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3665 }, { "completion_length": 205.00001525878906, "epoch": 3.5182341650671782, "grad_norm": 1.156162977218628, "kl": 0.42150506377220154, "learning_rate": 2.041031791698089e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3666 }, { "completion_length": 182.92857360839844, "epoch": 3.519193857965451, "grad_norm": 0.011462331749498844, "kl": 0.3520217537879944, "learning_rate": 2.03302022893696e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3667 }, { "completion_length": 200.85714721679688, "epoch": 3.5201535508637236, "grad_norm": 0.01786903850734234, "kl": 0.35237687826156616, "learning_rate": 2.0250237543286e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3668 }, { "completion_length": 148.57144165039062, "epoch": 3.521113243761996, "grad_norm": 2.390188694000244, "kl": 0.43074896931648254, "learning_rate": 2.0170423731262903e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3669 }, { "completion_length": 198.92857360839844, "epoch": 3.5220729366602685, "grad_norm": 1.3432438373565674, "kl": 0.3396081328392029, "learning_rate": 2.009076090573411e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3670 }, { "completion_length": 189.1428680419922, "epoch": 3.523032629558541, "grad_norm": 1.3920204639434814, "kl": 0.40757158398628235, "learning_rate": 2.00112491190339e-08, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3671 }, { "completion_length": 139.85714721679688, "epoch": 3.523992322456814, "grad_norm": 1.5241376161575317, "kl": 0.45129042863845825, "learning_rate": 1.993188842339763e-08, "loss": 0.0005, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3672 }, { "completion_length": 192.7857208251953, "epoch": 3.5249520153550864, "grad_norm": 1.3845579624176025, "kl": 0.42738568782806396, "learning_rate": 1.9852678870961316e-08, "loss": 0.0004, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 3673 }, { "completion_length": 222.35714721679688, "epoch": 3.525911708253359, "grad_norm": 0.860019862651825, "kl": 0.317802369594574, "learning_rate": 1.977362051376158e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3674 }, { "completion_length": 167.57144165039062, "epoch": 3.5268714011516313, "grad_norm": 0.01008826494216919, "kl": 0.3687859773635864, "learning_rate": 1.969471340373588e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3675 }, { "completion_length": 226.4285888671875, "epoch": 3.527831094049904, "grad_norm": 0.038570038974285126, "kl": 0.3173215091228485, "learning_rate": 1.9615957592722165e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3676 }, { "completion_length": 189.85714721679688, "epoch": 3.5287907869481767, "grad_norm": 1.8419034481048584, "kl": 0.3934882879257202, "learning_rate": 1.953735313245905e-08, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3677 }, { "completion_length": 188.00001525878906, "epoch": 3.529750479846449, "grad_norm": 0.012953557074069977, "kl": 0.3208943009376526, "learning_rate": 1.945890007458578e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3678 }, { "completion_length": 251.07144165039062, "epoch": 3.5307101727447217, "grad_norm": 0.8381192684173584, "kl": 0.298504501581192, "learning_rate": 1.9380598470642023e-08, "loss": 0.0003, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 3679 }, { "completion_length": 236.71429443359375, "epoch": 3.531669865642994, "grad_norm": 0.009304671548306942, "kl": 0.26488232612609863, "learning_rate": 1.9302448372067957e-08, "loss": 0.0003, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3680 }, { "completion_length": 148.57144165039062, "epoch": 3.5326295585412666, "grad_norm": 3.5857272148132324, "kl": 0.5161511898040771, "learning_rate": 1.9224449830204383e-08, "loss": 0.0005, "reward": 0.7142857313156128, "reward_std": 0.4040610194206238, "rewards/check_gptzero_func": 0.7142857313156128, "step": 3681 }, { "completion_length": 224.71429443359375, "epoch": 3.5335892514395395, "grad_norm": 0.014303197152912617, "kl": 0.33373671770095825, "learning_rate": 1.914660289629233e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3682 }, { "completion_length": 178.00001525878906, "epoch": 3.534548944337812, "grad_norm": 1.3789602518081665, "kl": 0.38735494017601013, "learning_rate": 1.9068907621473368e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3683 }, { "completion_length": 229.21429443359375, "epoch": 3.5355086372360844, "grad_norm": 0.010082104243338108, "kl": 0.26169896125793457, "learning_rate": 1.899136405678947e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3684 }, { "completion_length": 172.07144165039062, "epoch": 3.536468330134357, "grad_norm": 0.014182456769049168, "kl": 0.3605800271034241, "learning_rate": 1.8913972253182698e-08, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3685 }, { "completion_length": 202.6428680419922, "epoch": 3.5374280230326294, "grad_norm": 0.009178601205348969, "kl": 0.31584474444389343, "learning_rate": 1.883673226149571e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3686 }, { "completion_length": 158.42857360839844, "epoch": 3.5383877159309023, "grad_norm": 0.011028497479856014, "kl": 0.4645547866821289, "learning_rate": 1.8759644132471274e-08, "loss": 0.0005, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3687 }, { "completion_length": 170.42857360839844, "epoch": 3.5393474088291748, "grad_norm": 0.014213735237717628, "kl": 0.3719211518764496, "learning_rate": 1.868270791675236e-08, "loss": 0.0004, "reward": 0.7142857313156128, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "step": 3688 }, { "completion_length": 157.5, "epoch": 3.5403071017274472, "grad_norm": 0.018826734274625778, "kl": 0.4364089369773865, "learning_rate": 1.860592366488231e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3689 }, { "completion_length": 140.71429443359375, "epoch": 3.5412667946257197, "grad_norm": 0.9976784586906433, "kl": 0.5086076259613037, "learning_rate": 1.852929142730447e-08, "loss": 0.0005, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3690 }, { "completion_length": 184.2857208251953, "epoch": 3.542226487523992, "grad_norm": 0.010565770789980888, "kl": 0.28858721256256104, "learning_rate": 1.8452811254362322e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3691 }, { "completion_length": 221.00001525878906, "epoch": 3.543186180422265, "grad_norm": 0.7768824696540833, "kl": 0.3033658564090729, "learning_rate": 1.8376483196299558e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3692 }, { "completion_length": 188.00001525878906, "epoch": 3.5441458733205375, "grad_norm": 0.020575009286403656, "kl": 0.35239967703819275, "learning_rate": 1.8300307303259904e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3693 }, { "completion_length": 171.6428680419922, "epoch": 3.54510556621881, "grad_norm": 1.1804229021072388, "kl": 0.44631221890449524, "learning_rate": 1.8224283625287023e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3694 }, { "completion_length": 173.71429443359375, "epoch": 3.5460652591170825, "grad_norm": 1.3824141025543213, "kl": 0.3856815695762634, "learning_rate": 1.8148412212324714e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3695 }, { "completion_length": 227.2857208251953, "epoch": 3.547024952015355, "grad_norm": 0.01708219200372696, "kl": 0.33080941438674927, "learning_rate": 1.8072693114216636e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3696 }, { "completion_length": 211.42857360839844, "epoch": 3.547984644913628, "grad_norm": 1.3388121128082275, "kl": 0.2978571653366089, "learning_rate": 1.799712638070644e-08, "loss": 0.0003, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3697 }, { "completion_length": 154.42857360839844, "epoch": 3.5489443378119003, "grad_norm": 0.018886568024754524, "kl": 0.47909003496170044, "learning_rate": 1.792171206143783e-08, "loss": 0.0005, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3698 }, { "completion_length": 213.35714721679688, "epoch": 3.549904030710173, "grad_norm": 0.011349895969033241, "kl": 0.32811570167541504, "learning_rate": 1.7846450205953988e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3699 }, { "completion_length": 201.92857360839844, "epoch": 3.5508637236084453, "grad_norm": 0.730628252029419, "kl": 0.2954549193382263, "learning_rate": 1.7771340863698304e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3700 }, { "completion_length": 174.2857208251953, "epoch": 3.5518234165067177, "grad_norm": 0.05867518112063408, "kl": 0.48158177733421326, "learning_rate": 1.769638408401383e-08, "loss": 0.0005, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3701 }, { "completion_length": 197.00001525878906, "epoch": 3.5527831094049906, "grad_norm": 1.394521713256836, "kl": 0.3147624433040619, "learning_rate": 1.762157991614341e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3702 }, { "completion_length": 195.7857208251953, "epoch": 3.553742802303263, "grad_norm": 1.4375709295272827, "kl": 0.3476080596446991, "learning_rate": 1.7546928409229693e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3703 }, { "completion_length": 179.57144165039062, "epoch": 3.5547024952015356, "grad_norm": 0.8353205323219299, "kl": 0.28401464223861694, "learning_rate": 1.747242961231482e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3704 }, { "completion_length": 213.1428680419922, "epoch": 3.555662188099808, "grad_norm": 0.9549077153205872, "kl": 0.2994273900985718, "learning_rate": 1.7398083574340832e-08, "loss": 0.0003, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 3705 }, { "completion_length": 198.2857208251953, "epoch": 3.5566218809980805, "grad_norm": 1.6072609424591064, "kl": 0.3600272238254547, "learning_rate": 1.7323890344149412e-08, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3706 }, { "completion_length": 246.4285888671875, "epoch": 3.5575815738963534, "grad_norm": 0.03732876479625702, "kl": 0.2955082058906555, "learning_rate": 1.7249849970481707e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3707 }, { "completion_length": 140.35714721679688, "epoch": 3.5585412667946255, "grad_norm": 1.2976542711257935, "kl": 0.44152501225471497, "learning_rate": 1.717596250197853e-08, "loss": 0.0004, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 3708 }, { "completion_length": 160.92857360839844, "epoch": 3.5595009596928984, "grad_norm": 0.014116106554865837, "kl": 0.39181914925575256, "learning_rate": 1.710222798718028e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3709 }, { "completion_length": 149.57144165039062, "epoch": 3.560460652591171, "grad_norm": 1.240505576133728, "kl": 0.41363394260406494, "learning_rate": 1.7028646474526786e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3710 }, { "completion_length": 229.2857208251953, "epoch": 3.5614203454894433, "grad_norm": 0.015501504763960838, "kl": 0.3367616534233093, "learning_rate": 1.6955218012357415e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3711 }, { "completion_length": 213.42857360839844, "epoch": 3.5623800383877158, "grad_norm": 0.577447235584259, "kl": 0.3280046582221985, "learning_rate": 1.6881942648911074e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3712 }, { "completion_length": 224.00001525878906, "epoch": 3.5633397312859882, "grad_norm": 1.2835675477981567, "kl": 0.3822989761829376, "learning_rate": 1.6808820432325872e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3713 }, { "completion_length": 150.7857208251953, "epoch": 3.564299424184261, "grad_norm": 1.736883521080017, "kl": 0.4817219376564026, "learning_rate": 1.6735851410639462e-08, "loss": 0.0005, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3714 }, { "completion_length": 184.57144165039062, "epoch": 3.5652591170825336, "grad_norm": 0.01647990010678768, "kl": 0.3767276108264923, "learning_rate": 1.666303563178889e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3715 }, { "completion_length": 206.1428680419922, "epoch": 3.566218809980806, "grad_norm": 0.019245285540819168, "kl": 0.38137784600257874, "learning_rate": 1.659037314361039e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3716 }, { "completion_length": 212.71429443359375, "epoch": 3.5671785028790786, "grad_norm": 0.00970411766320467, "kl": 0.3299715220928192, "learning_rate": 1.651786399383967e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3717 }, { "completion_length": 173.85714721679688, "epoch": 3.568138195777351, "grad_norm": 0.025314465165138245, "kl": 0.3429577052593231, "learning_rate": 1.644550823011148e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3718 }, { "completion_length": 150.5, "epoch": 3.569097888675624, "grad_norm": 0.012232447043061256, "kl": 0.46087145805358887, "learning_rate": 1.637330589995997e-08, "loss": 0.0005, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3719 }, { "completion_length": 162.85714721679688, "epoch": 3.5700575815738964, "grad_norm": 0.014156927354633808, "kl": 0.35625317692756653, "learning_rate": 1.63012570508185e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3720 }, { "completion_length": 221.2857208251953, "epoch": 3.571017274472169, "grad_norm": 0.7723085284233093, "kl": 0.2825401723384857, "learning_rate": 1.6229361730019513e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3721 }, { "completion_length": 191.92857360839844, "epoch": 3.5719769673704413, "grad_norm": 1.2773611545562744, "kl": 0.370078444480896, "learning_rate": 1.6157619984794564e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3722 }, { "completion_length": 173.71429443359375, "epoch": 3.572936660268714, "grad_norm": 0.01953227072954178, "kl": 0.4285464584827423, "learning_rate": 1.6086031862274474e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3723 }, { "completion_length": 143.21429443359375, "epoch": 3.5738963531669867, "grad_norm": 3.148104667663574, "kl": 0.4437865614891052, "learning_rate": 1.6014597409488988e-08, "loss": 0.0004, "reward": 0.785714328289032, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.785714328289032, "step": 3724 }, { "completion_length": 271.21429443359375, "epoch": 3.574856046065259, "grad_norm": 1.599349856376648, "kl": 0.3229457437992096, "learning_rate": 1.5943316673366957e-08, "loss": 0.0003, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3725 }, { "completion_length": 195.35714721679688, "epoch": 3.5758157389635317, "grad_norm": 1.5081380605697632, "kl": 0.3044409453868866, "learning_rate": 1.5872189700736337e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3726 }, { "completion_length": 142.92857360839844, "epoch": 3.576775431861804, "grad_norm": 1.9717048406600952, "kl": 0.4520241618156433, "learning_rate": 1.5801216538323835e-08, "loss": 0.0005, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3727 }, { "completion_length": 220.9285888671875, "epoch": 3.5777351247600766, "grad_norm": 0.01057406421750784, "kl": 0.35116299986839294, "learning_rate": 1.573039723275535e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3728 }, { "completion_length": 212.1428680419922, "epoch": 3.5786948176583495, "grad_norm": 0.012468394823372364, "kl": 0.3097410798072815, "learning_rate": 1.565973183055555e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3729 }, { "completion_length": 231.7857208251953, "epoch": 3.579654510556622, "grad_norm": 0.012036305852234364, "kl": 0.26326024532318115, "learning_rate": 1.558922037814808e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3730 }, { "completion_length": 155.42857360839844, "epoch": 3.5806142034548945, "grad_norm": 1.7303431034088135, "kl": 0.4035284221172333, "learning_rate": 1.551886292185553e-08, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3731 }, { "completion_length": 196.42857360839844, "epoch": 3.581573896353167, "grad_norm": 1.7752971649169922, "kl": 0.36243245005607605, "learning_rate": 1.5448659507899033e-08, "loss": 0.0004, "reward": 0.5714285969734192, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.5714285969734192, "step": 3732 }, { "completion_length": 179.42857360839844, "epoch": 3.5825335892514394, "grad_norm": 1.3751931190490723, "kl": 0.4037622809410095, "learning_rate": 1.5378610182398804e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3733 }, { "completion_length": 170.35714721679688, "epoch": 3.5834932821497123, "grad_norm": 0.018164820969104767, "kl": 0.415378600358963, "learning_rate": 1.530871499137376e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3734 }, { "completion_length": 180.07144165039062, "epoch": 3.5844529750479848, "grad_norm": 1.4220048189163208, "kl": 0.36856329441070557, "learning_rate": 1.5238973980741505e-08, "loss": 0.0004, "reward": 0.7142857313156128, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "step": 3735 }, { "completion_length": 188.2857208251953, "epoch": 3.5854126679462572, "grad_norm": 1.0880872011184692, "kl": 0.3558719754219055, "learning_rate": 1.5169387196318328e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3736 }, { "completion_length": 197.57144165039062, "epoch": 3.5863723608445297, "grad_norm": 0.8737940192222595, "kl": 0.3100458085536957, "learning_rate": 1.509995468381936e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3737 }, { "completion_length": 173.50001525878906, "epoch": 3.587332053742802, "grad_norm": 2.4661850929260254, "kl": 0.4277718961238861, "learning_rate": 1.503067648885814e-08, "loss": 0.0004, "reward": 0.7142857313156128, "reward_std": 0.4040610194206238, "rewards/check_gptzero_func": 0.7142857313156128, "step": 3738 }, { "completion_length": 223.00001525878906, "epoch": 3.588291746641075, "grad_norm": 0.5072264075279236, "kl": 0.29294130206108093, "learning_rate": 1.496155265694707e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3739 }, { "completion_length": 206.85714721679688, "epoch": 3.5892514395393476, "grad_norm": 0.008543414995074272, "kl": 0.2942042052745819, "learning_rate": 1.4892583233497097e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3740 }, { "completion_length": 169.07144165039062, "epoch": 3.59021113243762, "grad_norm": 1.6426976919174194, "kl": 0.4000421464443207, "learning_rate": 1.4823768263817482e-08, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3741 }, { "completion_length": 157.6428680419922, "epoch": 3.5911708253358925, "grad_norm": 0.013222526758909225, "kl": 0.4015849530696869, "learning_rate": 1.4755107793116367e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3742 }, { "completion_length": 199.2857208251953, "epoch": 3.592130518234165, "grad_norm": 0.024136414751410484, "kl": 0.43131259083747864, "learning_rate": 1.4686601866500115e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3743 }, { "completion_length": 210.6428680419922, "epoch": 3.593090211132438, "grad_norm": 0.8128000497817993, "kl": 0.3048863410949707, "learning_rate": 1.4618250528973752e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3744 }, { "completion_length": 193.57144165039062, "epoch": 3.59404990403071, "grad_norm": 1.7008763551712036, "kl": 0.349496990442276, "learning_rate": 1.4550053825440722e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3745 }, { "completion_length": 206.35714721679688, "epoch": 3.595009596928983, "grad_norm": 1.549566388130188, "kl": 0.32350954413414, "learning_rate": 1.4482011800702715e-08, "loss": 0.0003, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3746 }, { "completion_length": 215.07144165039062, "epoch": 3.5959692898272553, "grad_norm": 1.4645044803619385, "kl": 0.33352839946746826, "learning_rate": 1.441412449946e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3747 }, { "completion_length": 250.9285888671875, "epoch": 3.5969289827255277, "grad_norm": 1.0823825597763062, "kl": 0.31862232089042664, "learning_rate": 1.4346391966311128e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3748 }, { "completion_length": 176.92857360839844, "epoch": 3.5978886756238007, "grad_norm": 0.8460863828659058, "kl": 0.33011579513549805, "learning_rate": 1.4278814245752973e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3749 }, { "completion_length": 176.85714721679688, "epoch": 3.5988483685220727, "grad_norm": 1.030044436454773, "kl": 0.3436357080936432, "learning_rate": 1.4211391382180637e-08, "loss": 0.0003, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 3750 }, { "completion_length": 208.92857360839844, "epoch": 3.5998080614203456, "grad_norm": 0.009837611578404903, "kl": 0.29564961791038513, "learning_rate": 1.414412341988766e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3751 }, { "completion_length": 182.42857360839844, "epoch": 3.600767754318618, "grad_norm": 0.6425111293792725, "kl": 0.36226552724838257, "learning_rate": 1.4077010403065608e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3752 }, { "completion_length": 213.6428680419922, "epoch": 3.6017274472168905, "grad_norm": 0.009137660264968872, "kl": 0.3098016679286957, "learning_rate": 1.4010052375804493e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3753 }, { "completion_length": 142.0, "epoch": 3.602687140115163, "grad_norm": 1.8813371658325195, "kl": 0.5046231746673584, "learning_rate": 1.3943249382092292e-08, "loss": 0.0005, "reward": 0.7142857313156128, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "step": 3754 }, { "completion_length": 115.92857360839844, "epoch": 3.6036468330134355, "grad_norm": 1.6560404300689697, "kl": 0.4937984347343445, "learning_rate": 1.3876601465815179e-08, "loss": 0.0005, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3755 }, { "completion_length": 186.35714721679688, "epoch": 3.6046065259117084, "grad_norm": 1.9277383089065552, "kl": 0.35960784554481506, "learning_rate": 1.3810108670757603e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3756 }, { "completion_length": 186.00001525878906, "epoch": 3.605566218809981, "grad_norm": 1.8977829217910767, "kl": 0.3756295144557953, "learning_rate": 1.3743771040601898e-08, "loss": 0.0004, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 3757 }, { "completion_length": 233.4285888671875, "epoch": 3.6065259117082533, "grad_norm": 0.011998129077255726, "kl": 0.2742859125137329, "learning_rate": 1.3677588618928592e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3758 }, { "completion_length": 204.1428680419922, "epoch": 3.607485604606526, "grad_norm": 0.01415242999792099, "kl": 0.34582194685935974, "learning_rate": 1.361156144921627e-08, "loss": 0.0003, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3759 }, { "completion_length": 167.1428680419922, "epoch": 3.6084452975047983, "grad_norm": 1.5650557279586792, "kl": 0.38252463936805725, "learning_rate": 1.3545689574841341e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3760 }, { "completion_length": 237.4285888671875, "epoch": 3.609404990403071, "grad_norm": 1.418296217918396, "kl": 0.3402608036994934, "learning_rate": 1.3479973039078385e-08, "loss": 0.0003, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3761 }, { "completion_length": 177.21429443359375, "epoch": 3.6103646833013436, "grad_norm": 1.632758378982544, "kl": 0.41640332341194153, "learning_rate": 1.3414411885099924e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3762 }, { "completion_length": 199.85714721679688, "epoch": 3.611324376199616, "grad_norm": 0.7810857892036438, "kl": 0.42159149050712585, "learning_rate": 1.3349006155976311e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3763 }, { "completion_length": 173.42857360839844, "epoch": 3.6122840690978886, "grad_norm": 1.3500378131866455, "kl": 0.34666818380355835, "learning_rate": 1.328375589467573e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3764 }, { "completion_length": 184.35714721679688, "epoch": 3.613243761996161, "grad_norm": 0.8730621933937073, "kl": 0.3468092381954193, "learning_rate": 1.3218661144064474e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3765 }, { "completion_length": 181.07144165039062, "epoch": 3.614203454894434, "grad_norm": 0.011823724023997784, "kl": 0.34568265080451965, "learning_rate": 1.315372194690642e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3766 }, { "completion_length": 179.92857360839844, "epoch": 3.6151631477927064, "grad_norm": 0.016320502385497093, "kl": 0.3859751522541046, "learning_rate": 1.3088938345863415e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3767 }, { "completion_length": 222.71429443359375, "epoch": 3.616122840690979, "grad_norm": 0.5771341323852539, "kl": 0.3539409041404724, "learning_rate": 1.3024310383495025e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3768 }, { "completion_length": 176.21429443359375, "epoch": 3.6170825335892514, "grad_norm": 0.026823995634913445, "kl": 0.42238572239875793, "learning_rate": 1.2959838102258535e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3769 }, { "completion_length": 187.71429443359375, "epoch": 3.618042226487524, "grad_norm": 0.01161117572337389, "kl": 0.31646791100502014, "learning_rate": 1.289552154450907e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3770 }, { "completion_length": 142.42857360839844, "epoch": 3.6190019193857967, "grad_norm": 1.881126046180725, "kl": 0.3565584719181061, "learning_rate": 1.2831360752499299e-08, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3771 }, { "completion_length": 202.92857360839844, "epoch": 3.619961612284069, "grad_norm": 0.9259598851203918, "kl": 0.38192448019981384, "learning_rate": 1.2767355768379701e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3772 }, { "completion_length": 166.2857208251953, "epoch": 3.6209213051823417, "grad_norm": 0.926468014717102, "kl": 0.4033023715019226, "learning_rate": 1.2703506634198364e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3773 }, { "completion_length": 177.21429443359375, "epoch": 3.621880998080614, "grad_norm": 0.013283665291965008, "kl": 0.36002448201179504, "learning_rate": 1.2639813391900872e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3774 }, { "completion_length": 174.07144165039062, "epoch": 3.6228406909788866, "grad_norm": 1.0223792791366577, "kl": 0.3693391978740692, "learning_rate": 1.2576276083330534e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3775 }, { "completion_length": 157.21429443359375, "epoch": 3.6238003838771595, "grad_norm": 0.010094906203448772, "kl": 0.3555733263492584, "learning_rate": 1.2512894750228209e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3776 }, { "completion_length": 162.35714721679688, "epoch": 3.624760076775432, "grad_norm": 2.89174222946167, "kl": 0.4768636226654053, "learning_rate": 1.2449669434232202e-08, "loss": 0.0005, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3777 }, { "completion_length": 165.1428680419922, "epoch": 3.6257197696737045, "grad_norm": 0.012147145345807076, "kl": 0.38758906722068787, "learning_rate": 1.2386600176878398e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3778 }, { "completion_length": 219.35714721679688, "epoch": 3.626679462571977, "grad_norm": 0.014940915629267693, "kl": 0.2798348367214203, "learning_rate": 1.232368701960007e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3779 }, { "completion_length": 161.57144165039062, "epoch": 3.6276391554702494, "grad_norm": 2.2677223682403564, "kl": 0.4481329321861267, "learning_rate": 1.2260930003728075e-08, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3780 }, { "completion_length": 222.35714721679688, "epoch": 3.6285988483685223, "grad_norm": 1.2484296560287476, "kl": 0.27002090215682983, "learning_rate": 1.2198329170490601e-08, "loss": 0.0003, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3781 }, { "completion_length": 201.1428680419922, "epoch": 3.629558541266795, "grad_norm": 1.2967246770858765, "kl": 0.37008342146873474, "learning_rate": 1.2135884561013305e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3782 }, { "completion_length": 178.07144165039062, "epoch": 3.6305182341650672, "grad_norm": 1.0821346044540405, "kl": 0.348824143409729, "learning_rate": 1.2073596216319037e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3783 }, { "completion_length": 197.92857360839844, "epoch": 3.6314779270633397, "grad_norm": 1.0040243864059448, "kl": 0.3686668574810028, "learning_rate": 1.2011464177328235e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3784 }, { "completion_length": 162.1428680419922, "epoch": 3.632437619961612, "grad_norm": 1.1358072757720947, "kl": 0.3823033273220062, "learning_rate": 1.194948848485844e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3785 }, { "completion_length": 201.00001525878906, "epoch": 3.633397312859885, "grad_norm": 1.693673849105835, "kl": 0.3216906189918518, "learning_rate": 1.188766917962461e-08, "loss": 0.0003, "reward": 0.7142857313156128, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "step": 3786 }, { "completion_length": 193.1428680419922, "epoch": 3.634357005758157, "grad_norm": 0.6414990425109863, "kl": 0.4205610156059265, "learning_rate": 1.1826006302238983e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3787 }, { "completion_length": 263.5714416503906, "epoch": 3.63531669865643, "grad_norm": 0.01537734642624855, "kl": 0.2782510221004486, "learning_rate": 1.1764499893210878e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3788 }, { "completion_length": 163.2857208251953, "epoch": 3.6362763915547025, "grad_norm": 1.2202413082122803, "kl": 0.3774109184741974, "learning_rate": 1.1703149992946942e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3789 }, { "completion_length": 148.35714721679688, "epoch": 3.637236084452975, "grad_norm": 0.010893617756664753, "kl": 0.3957313895225525, "learning_rate": 1.1641956641751022e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3790 }, { "completion_length": 187.00001525878906, "epoch": 3.6381957773512474, "grad_norm": 0.01317959651350975, "kl": 0.42184165120124817, "learning_rate": 1.1580919879824041e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3791 }, { "completion_length": 148.7857208251953, "epoch": 3.63915547024952, "grad_norm": 1.4359917640686035, "kl": 0.4749239385128021, "learning_rate": 1.152003974726412e-08, "loss": 0.0005, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3792 }, { "completion_length": 218.7857208251953, "epoch": 3.640115163147793, "grad_norm": 0.7877998948097229, "kl": 0.32181358337402344, "learning_rate": 1.1459316284066379e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3793 }, { "completion_length": 190.07144165039062, "epoch": 3.6410748560460653, "grad_norm": 1.4574048519134521, "kl": 0.34802383184432983, "learning_rate": 1.1398749530123125e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3794 }, { "completion_length": 225.6428680419922, "epoch": 3.6420345489443378, "grad_norm": 0.796306848526001, "kl": 0.3405497670173645, "learning_rate": 1.1338339525223727e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3795 }, { "completion_length": 176.7857208251953, "epoch": 3.6429942418426102, "grad_norm": 2.254810094833374, "kl": 0.3661993145942688, "learning_rate": 1.1278086309054497e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3796 }, { "completion_length": 123.42857360839844, "epoch": 3.6439539347408827, "grad_norm": 2.1678271293640137, "kl": 0.5157276391983032, "learning_rate": 1.1217989921198712e-08, "loss": 0.0005, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3797 }, { "completion_length": 229.85714721679688, "epoch": 3.6449136276391556, "grad_norm": 0.01686025597155094, "kl": 0.3203376531600952, "learning_rate": 1.1158050401136765e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3798 }, { "completion_length": 156.92857360839844, "epoch": 3.645873320537428, "grad_norm": 0.013270338997244835, "kl": 0.49987441301345825, "learning_rate": 1.1098267788245824e-08, "loss": 0.0005, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3799 }, { "completion_length": 200.35714721679688, "epoch": 3.6468330134357005, "grad_norm": 0.014679796993732452, "kl": 0.37610000371932983, "learning_rate": 1.1038642121800112e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3800 }, { "completion_length": 172.7857208251953, "epoch": 3.647792706333973, "grad_norm": 0.014814400114119053, "kl": 0.3932051360607147, "learning_rate": 1.0979173440970796e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3801 }, { "completion_length": 200.57144165039062, "epoch": 3.6487523992322455, "grad_norm": 0.012796144001185894, "kl": 0.35564425587654114, "learning_rate": 1.0919861784825623e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3802 }, { "completion_length": 235.00001525878906, "epoch": 3.6497120921305184, "grad_norm": 0.011199969798326492, "kl": 0.2847839891910553, "learning_rate": 1.0860707192329537e-08, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3803 }, { "completion_length": 245.35714721679688, "epoch": 3.650671785028791, "grad_norm": 0.7737447023391724, "kl": 0.27151334285736084, "learning_rate": 1.0801709702344008e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3804 }, { "completion_length": 222.50001525878906, "epoch": 3.6516314779270633, "grad_norm": 0.746620237827301, "kl": 0.28666990995407104, "learning_rate": 1.0742869353627532e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3805 }, { "completion_length": 214.35714721679688, "epoch": 3.652591170825336, "grad_norm": 0.7656450271606445, "kl": 0.30268046259880066, "learning_rate": 1.0684186184835215e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3806 }, { "completion_length": 244.1428680419922, "epoch": 3.6535508637236083, "grad_norm": 0.8219727873802185, "kl": 0.4052301347255707, "learning_rate": 1.0625660234518913e-08, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3807 }, { "completion_length": 203.71429443359375, "epoch": 3.654510556621881, "grad_norm": 0.9147432446479797, "kl": 0.32186031341552734, "learning_rate": 1.0567291541127288e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3808 }, { "completion_length": 204.7857208251953, "epoch": 3.6554702495201536, "grad_norm": 1.8902627229690552, "kl": 0.3770335912704468, "learning_rate": 1.0509080143005667e-08, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3809 }, { "completion_length": 236.71429443359375, "epoch": 3.656429942418426, "grad_norm": 1.269987940788269, "kl": 0.2859418988227844, "learning_rate": 1.0451026078395985e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3810 }, { "completion_length": 187.00001525878906, "epoch": 3.6573896353166986, "grad_norm": 0.011646146886050701, "kl": 0.3529660999774933, "learning_rate": 1.0393129385436823e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3811 }, { "completion_length": 221.6428680419922, "epoch": 3.658349328214971, "grad_norm": 1.5455914735794067, "kl": 0.32059022784233093, "learning_rate": 1.0335390102163444e-08, "loss": 0.0003, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 3812 }, { "completion_length": 216.35714721679688, "epoch": 3.659309021113244, "grad_norm": 0.025164227932691574, "kl": 0.3972429931163788, "learning_rate": 1.027780826650762e-08, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3813 }, { "completion_length": 185.92857360839844, "epoch": 3.6602687140115164, "grad_norm": 0.022712605074048042, "kl": 0.3855780065059662, "learning_rate": 1.0220383916297787e-08, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3814 }, { "completion_length": 205.85714721679688, "epoch": 3.661228406909789, "grad_norm": 1.150330901145935, "kl": 0.3481729328632355, "learning_rate": 1.0163117089258794e-08, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3815 }, { "completion_length": 139.1428680419922, "epoch": 3.6621880998080614, "grad_norm": 2.5786516666412354, "kl": 0.5372228622436523, "learning_rate": 1.0106007823012074e-08, "loss": 0.0005, "reward": 0.785714328289032, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.785714328289032, "step": 3816 }, { "completion_length": 203.1428680419922, "epoch": 3.663147792706334, "grad_norm": 0.9077898859977722, "kl": 0.3109014630317688, "learning_rate": 1.004905615507562e-08, "loss": 0.0003, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 3817 }, { "completion_length": 195.71429443359375, "epoch": 3.6641074856046068, "grad_norm": 0.716417670249939, "kl": 0.342434287071228, "learning_rate": 9.99226212286372e-09, "loss": 0.0003, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 3818 }, { "completion_length": 198.85714721679688, "epoch": 3.665067178502879, "grad_norm": 1.0658193826675415, "kl": 0.32139334082603455, "learning_rate": 9.935625763687283e-09, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3819 }, { "completion_length": 184.71429443359375, "epoch": 3.6660268714011517, "grad_norm": 1.7928929328918457, "kl": 0.39196425676345825, "learning_rate": 9.879147114753545e-09, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3820 }, { "completion_length": 182.00001525878906, "epoch": 3.666986564299424, "grad_norm": 1.251045823097229, "kl": 0.3642871677875519, "learning_rate": 9.822826213166074e-09, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3821 }, { "completion_length": 179.21429443359375, "epoch": 3.6679462571976966, "grad_norm": 0.010235805995762348, "kl": 0.37812474370002747, "learning_rate": 9.766663095924944e-09, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3822 }, { "completion_length": 176.07144165039062, "epoch": 3.6689059500959695, "grad_norm": 2.119469165802002, "kl": 0.6923273801803589, "learning_rate": 9.710657799926469e-09, "loss": 0.0007, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3823 }, { "completion_length": 217.6428680419922, "epoch": 3.669865642994242, "grad_norm": 0.010946603491902351, "kl": 0.28191420435905457, "learning_rate": 9.654810361963362e-09, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3824 }, { "completion_length": 215.07144165039062, "epoch": 3.6708253358925145, "grad_norm": 1.1220519542694092, "kl": 0.31089314818382263, "learning_rate": 9.599120818724521e-09, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3825 }, { "completion_length": 187.71429443359375, "epoch": 3.671785028790787, "grad_norm": 0.028371747583150864, "kl": 0.3962624669075012, "learning_rate": 9.543589206795238e-09, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3826 }, { "completion_length": 181.07144165039062, "epoch": 3.6727447216890594, "grad_norm": 0.8001058101654053, "kl": 0.3730444312095642, "learning_rate": 9.488215562656942e-09, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3827 }, { "completion_length": 142.35714721679688, "epoch": 3.6737044145873323, "grad_norm": 2.035280704498291, "kl": 0.4282230734825134, "learning_rate": 9.432999922687396e-09, "loss": 0.0004, "reward": 0.6428571939468384, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.6428571939468384, "step": 3828 }, { "completion_length": 148.57144165039062, "epoch": 3.6746641074856043, "grad_norm": 1.1290122270584106, "kl": 0.4057772755622864, "learning_rate": 9.377942323160497e-09, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3829 }, { "completion_length": 190.7857208251953, "epoch": 3.6756238003838773, "grad_norm": 0.012157374061644077, "kl": 0.37528178095817566, "learning_rate": 9.323042800246234e-09, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3830 }, { "completion_length": 202.57144165039062, "epoch": 3.6765834932821497, "grad_norm": 1.571549892425537, "kl": 0.38623344898223877, "learning_rate": 9.268301390010946e-09, "loss": 0.0004, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 3831 }, { "completion_length": 218.42857360839844, "epoch": 3.677543186180422, "grad_norm": 0.011334375478327274, "kl": 0.325613796710968, "learning_rate": 9.213718128416925e-09, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3832 }, { "completion_length": 182.35714721679688, "epoch": 3.6785028790786947, "grad_norm": 0.010738412849605083, "kl": 0.3448648154735565, "learning_rate": 9.159293051322681e-09, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3833 }, { "completion_length": 147.2857208251953, "epoch": 3.679462571976967, "grad_norm": 1.3329461812973022, "kl": 0.3897096812725067, "learning_rate": 9.105026194482751e-09, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3834 }, { "completion_length": 174.57144165039062, "epoch": 3.68042226487524, "grad_norm": 0.01427086628973484, "kl": 0.34608858823776245, "learning_rate": 9.050917593547675e-09, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3835 }, { "completion_length": 179.21429443359375, "epoch": 3.6813819577735125, "grad_norm": 0.011358145624399185, "kl": 0.32363834977149963, "learning_rate": 8.996967284064128e-09, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3836 }, { "completion_length": 234.9285888671875, "epoch": 3.682341650671785, "grad_norm": 1.48612380027771, "kl": 0.3232039511203766, "learning_rate": 8.943175301474814e-09, "loss": 0.0003, "reward": 0.6428571939468384, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.6428571939468384, "step": 3837 }, { "completion_length": 181.57144165039062, "epoch": 3.6833013435700575, "grad_norm": 1.362532615661621, "kl": 0.5471625328063965, "learning_rate": 8.889541681118274e-09, "loss": 0.0005, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3838 }, { "completion_length": 223.6428680419922, "epoch": 3.68426103646833, "grad_norm": 0.04167693108320236, "kl": 0.3298100233078003, "learning_rate": 8.836066458229153e-09, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3839 }, { "completion_length": 196.57144165039062, "epoch": 3.685220729366603, "grad_norm": 0.017461158335208893, "kl": 0.41152918338775635, "learning_rate": 8.782749667937961e-09, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3840 }, { "completion_length": 177.71429443359375, "epoch": 3.6861804222648753, "grad_norm": 1.3241841793060303, "kl": 0.4958183765411377, "learning_rate": 8.729591345271153e-09, "loss": 0.0005, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3841 }, { "completion_length": 172.71429443359375, "epoch": 3.6871401151631478, "grad_norm": 0.9647241234779358, "kl": 0.4915394186973572, "learning_rate": 8.676591525151127e-09, "loss": 0.0005, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 3842 }, { "completion_length": 184.35714721679688, "epoch": 3.6880998080614202, "grad_norm": 1.3220471143722534, "kl": 0.386569082736969, "learning_rate": 8.623750242396088e-09, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3843 }, { "completion_length": 177.07144165039062, "epoch": 3.6890595009596927, "grad_norm": 2.0087265968322754, "kl": 0.44485345482826233, "learning_rate": 8.571067531720017e-09, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3844 }, { "completion_length": 195.57144165039062, "epoch": 3.6900191938579656, "grad_norm": 0.009874084033071995, "kl": 0.2893816828727722, "learning_rate": 8.518543427732949e-09, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3845 }, { "completion_length": 173.7857208251953, "epoch": 3.690978886756238, "grad_norm": 0.019191479310393333, "kl": 0.4880392551422119, "learning_rate": 8.466177964940479e-09, "loss": 0.0005, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3846 }, { "completion_length": 173.85714721679688, "epoch": 3.6919385796545106, "grad_norm": 0.011677495203912258, "kl": 0.3270241320133209, "learning_rate": 8.413971177744144e-09, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3847 }, { "completion_length": 210.6428680419922, "epoch": 3.692898272552783, "grad_norm": 1.716440200805664, "kl": 0.3290935456752777, "learning_rate": 8.361923100441148e-09, "loss": 0.0003, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3848 }, { "completion_length": 200.71429443359375, "epoch": 3.6938579654510555, "grad_norm": 0.9446393251419067, "kl": 0.7092111110687256, "learning_rate": 8.310033767224473e-09, "loss": 0.0007, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3849 }, { "completion_length": 195.85714721679688, "epoch": 3.6948176583493284, "grad_norm": 0.01258152723312378, "kl": 0.3851985037326813, "learning_rate": 8.258303212182821e-09, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3850 }, { "completion_length": 136.42857360839844, "epoch": 3.695777351247601, "grad_norm": 1.233411431312561, "kl": 0.45199790596961975, "learning_rate": 8.206731469300565e-09, "loss": 0.0005, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3851 }, { "completion_length": 205.35714721679688, "epoch": 3.6967370441458733, "grad_norm": 1.4762423038482666, "kl": 0.3701041042804718, "learning_rate": 8.15531857245777e-09, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3852 }, { "completion_length": 283.14288330078125, "epoch": 3.697696737044146, "grad_norm": 0.48527276515960693, "kl": 0.24256011843681335, "learning_rate": 8.104064555430084e-09, "loss": 0.0002, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3853 }, { "completion_length": 165.6428680419922, "epoch": 3.6986564299424183, "grad_norm": 2.220635175704956, "kl": 0.39287248253822327, "learning_rate": 8.052969451888824e-09, "loss": 0.0004, "reward": 0.7142857313156128, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "step": 3854 }, { "completion_length": 129.42857360839844, "epoch": 3.699616122840691, "grad_norm": 1.7906221151351929, "kl": 0.4748733937740326, "learning_rate": 8.002033295400945e-09, "loss": 0.0005, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3855 }, { "completion_length": 190.85714721679688, "epoch": 3.7005758157389637, "grad_norm": 1.9054042100906372, "kl": 0.4271790087223053, "learning_rate": 7.95125611942893e-09, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3856 }, { "completion_length": 215.00001525878906, "epoch": 3.701535508637236, "grad_norm": 0.7109850645065308, "kl": 0.2761542499065399, "learning_rate": 7.900637957330869e-09, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3857 }, { "completion_length": 141.07144165039062, "epoch": 3.7024952015355086, "grad_norm": 0.8752802014350891, "kl": 0.4815736711025238, "learning_rate": 7.85017884236025e-09, "loss": 0.0005, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3858 }, { "completion_length": 153.35714721679688, "epoch": 3.703454894433781, "grad_norm": 3.0079967975616455, "kl": 0.40955838561058044, "learning_rate": 7.799878807666305e-09, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3859 }, { "completion_length": 139.85714721679688, "epoch": 3.704414587332054, "grad_norm": 0.011673086322844028, "kl": 0.4660263955593109, "learning_rate": 7.749737886293545e-09, "loss": 0.0005, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3860 }, { "completion_length": 165.42857360839844, "epoch": 3.7053742802303264, "grad_norm": 0.013244574889540672, "kl": 0.40114766359329224, "learning_rate": 7.699756111182065e-09, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3861 }, { "completion_length": 153.7857208251953, "epoch": 3.706333973128599, "grad_norm": 1.3215833902359009, "kl": 0.3994078040122986, "learning_rate": 7.649933515167406e-09, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3862 }, { "completion_length": 188.35714721679688, "epoch": 3.7072936660268714, "grad_norm": 0.01714291051030159, "kl": 0.34581416845321655, "learning_rate": 7.600270130980469e-09, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3863 }, { "completion_length": 138.2857208251953, "epoch": 3.708253358925144, "grad_norm": 1.8666753768920898, "kl": 0.4599756598472595, "learning_rate": 7.550765991247654e-09, "loss": 0.0005, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3864 }, { "completion_length": 220.1428680419922, "epoch": 3.7092130518234168, "grad_norm": 0.7374548316001892, "kl": 0.2869422137737274, "learning_rate": 7.501421128490616e-09, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3865 }, { "completion_length": 181.2857208251953, "epoch": 3.710172744721689, "grad_norm": 0.9338016510009766, "kl": 0.3900291323661804, "learning_rate": 7.452235575126558e-09, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3866 }, { "completion_length": 161.5, "epoch": 3.7111324376199617, "grad_norm": 1.6216187477111816, "kl": 0.3841131329536438, "learning_rate": 7.40320936346786e-09, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3867 }, { "completion_length": 214.92857360839844, "epoch": 3.712092130518234, "grad_norm": 1.0508121252059937, "kl": 0.3019157946109772, "learning_rate": 7.3543425257222565e-09, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3868 }, { "completion_length": 192.57144165039062, "epoch": 3.7130518234165066, "grad_norm": 0.014199444092810154, "kl": 0.3743788003921509, "learning_rate": 7.305635093992818e-09, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3869 }, { "completion_length": 126.85714721679688, "epoch": 3.714011516314779, "grad_norm": 1.3409433364868164, "kl": 0.48173823952674866, "learning_rate": 7.257087100277947e-09, "loss": 0.0005, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 3870 }, { "completion_length": 216.71429443359375, "epoch": 3.7149712092130516, "grad_norm": 0.009042283520102501, "kl": 0.2730352580547333, "learning_rate": 7.208698576471217e-09, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3871 }, { "completion_length": 193.42857360839844, "epoch": 3.7159309021113245, "grad_norm": 1.2403225898742676, "kl": 0.28590068221092224, "learning_rate": 7.1604695543613644e-09, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3872 }, { "completion_length": 172.57144165039062, "epoch": 3.716890595009597, "grad_norm": 0.013387874700129032, "kl": 0.3828137516975403, "learning_rate": 7.112400065632573e-09, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3873 }, { "completion_length": 255.57144165039062, "epoch": 3.7178502879078694, "grad_norm": 0.008411051705479622, "kl": 0.25658029317855835, "learning_rate": 7.064490141863971e-09, "loss": 0.0003, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3874 }, { "completion_length": 187.21429443359375, "epoch": 3.718809980806142, "grad_norm": 1.472833275794983, "kl": 0.32459312677383423, "learning_rate": 7.0167398145300485e-09, "loss": 0.0003, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3875 }, { "completion_length": 166.85714721679688, "epoch": 3.7197696737044144, "grad_norm": 0.009752065874636173, "kl": 0.37205567955970764, "learning_rate": 6.969149115000322e-09, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3876 }, { "completion_length": 193.07144165039062, "epoch": 3.7207293666026873, "grad_norm": 0.011577499099075794, "kl": 0.31940704584121704, "learning_rate": 6.921718074539507e-09, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3877 }, { "completion_length": 154.71429443359375, "epoch": 3.7216890595009597, "grad_norm": 1.458639144897461, "kl": 0.477652370929718, "learning_rate": 6.8744467243074535e-09, "loss": 0.0005, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 3878 }, { "completion_length": 225.57144165039062, "epoch": 3.722648752399232, "grad_norm": 0.5106435418128967, "kl": 0.32252198457717896, "learning_rate": 6.827335095359016e-09, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3879 }, { "completion_length": 197.2857208251953, "epoch": 3.7236084452975047, "grad_norm": 0.019877128303050995, "kl": 0.3801794946193695, "learning_rate": 6.780383218644214e-09, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3880 }, { "completion_length": 172.85714721679688, "epoch": 3.724568138195777, "grad_norm": 0.010264646261930466, "kl": 0.3335416913032532, "learning_rate": 6.733591125008043e-09, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3881 }, { "completion_length": 204.00001525878906, "epoch": 3.72552783109405, "grad_norm": 1.543419361114502, "kl": 0.3166002035140991, "learning_rate": 6.686958845190521e-09, "loss": 0.0003, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3882 }, { "completion_length": 220.85714721679688, "epoch": 3.7264875239923225, "grad_norm": 0.944553792476654, "kl": 0.28794682025909424, "learning_rate": 6.640486409826785e-09, "loss": 0.0003, "reward": 0.7142857313156128, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "step": 3883 }, { "completion_length": 179.07144165039062, "epoch": 3.727447216890595, "grad_norm": 0.010419627651572227, "kl": 0.3401702046394348, "learning_rate": 6.594173849446883e-09, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3884 }, { "completion_length": 237.1428680419922, "epoch": 3.7284069097888675, "grad_norm": 0.7759705781936646, "kl": 0.28887301683425903, "learning_rate": 6.548021194475839e-09, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3885 }, { "completion_length": 210.00001525878906, "epoch": 3.72936660268714, "grad_norm": 1.4454275369644165, "kl": 0.2715246081352234, "learning_rate": 6.502028475233567e-09, "loss": 0.0003, "reward": 0.785714328289032, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.785714328289032, "step": 3886 }, { "completion_length": 203.2857208251953, "epoch": 3.730326295585413, "grad_norm": 1.1614502668380737, "kl": 0.3312382102012634, "learning_rate": 6.456195721935065e-09, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3887 }, { "completion_length": 165.42857360839844, "epoch": 3.7312859884836853, "grad_norm": 1.9807345867156982, "kl": 0.4152242839336395, "learning_rate": 6.41052296469008e-09, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3888 }, { "completion_length": 189.71429443359375, "epoch": 3.732245681381958, "grad_norm": 1.0013693571090698, "kl": 0.3095037043094635, "learning_rate": 6.3650102335033875e-09, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3889 }, { "completion_length": 237.07144165039062, "epoch": 3.7332053742802302, "grad_norm": 0.0386454202234745, "kl": 0.3555830121040344, "learning_rate": 6.319657558274516e-09, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3890 }, { "completion_length": 160.1428680419922, "epoch": 3.7341650671785027, "grad_norm": 2.924755573272705, "kl": 0.55884850025177, "learning_rate": 6.27446496879791e-09, "loss": 0.0006, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3891 }, { "completion_length": 225.21429443359375, "epoch": 3.7351247600767756, "grad_norm": 0.020613206550478935, "kl": 0.3286072313785553, "learning_rate": 6.229432494762849e-09, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3892 }, { "completion_length": 162.21429443359375, "epoch": 3.736084452975048, "grad_norm": 2.0160014629364014, "kl": 0.43618255853652954, "learning_rate": 6.184560165753361e-09, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3893 }, { "completion_length": 134.71429443359375, "epoch": 3.7370441458733206, "grad_norm": 0.019477616995573044, "kl": 0.4731142520904541, "learning_rate": 6.139848011248366e-09, "loss": 0.0005, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3894 }, { "completion_length": 227.4285888671875, "epoch": 3.738003838771593, "grad_norm": 1.3840131759643555, "kl": 0.32807230949401855, "learning_rate": 6.09529606062148e-09, "loss": 0.0003, "reward": 0.785714328289032, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.785714328289032, "step": 3895 }, { "completion_length": 228.07144165039062, "epoch": 3.7389635316698655, "grad_norm": 0.026258692145347595, "kl": 0.3582926094532013, "learning_rate": 6.0509043431410945e-09, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3896 }, { "completion_length": 157.21429443359375, "epoch": 3.7399232245681384, "grad_norm": 1.2867950201034546, "kl": 0.38582488894462585, "learning_rate": 6.006672887970354e-09, "loss": 0.0004, "reward": 0.7142857313156128, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "step": 3897 }, { "completion_length": 147.35714721679688, "epoch": 3.740882917466411, "grad_norm": 0.9129500985145569, "kl": 0.482221394777298, "learning_rate": 5.962601724167099e-09, "loss": 0.0005, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3898 }, { "completion_length": 230.85714721679688, "epoch": 3.7418426103646834, "grad_norm": 1.5285570621490479, "kl": 0.31819701194763184, "learning_rate": 5.918690880683891e-09, "loss": 0.0003, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3899 }, { "completion_length": 203.7857208251953, "epoch": 3.742802303262956, "grad_norm": 0.8944012522697449, "kl": 0.31420204043388367, "learning_rate": 5.874940386367904e-09, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3900 }, { "completion_length": 194.00001525878906, "epoch": 3.7437619961612283, "grad_norm": 1.2733722925186157, "kl": 0.35882100462913513, "learning_rate": 5.831350269961038e-09, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3901 }, { "completion_length": 188.7857208251953, "epoch": 3.744721689059501, "grad_norm": 0.8827671408653259, "kl": 0.38147202134132385, "learning_rate": 5.7879205600998296e-09, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3902 }, { "completion_length": 210.00001525878906, "epoch": 3.7456813819577737, "grad_norm": 0.023652415722608566, "kl": 0.3266482353210449, "learning_rate": 5.744651285315399e-09, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3903 }, { "completion_length": 234.71429443359375, "epoch": 3.746641074856046, "grad_norm": 0.7226317524909973, "kl": 0.2700656056404114, "learning_rate": 5.70154247403351e-09, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3904 }, { "completion_length": 144.85714721679688, "epoch": 3.7476007677543186, "grad_norm": 2.743370532989502, "kl": 0.5020430088043213, "learning_rate": 5.6585941545744255e-09, "loss": 0.0005, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3905 }, { "completion_length": 169.42857360839844, "epoch": 3.748560460652591, "grad_norm": 0.020276660099625587, "kl": 0.4866567850112915, "learning_rate": 5.615806355153102e-09, "loss": 0.0005, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3906 }, { "completion_length": 224.21429443359375, "epoch": 3.749520153550864, "grad_norm": 0.0128764808177948, "kl": 0.3120015263557434, "learning_rate": 5.573179103878944e-09, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3907 }, { "completion_length": 211.92857360839844, "epoch": 3.750479846449136, "grad_norm": 0.01441597193479538, "kl": 0.32972636818885803, "learning_rate": 5.530712428755968e-09, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3908 }, { "completion_length": 211.35714721679688, "epoch": 3.751439539347409, "grad_norm": 0.011147762648761272, "kl": 0.3025769591331482, "learning_rate": 5.488406357682579e-09, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3909 }, { "completion_length": 226.21429443359375, "epoch": 3.7523992322456814, "grad_norm": 0.9831979274749756, "kl": 0.3146727681159973, "learning_rate": 5.446260918451739e-09, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3910 }, { "completion_length": 210.00001525878906, "epoch": 3.753358925143954, "grad_norm": 0.9837968349456787, "kl": 0.3012121617794037, "learning_rate": 5.404276138750913e-09, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3911 }, { "completion_length": 208.92857360839844, "epoch": 3.7543186180422263, "grad_norm": 1.584861159324646, "kl": 0.32263103127479553, "learning_rate": 5.362452046162036e-09, "loss": 0.0003, "reward": 0.7142857313156128, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "step": 3912 }, { "completion_length": 159.42857360839844, "epoch": 3.755278310940499, "grad_norm": 1.2513431310653687, "kl": 0.4414961338043213, "learning_rate": 5.3207886681613804e-09, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3913 }, { "completion_length": 144.0, "epoch": 3.7562380038387717, "grad_norm": 1.9541316032409668, "kl": 0.4994313716888428, "learning_rate": 5.2792860321197175e-09, "loss": 0.0005, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3914 }, { "completion_length": 197.57144165039062, "epoch": 3.757197696737044, "grad_norm": 0.020796583965420723, "kl": 0.34648096561431885, "learning_rate": 5.237944165302183e-09, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3915 }, { "completion_length": 179.6428680419922, "epoch": 3.7581573896353166, "grad_norm": 0.014606053940951824, "kl": 0.35075628757476807, "learning_rate": 5.1967630948683004e-09, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3916 }, { "completion_length": 246.7857208251953, "epoch": 3.759117082533589, "grad_norm": 1.0329357385635376, "kl": 0.30969172716140747, "learning_rate": 5.155742847871986e-09, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3917 }, { "completion_length": 207.1428680419922, "epoch": 3.7600767754318616, "grad_norm": 1.0737463235855103, "kl": 0.3489067256450653, "learning_rate": 5.114883451261487e-09, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3918 }, { "completion_length": 179.00001525878906, "epoch": 3.7610364683301345, "grad_norm": 1.1121288537979126, "kl": 0.3759916424751282, "learning_rate": 5.074184931879333e-09, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3919 }, { "completion_length": 180.57144165039062, "epoch": 3.761996161228407, "grad_norm": 1.4769893884658813, "kl": 0.3267170488834381, "learning_rate": 5.033647316462469e-09, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3920 }, { "completion_length": 127.42857360839844, "epoch": 3.7629558541266794, "grad_norm": 1.5632885694503784, "kl": 0.5369675159454346, "learning_rate": 4.993270631642038e-09, "loss": 0.0005, "reward": 0.7142857313156128, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "step": 3921 }, { "completion_length": 224.85714721679688, "epoch": 3.763915547024952, "grad_norm": 0.8958075642585754, "kl": 0.311603844165802, "learning_rate": 4.953054903943516e-09, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3922 }, { "completion_length": 222.00001525878906, "epoch": 3.7648752399232244, "grad_norm": 0.01883944310247898, "kl": 0.334329217672348, "learning_rate": 4.913000159786573e-09, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3923 }, { "completion_length": 224.07144165039062, "epoch": 3.7658349328214973, "grad_norm": 0.12779365479946136, "kl": 0.5799219012260437, "learning_rate": 4.873106425485191e-09, "loss": 0.0006, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3924 }, { "completion_length": 182.00001525878906, "epoch": 3.7667946257197698, "grad_norm": 0.8174317479133606, "kl": 0.33322906494140625, "learning_rate": 4.8333737272475694e-09, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3925 }, { "completion_length": 172.42857360839844, "epoch": 3.767754318618042, "grad_norm": 1.3834545612335205, "kl": 0.38960975408554077, "learning_rate": 4.79380209117608e-09, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3926 }, { "completion_length": 156.85714721679688, "epoch": 3.7687140115163147, "grad_norm": 2.0504963397979736, "kl": 0.3891504406929016, "learning_rate": 4.754391543267316e-09, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3927 }, { "completion_length": 219.4285888671875, "epoch": 3.769673704414587, "grad_norm": 0.00968301109969616, "kl": 0.28959596157073975, "learning_rate": 4.715142109412013e-09, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3928 }, { "completion_length": 282.4285888671875, "epoch": 3.77063339731286, "grad_norm": 0.011177824810147285, "kl": 0.27275919914245605, "learning_rate": 4.67605381539507e-09, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3929 }, { "completion_length": 237.1428680419922, "epoch": 3.7715930902111325, "grad_norm": 0.009649254381656647, "kl": 0.2891879975795746, "learning_rate": 4.637126686895532e-09, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3930 }, { "completion_length": 160.6428680419922, "epoch": 3.772552783109405, "grad_norm": 1.2540463209152222, "kl": 0.35316577553749084, "learning_rate": 4.59836074948658e-09, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3931 }, { "completion_length": 171.6428680419922, "epoch": 3.7735124760076775, "grad_norm": 0.015019064769148827, "kl": 0.3535713255405426, "learning_rate": 4.559756028635536e-09, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3932 }, { "completion_length": 265.4285888671875, "epoch": 3.77447216890595, "grad_norm": 0.008499777875840664, "kl": 0.2827429473400116, "learning_rate": 4.521312549703671e-09, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3933 }, { "completion_length": 189.35714721679688, "epoch": 3.775431861804223, "grad_norm": 0.013978462666273117, "kl": 0.35088929533958435, "learning_rate": 4.483030337946475e-09, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3934 }, { "completion_length": 212.7857208251953, "epoch": 3.7763915547024953, "grad_norm": 0.012485097162425518, "kl": 0.3520379960536957, "learning_rate": 4.444909418513443e-09, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3935 }, { "completion_length": 285.2857360839844, "epoch": 3.777351247600768, "grad_norm": 0.010697700083255768, "kl": 0.23669278621673584, "learning_rate": 4.406949816448097e-09, "loss": 0.0002, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3936 }, { "completion_length": 196.57144165039062, "epoch": 3.7783109404990403, "grad_norm": 0.014231154695153236, "kl": 0.32110610604286194, "learning_rate": 4.369151556687989e-09, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3937 }, { "completion_length": 204.00001525878906, "epoch": 3.7792706333973127, "grad_norm": 0.016836294904351234, "kl": 0.34712353348731995, "learning_rate": 4.331514664064645e-09, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3938 }, { "completion_length": 136.1428680419922, "epoch": 3.7802303262955856, "grad_norm": 0.015332636423408985, "kl": 0.5000780820846558, "learning_rate": 4.294039163303675e-09, "loss": 0.0005, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3939 }, { "completion_length": 161.7857208251953, "epoch": 3.781190019193858, "grad_norm": 1.2781989574432373, "kl": 0.3257073760032654, "learning_rate": 4.256725079024553e-09, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3940 }, { "completion_length": 183.35714721679688, "epoch": 3.7821497120921306, "grad_norm": 0.02344488725066185, "kl": 0.3854253888130188, "learning_rate": 4.219572435740809e-09, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3941 }, { "completion_length": 212.2857208251953, "epoch": 3.783109404990403, "grad_norm": 1.1235771179199219, "kl": 0.27932870388031006, "learning_rate": 4.182581257859807e-09, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3942 }, { "completion_length": 201.00001525878906, "epoch": 3.7840690978886755, "grad_norm": 0.7316555380821228, "kl": 0.34856435656547546, "learning_rate": 4.145751569682915e-09, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3943 }, { "completion_length": 183.92857360839844, "epoch": 3.7850287907869484, "grad_norm": 0.009856321848928928, "kl": 0.3457295298576355, "learning_rate": 4.109083395405416e-09, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3944 }, { "completion_length": 106.92857360839844, "epoch": 3.785988483685221, "grad_norm": 2.2163748741149902, "kl": 0.5552968978881836, "learning_rate": 4.0725767591164305e-09, "loss": 0.0006, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3945 }, { "completion_length": 209.71429443359375, "epoch": 3.7869481765834934, "grad_norm": 1.2895225286483765, "kl": 0.2757641673088074, "learning_rate": 4.036231684798996e-09, "loss": 0.0003, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3946 }, { "completion_length": 235.00001525878906, "epoch": 3.787907869481766, "grad_norm": 0.01585318148136139, "kl": 0.28436529636383057, "learning_rate": 4.000048196330014e-09, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3947 }, { "completion_length": 167.0, "epoch": 3.7888675623800383, "grad_norm": 1.43354332447052, "kl": 0.41655412316322327, "learning_rate": 3.96402631748019e-09, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3948 }, { "completion_length": 166.42857360839844, "epoch": 3.789827255278311, "grad_norm": 0.012596962973475456, "kl": 0.35266536474227905, "learning_rate": 3.928166071914096e-09, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3949 }, { "completion_length": 222.07144165039062, "epoch": 3.7907869481765832, "grad_norm": 1.0051923990249634, "kl": 0.3283822238445282, "learning_rate": 3.892467483190137e-09, "loss": 0.0003, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 3950 }, { "completion_length": 149.2857208251953, "epoch": 3.791746641074856, "grad_norm": 1.2187893390655518, "kl": 0.6008193492889404, "learning_rate": 3.856930574760497e-09, "loss": 0.0006, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 3951 }, { "completion_length": 185.71429443359375, "epoch": 3.7927063339731286, "grad_norm": 0.008611822500824928, "kl": 0.3233978748321533, "learning_rate": 3.8215553699710855e-09, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3952 }, { "completion_length": 184.57144165039062, "epoch": 3.793666026871401, "grad_norm": 1.1409248113632202, "kl": 0.3910209834575653, "learning_rate": 3.7863418920617004e-09, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3953 }, { "completion_length": 152.71429443359375, "epoch": 3.7946257197696736, "grad_norm": 2.117875099182129, "kl": 0.3825949430465698, "learning_rate": 3.75129016416581e-09, "loss": 0.0004, "reward": 0.785714328289032, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.785714328289032, "step": 3954 }, { "completion_length": 173.7857208251953, "epoch": 3.795585412667946, "grad_norm": 0.013668264262378216, "kl": 0.3942309021949768, "learning_rate": 3.7164002093106327e-09, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3955 }, { "completion_length": 199.07144165039062, "epoch": 3.796545105566219, "grad_norm": 0.8391189575195312, "kl": 0.3396845757961273, "learning_rate": 3.6816720504171116e-09, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3956 }, { "completion_length": 163.5, "epoch": 3.7975047984644914, "grad_norm": 1.2647745609283447, "kl": 0.4095102548599243, "learning_rate": 3.647105710299886e-09, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3957 }, { "completion_length": 144.35714721679688, "epoch": 3.798464491362764, "grad_norm": 0.7463274598121643, "kl": 0.4201210141181946, "learning_rate": 3.6127012116673463e-09, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3958 }, { "completion_length": 235.57144165039062, "epoch": 3.7994241842610363, "grad_norm": 0.01117816660553217, "kl": 0.26909369230270386, "learning_rate": 3.5784585771215235e-09, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3959 }, { "completion_length": 208.71429443359375, "epoch": 3.800383877159309, "grad_norm": 0.6979339718818665, "kl": 0.35971906781196594, "learning_rate": 3.5443778291580627e-09, "loss": 0.0004, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 3960 }, { "completion_length": 136.21429443359375, "epoch": 3.8013435700575817, "grad_norm": 2.206610918045044, "kl": 0.4875120520591736, "learning_rate": 3.5104589901663317e-09, "loss": 0.0005, "reward": 0.785714328289032, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.785714328289032, "step": 3961 }, { "completion_length": 221.21429443359375, "epoch": 3.802303262955854, "grad_norm": 0.008105576038360596, "kl": 0.2690197825431824, "learning_rate": 3.4767020824293393e-09, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3962 }, { "completion_length": 232.21429443359375, "epoch": 3.8032629558541267, "grad_norm": 0.016194183379411697, "kl": 0.2706940770149231, "learning_rate": 3.4431071281236234e-09, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3963 }, { "completion_length": 147.2857208251953, "epoch": 3.804222648752399, "grad_norm": 0.014733432792127132, "kl": 0.49679017066955566, "learning_rate": 3.4096741493194193e-09, "loss": 0.0005, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3964 }, { "completion_length": 209.92857360839844, "epoch": 3.8051823416506716, "grad_norm": 0.010985284112393856, "kl": 0.3043674826622009, "learning_rate": 3.3764031679804905e-09, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3965 }, { "completion_length": 211.07144165039062, "epoch": 3.8061420345489445, "grad_norm": 0.010399349965155125, "kl": 0.274086058139801, "learning_rate": 3.343294205964214e-09, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3966 }, { "completion_length": 189.00001525878906, "epoch": 3.807101727447217, "grad_norm": 1.119625210762024, "kl": 0.39881497621536255, "learning_rate": 3.3103472850215254e-09, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3967 }, { "completion_length": 208.00001525878906, "epoch": 3.8080614203454894, "grad_norm": 0.010855250060558319, "kl": 0.35099080204963684, "learning_rate": 3.277562426796859e-09, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3968 }, { "completion_length": 174.07144165039062, "epoch": 3.809021113243762, "grad_norm": 1.5359864234924316, "kl": 0.4690759778022766, "learning_rate": 3.244939652828266e-09, "loss": 0.0005, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3969 }, { "completion_length": 227.6428680419922, "epoch": 3.8099808061420344, "grad_norm": 1.8245314359664917, "kl": 0.30542582273483276, "learning_rate": 3.212478984547268e-09, "loss": 0.0003, "reward": 0.785714328289032, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.785714328289032, "step": 3970 }, { "completion_length": 199.07144165039062, "epoch": 3.8109404990403073, "grad_norm": 0.017369132488965988, "kl": 0.35695502161979675, "learning_rate": 3.1801804432788347e-09, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3971 }, { "completion_length": 204.92857360839844, "epoch": 3.8119001919385798, "grad_norm": 0.013423120602965355, "kl": 0.29269683361053467, "learning_rate": 3.148044050241577e-09, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3972 }, { "completion_length": 203.85714721679688, "epoch": 3.8128598848368522, "grad_norm": 1.0029667615890503, "kl": 0.36677151918411255, "learning_rate": 3.1160698265474406e-09, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3973 }, { "completion_length": 211.6428680419922, "epoch": 3.8138195777351247, "grad_norm": 0.013023951090872288, "kl": 0.3102209270000458, "learning_rate": 3.084257793201872e-09, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3974 }, { "completion_length": 270.0, "epoch": 3.814779270633397, "grad_norm": 0.013916467316448689, "kl": 0.29423585534095764, "learning_rate": 3.052607971103821e-09, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3975 }, { "completion_length": 185.6428680419922, "epoch": 3.81573896353167, "grad_norm": 0.02179834060370922, "kl": 0.3488014340400696, "learning_rate": 3.0211203810455998e-09, "loss": 0.0003, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3976 }, { "completion_length": 213.71429443359375, "epoch": 3.8166986564299425, "grad_norm": 0.7148424983024597, "kl": 0.358264297246933, "learning_rate": 2.9897950437129673e-09, "loss": 0.0004, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 3977 }, { "completion_length": 159.07144165039062, "epoch": 3.817658349328215, "grad_norm": 1.8242460489273071, "kl": 0.45495301485061646, "learning_rate": 2.9586319796851555e-09, "loss": 0.0005, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3978 }, { "completion_length": 249.85714721679688, "epoch": 3.8186180422264875, "grad_norm": 0.012404732406139374, "kl": 0.27654924988746643, "learning_rate": 2.927631209434678e-09, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3979 }, { "completion_length": 171.85714721679688, "epoch": 3.81957773512476, "grad_norm": 1.0428351163864136, "kl": 0.3557594418525696, "learning_rate": 2.8967927533274928e-09, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3980 }, { "completion_length": 182.7857208251953, "epoch": 3.820537428023033, "grad_norm": 0.013336259871721268, "kl": 0.37002238631248474, "learning_rate": 2.8661166316229223e-09, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3981 }, { "completion_length": 151.1428680419922, "epoch": 3.8214971209213053, "grad_norm": 1.8105970621109009, "kl": 0.44254088401794434, "learning_rate": 2.835602864473652e-09, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3982 }, { "completion_length": 181.35714721679688, "epoch": 3.822456813819578, "grad_norm": 0.023060062900185585, "kl": 0.36164042353630066, "learning_rate": 2.8052514719256737e-09, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3983 }, { "completion_length": 161.6428680419922, "epoch": 3.8234165067178503, "grad_norm": 1.2221312522888184, "kl": 0.37187597155570984, "learning_rate": 2.7750624739183436e-09, "loss": 0.0004, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 3984 }, { "completion_length": 147.85714721679688, "epoch": 3.8243761996161227, "grad_norm": 0.013391690328717232, "kl": 0.4106835722923279, "learning_rate": 2.7450358902842974e-09, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3985 }, { "completion_length": 181.6428680419922, "epoch": 3.8253358925143957, "grad_norm": 0.016139794141054153, "kl": 0.35968318581581116, "learning_rate": 2.7151717407495333e-09, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3986 }, { "completion_length": 163.0, "epoch": 3.8262955854126677, "grad_norm": 1.0668833255767822, "kl": 0.4052383601665497, "learning_rate": 2.6854700449332746e-09, "loss": 0.0004, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 3987 }, { "completion_length": 201.71429443359375, "epoch": 3.8272552783109406, "grad_norm": 1.4411678314208984, "kl": 0.34343063831329346, "learning_rate": 2.6559308223480236e-09, "loss": 0.0003, "reward": 0.7142857313156128, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.7142857313156128, "step": 3988 }, { "completion_length": 225.50001525878906, "epoch": 3.828214971209213, "grad_norm": 0.010040715336799622, "kl": 0.32723140716552734, "learning_rate": 2.6265540923996187e-09, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3989 }, { "completion_length": 249.71429443359375, "epoch": 3.8291746641074855, "grad_norm": 0.5632317662239075, "kl": 0.5914551615715027, "learning_rate": 2.597339874387039e-09, "loss": 0.0006, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3990 }, { "completion_length": 187.35714721679688, "epoch": 3.830134357005758, "grad_norm": 0.015994608402252197, "kl": 0.347960889339447, "learning_rate": 2.568288187502571e-09, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3991 }, { "completion_length": 195.50001525878906, "epoch": 3.8310940499040305, "grad_norm": 1.162766933441162, "kl": 0.43827587366104126, "learning_rate": 2.539399050831753e-09, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3992 }, { "completion_length": 192.85714721679688, "epoch": 3.8320537428023034, "grad_norm": 1.9236512184143066, "kl": 0.3495785892009735, "learning_rate": 2.5106724833532656e-09, "loss": 0.0003, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 3993 }, { "completion_length": 259.14288330078125, "epoch": 3.833013435700576, "grad_norm": 0.023714613169431686, "kl": 0.3103225827217102, "learning_rate": 2.48210850393904e-09, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3994 }, { "completion_length": 149.42857360839844, "epoch": 3.8339731285988483, "grad_norm": 1.1156513690948486, "kl": 0.3855169117450714, "learning_rate": 2.4537071313541493e-09, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 3995 }, { "completion_length": 192.92857360839844, "epoch": 3.834932821497121, "grad_norm": 0.045828647911548615, "kl": 0.45295900106430054, "learning_rate": 2.4254683842568903e-09, "loss": 0.0005, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3996 }, { "completion_length": 185.57144165039062, "epoch": 3.8358925143953932, "grad_norm": 2.0400071144104004, "kl": 0.42561906576156616, "learning_rate": 2.397392281198729e-09, "loss": 0.0004, "reward": 0.785714328289032, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.785714328289032, "step": 3997 }, { "completion_length": 216.2857208251953, "epoch": 3.836852207293666, "grad_norm": 0.9157236218452454, "kl": 0.29253214597702026, "learning_rate": 2.3694788406241894e-09, "loss": 0.0003, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 3998 }, { "completion_length": 176.71429443359375, "epoch": 3.8378119001919386, "grad_norm": 0.01237716805189848, "kl": 0.38220176100730896, "learning_rate": 2.3417280808710473e-09, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 3999 }, { "completion_length": 213.00001525878906, "epoch": 3.838771593090211, "grad_norm": 1.6870381832122803, "kl": 0.3672199845314026, "learning_rate": 2.3141400201701355e-09, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 4000 }, { "completion_length": 137.7857208251953, "epoch": 3.8397312859884836, "grad_norm": 1.0927585363388062, "kl": 0.43745654821395874, "learning_rate": 2.286714676645457e-09, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 4001 }, { "completion_length": 166.57144165039062, "epoch": 3.840690978886756, "grad_norm": 2.2027244567871094, "kl": 0.4537833631038666, "learning_rate": 2.2594520683140715e-09, "loss": 0.0005, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 4002 }, { "completion_length": 211.35714721679688, "epoch": 3.841650671785029, "grad_norm": 1.2441211938858032, "kl": 0.32886287569999695, "learning_rate": 2.232352213086125e-09, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 4003 }, { "completion_length": 195.2857208251953, "epoch": 3.8426103646833014, "grad_norm": 0.01296982727944851, "kl": 0.3376237154006958, "learning_rate": 2.205415128764876e-09, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 4004 }, { "completion_length": 231.35714721679688, "epoch": 3.843570057581574, "grad_norm": 0.011234140023589134, "kl": 0.30275312066078186, "learning_rate": 2.1786408330466133e-09, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 4005 }, { "completion_length": 183.1428680419922, "epoch": 3.8445297504798464, "grad_norm": 0.013347922824323177, "kl": 0.37092575430870056, "learning_rate": 2.152029343520767e-09, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 4006 }, { "completion_length": 284.8571472167969, "epoch": 3.845489443378119, "grad_norm": 1.0940486192703247, "kl": 0.20956280827522278, "learning_rate": 2.125580677669686e-09, "loss": 0.0002, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 4007 }, { "completion_length": 195.92857360839844, "epoch": 3.8464491362763917, "grad_norm": 0.9907779097557068, "kl": 0.3197537064552307, "learning_rate": 2.0992948528688605e-09, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 4008 }, { "completion_length": 168.85714721679688, "epoch": 3.847408829174664, "grad_norm": 1.8394607305526733, "kl": 0.3822242319583893, "learning_rate": 2.0731718863867263e-09, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 4009 }, { "completion_length": 169.2857208251953, "epoch": 3.8483685220729367, "grad_norm": 1.7464721202850342, "kl": 0.39807531237602234, "learning_rate": 2.0472117953847513e-09, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 4010 }, { "completion_length": 153.92857360839844, "epoch": 3.849328214971209, "grad_norm": 0.014348560012876987, "kl": 0.42905527353286743, "learning_rate": 2.021414596917459e-09, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 4011 }, { "completion_length": 153.07144165039062, "epoch": 3.8502879078694816, "grad_norm": 1.0545414686203003, "kl": 0.38392573595046997, "learning_rate": 1.995780307932293e-09, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 4012 }, { "completion_length": 201.71429443359375, "epoch": 3.8512476007677545, "grad_norm": 1.1472232341766357, "kl": 0.3214842677116394, "learning_rate": 1.9703089452697e-09, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 4013 }, { "completion_length": 229.6428680419922, "epoch": 3.852207293666027, "grad_norm": 1.3294621706008911, "kl": 0.34088340401649475, "learning_rate": 1.945000525663043e-09, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 4014 }, { "completion_length": 189.21429443359375, "epoch": 3.8531669865642995, "grad_norm": 1.8361014127731323, "kl": 0.4018392860889435, "learning_rate": 1.9198550657387458e-09, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 4015 }, { "completion_length": 195.92857360839844, "epoch": 3.854126679462572, "grad_norm": 0.5555241107940674, "kl": 0.3606610596179962, "learning_rate": 1.8948725820160662e-09, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 4016 }, { "completion_length": 252.07144165039062, "epoch": 3.8550863723608444, "grad_norm": 0.008098207414150238, "kl": 0.28008800745010376, "learning_rate": 1.870053090907264e-09, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 4017 }, { "completion_length": 208.00001525878906, "epoch": 3.8560460652591173, "grad_norm": 0.011683929711580276, "kl": 0.31614309549331665, "learning_rate": 1.845396608717492e-09, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 4018 }, { "completion_length": 191.1428680419922, "epoch": 3.8570057581573898, "grad_norm": 1.9090255498886108, "kl": 0.3587980568408966, "learning_rate": 1.8209031516448203e-09, "loss": 0.0004, "reward": 0.785714328289032, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.785714328289032, "step": 4019 }, { "completion_length": 199.21429443359375, "epoch": 3.8579654510556622, "grad_norm": 1.3045103549957275, "kl": 0.3661072552204132, "learning_rate": 1.7965727357802385e-09, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 4020 }, { "completion_length": 150.85714721679688, "epoch": 3.8589251439539347, "grad_norm": 1.3945246934890747, "kl": 0.4717441499233246, "learning_rate": 1.7724053771075998e-09, "loss": 0.0005, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 4021 }, { "completion_length": 198.57144165039062, "epoch": 3.859884836852207, "grad_norm": 1.3113000392913818, "kl": 0.31551557779312134, "learning_rate": 1.7484010915036208e-09, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 4022 }, { "completion_length": 178.57144165039062, "epoch": 3.86084452975048, "grad_norm": 1.5758943557739258, "kl": 0.3646201491355896, "learning_rate": 1.724559894737937e-09, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 4023 }, { "completion_length": 178.42857360839844, "epoch": 3.8618042226487526, "grad_norm": 0.012662638910114765, "kl": 0.3258208632469177, "learning_rate": 1.7008818024730198e-09, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 4024 }, { "completion_length": 199.6428680419922, "epoch": 3.862763915547025, "grad_norm": 1.3748579025268555, "kl": 0.3715533912181854, "learning_rate": 1.6773668302641764e-09, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 4025 }, { "completion_length": 171.42857360839844, "epoch": 3.8637236084452975, "grad_norm": 1.433210015296936, "kl": 0.41022828221321106, "learning_rate": 1.654014993559577e-09, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 4026 }, { "completion_length": 252.1428680419922, "epoch": 3.86468330134357, "grad_norm": 0.013253966346383095, "kl": 0.27129265666007996, "learning_rate": 1.6308263077001727e-09, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 4027 }, { "completion_length": 207.6428680419922, "epoch": 3.865642994241843, "grad_norm": 1.3805639743804932, "kl": 0.3656175434589386, "learning_rate": 1.6078007879198051e-09, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 4028 }, { "completion_length": 160.0, "epoch": 3.866602687140115, "grad_norm": 1.2882356643676758, "kl": 0.39613011479377747, "learning_rate": 1.5849384493450413e-09, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 4029 }, { "completion_length": 185.6428680419922, "epoch": 3.867562380038388, "grad_norm": 0.8403825163841248, "kl": 0.3109598159790039, "learning_rate": 1.5622393069953665e-09, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 4030 }, { "completion_length": 189.71429443359375, "epoch": 3.8685220729366603, "grad_norm": 0.7530092597007751, "kl": 0.4177299737930298, "learning_rate": 1.5397033757828803e-09, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 4031 }, { "completion_length": 181.50001525878906, "epoch": 3.8694817658349328, "grad_norm": 0.009743161499500275, "kl": 0.329790323972702, "learning_rate": 1.5173306705126287e-09, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 4032 }, { "completion_length": 211.7857208251953, "epoch": 3.870441458733205, "grad_norm": 0.01214601006358862, "kl": 0.32295137643814087, "learning_rate": 1.495121205882327e-09, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 4033 }, { "completion_length": 176.00001525878906, "epoch": 3.8714011516314777, "grad_norm": 1.6885985136032104, "kl": 0.38957536220550537, "learning_rate": 1.4730749964824985e-09, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 4034 }, { "completion_length": 191.85714721679688, "epoch": 3.8723608445297506, "grad_norm": 0.6829788684844971, "kl": 0.3590591251850128, "learning_rate": 1.4511920567963908e-09, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 4035 }, { "completion_length": 120.00000762939453, "epoch": 3.873320537428023, "grad_norm": 0.8769298195838928, "kl": 0.6825570464134216, "learning_rate": 1.4294724011999493e-09, "loss": 0.0007, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 4036 }, { "completion_length": 180.57144165039062, "epoch": 3.8742802303262955, "grad_norm": 0.018070975318551064, "kl": 0.3712847828865051, "learning_rate": 1.407916043961982e-09, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 4037 }, { "completion_length": 159.85714721679688, "epoch": 3.875239923224568, "grad_norm": 1.2071690559387207, "kl": 0.3558211922645569, "learning_rate": 1.3865229992438554e-09, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 4038 }, { "completion_length": 162.85714721679688, "epoch": 3.8761996161228405, "grad_norm": 0.040964554995298386, "kl": 0.42642077803611755, "learning_rate": 1.3652932810997719e-09, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 4039 }, { "completion_length": 190.92857360839844, "epoch": 3.8771593090211134, "grad_norm": 0.9103614687919617, "kl": 0.3315088450908661, "learning_rate": 1.3442269034765752e-09, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 4040 }, { "completion_length": 137.5, "epoch": 3.878119001919386, "grad_norm": 1.1269418001174927, "kl": 0.39030295610427856, "learning_rate": 1.3233238802138058e-09, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 4041 }, { "completion_length": 175.00001525878906, "epoch": 3.8790786948176583, "grad_norm": 0.013455353677272797, "kl": 0.36107268929481506, "learning_rate": 1.3025842250437568e-09, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 4042 }, { "completion_length": 181.35714721679688, "epoch": 3.880038387715931, "grad_norm": 0.010419495403766632, "kl": 0.384062796831131, "learning_rate": 1.2820079515912519e-09, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 4043 }, { "completion_length": 210.07144165039062, "epoch": 3.8809980806142033, "grad_norm": 0.012346111238002777, "kl": 0.31209349632263184, "learning_rate": 1.26159507337395e-09, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 4044 }, { "completion_length": 163.42857360839844, "epoch": 3.881957773512476, "grad_norm": 1.2983266115188599, "kl": 0.3472115695476532, "learning_rate": 1.2413456038020409e-09, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 4045 }, { "completion_length": 212.21429443359375, "epoch": 3.8829174664107486, "grad_norm": 0.01230142917484045, "kl": 0.290396124124527, "learning_rate": 1.2212595561784112e-09, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 4046 }, { "completion_length": 157.42857360839844, "epoch": 3.883877159309021, "grad_norm": 0.017672335729002953, "kl": 0.429766982793808, "learning_rate": 1.2013369436986164e-09, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 4047 }, { "completion_length": 184.50001525878906, "epoch": 3.8848368522072936, "grad_norm": 2.1018764972686768, "kl": 0.3480302095413208, "learning_rate": 1.1815777794507709e-09, "loss": 0.0003, "reward": 0.6428571939468384, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.6428571939468384, "step": 4048 }, { "completion_length": 180.6428680419922, "epoch": 3.885796545105566, "grad_norm": 0.015285073779523373, "kl": 0.37337154150009155, "learning_rate": 1.1619820764157128e-09, "loss": 0.0004, "reward": 0.7142857313156128, "reward_std": 0.0, "rewards/check_gptzero_func": 0.7142857313156128, "step": 4049 }, { "completion_length": 232.50001525878906, "epoch": 3.886756238003839, "grad_norm": 1.4377743005752563, "kl": 0.3511527180671692, "learning_rate": 1.1425498474667839e-09, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 4050 }, { "completion_length": 206.1428680419922, "epoch": 3.8877159309021114, "grad_norm": 0.020057538524270058, "kl": 0.3824993073940277, "learning_rate": 1.1232811053699943e-09, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 4051 }, { "completion_length": 162.6428680419922, "epoch": 3.888675623800384, "grad_norm": 1.3676106929779053, "kl": 0.3796802759170532, "learning_rate": 1.1041758627839404e-09, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 4052 }, { "completion_length": 247.71429443359375, "epoch": 3.8896353166986564, "grad_norm": 1.0579055547714233, "kl": 0.32934466004371643, "learning_rate": 1.0852341322598047e-09, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 4053 }, { "completion_length": 244.07144165039062, "epoch": 3.890595009596929, "grad_norm": 0.014211983419954777, "kl": 0.33671772480010986, "learning_rate": 1.066455926241383e-09, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 4054 }, { "completion_length": 178.7857208251953, "epoch": 3.8915547024952017, "grad_norm": 0.048670582473278046, "kl": 0.4066251218318939, "learning_rate": 1.0478412570649742e-09, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 4055 }, { "completion_length": 196.92857360839844, "epoch": 3.892514395393474, "grad_norm": 0.025099724531173706, "kl": 0.32037153840065, "learning_rate": 1.0293901369594903e-09, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 4056 }, { "completion_length": 280.0714416503906, "epoch": 3.8934740882917467, "grad_norm": 0.009729502722620964, "kl": 0.28069770336151123, "learning_rate": 1.0111025780464021e-09, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 4057 }, { "completion_length": 199.1428680419922, "epoch": 3.894433781190019, "grad_norm": 1.0476795434951782, "kl": 0.33552640676498413, "learning_rate": 9.929785923397382e-10, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 4058 }, { "completion_length": 218.50001525878906, "epoch": 3.8953934740882916, "grad_norm": 2.9657411575317383, "kl": 0.4116988480091095, "learning_rate": 9.75018191746002e-10, "loss": 0.0004, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 4059 }, { "completion_length": 208.6428680419922, "epoch": 3.8963531669865645, "grad_norm": 0.009091930463910103, "kl": 0.3235475420951843, "learning_rate": 9.57221388064311e-10, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 4060 }, { "completion_length": 180.21429443359375, "epoch": 3.897312859884837, "grad_norm": 1.4563945531845093, "kl": 0.3458452820777893, "learning_rate": 9.395881929862571e-10, "loss": 0.0003, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 4061 }, { "completion_length": 196.6428680419922, "epoch": 3.8982725527831095, "grad_norm": 1.957497477531433, "kl": 0.43055152893066406, "learning_rate": 9.221186180959629e-10, "loss": 0.0004, "reward": 0.785714328289032, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.785714328289032, "step": 4062 }, { "completion_length": 204.7857208251953, "epoch": 3.899232245681382, "grad_norm": 1.899460792541504, "kl": 0.3846285939216614, "learning_rate": 9.048126748700536e-10, "loss": 0.0004, "reward": 0.785714328289032, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.785714328289032, "step": 4063 }, { "completion_length": 173.85714721679688, "epoch": 3.9001919385796544, "grad_norm": 0.024125663563609123, "kl": 0.383096843957901, "learning_rate": 8.876703746776848e-10, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 4064 }, { "completion_length": 167.85714721679688, "epoch": 3.9011516314779273, "grad_norm": 0.05751991271972656, "kl": 0.5531782507896423, "learning_rate": 8.706917287804594e-10, "loss": 0.0006, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 4065 }, { "completion_length": 225.21429443359375, "epoch": 3.9021113243761993, "grad_norm": 0.012594482861459255, "kl": 0.3175846338272095, "learning_rate": 8.538767483325383e-10, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 4066 }, { "completion_length": 169.1428680419922, "epoch": 3.9030710172744723, "grad_norm": 0.6754401922225952, "kl": 0.4238206446170807, "learning_rate": 8.372254443805016e-10, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 4067 }, { "completion_length": 142.71429443359375, "epoch": 3.9040307101727447, "grad_norm": 1.8291935920715332, "kl": 0.4209122061729431, "learning_rate": 8.20737827863377e-10, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 4068 }, { "completion_length": 164.57144165039062, "epoch": 3.904990403071017, "grad_norm": 1.5076531171798706, "kl": 0.483065664768219, "learning_rate": 8.044139096127778e-10, "loss": 0.0005, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 4069 }, { "completion_length": 156.21429443359375, "epoch": 3.90595009596929, "grad_norm": 1.5406070947647095, "kl": 0.37746092677116394, "learning_rate": 7.882537003526257e-10, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 4070 }, { "completion_length": 173.1428680419922, "epoch": 3.906909788867562, "grad_norm": 1.262888789176941, "kl": 0.369846373796463, "learning_rate": 7.722572106994285e-10, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 4071 }, { "completion_length": 149.21429443359375, "epoch": 3.907869481765835, "grad_norm": 0.8352395296096802, "kl": 0.4883543848991394, "learning_rate": 7.564244511620299e-10, "loss": 0.0005, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 4072 }, { "completion_length": 258.4285888671875, "epoch": 3.9088291746641075, "grad_norm": 0.6672773957252502, "kl": 0.2297784686088562, "learning_rate": 7.407554321417764e-10, "loss": 0.0002, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 4073 }, { "completion_length": 161.1428680419922, "epoch": 3.90978886756238, "grad_norm": 1.6248905658721924, "kl": 0.44635871052742004, "learning_rate": 7.252501639324337e-10, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 4074 }, { "completion_length": 183.7857208251953, "epoch": 3.9107485604606524, "grad_norm": 0.011407656595110893, "kl": 0.33525368571281433, "learning_rate": 7.099086567201873e-10, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 4075 }, { "completion_length": 205.7857208251953, "epoch": 3.911708253358925, "grad_norm": 0.008057471364736557, "kl": 0.37025898694992065, "learning_rate": 6.947309205836139e-10, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 4076 }, { "completion_length": 227.6428680419922, "epoch": 3.912667946257198, "grad_norm": 1.3634071350097656, "kl": 0.3525437116622925, "learning_rate": 6.797169654937373e-10, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 4077 }, { "completion_length": 184.1428680419922, "epoch": 3.9136276391554703, "grad_norm": 1.287603735923767, "kl": 0.3440512418746948, "learning_rate": 6.648668013140013e-10, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 4078 }, { "completion_length": 155.5, "epoch": 3.9145873320537428, "grad_norm": 1.7280957698822021, "kl": 0.405929833650589, "learning_rate": 6.501804378001574e-10, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 4079 }, { "completion_length": 151.2857208251953, "epoch": 3.9155470249520152, "grad_norm": 1.0484884977340698, "kl": 0.5039401650428772, "learning_rate": 6.356578846004323e-10, "loss": 0.0005, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 4080 }, { "completion_length": 191.2857208251953, "epoch": 3.9165067178502877, "grad_norm": 1.5386621952056885, "kl": 0.36922743916511536, "learning_rate": 6.212991512554721e-10, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 4081 }, { "completion_length": 223.6428680419922, "epoch": 3.9174664107485606, "grad_norm": 0.01162699144333601, "kl": 0.2641673684120178, "learning_rate": 6.07104247198148e-10, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 4082 }, { "completion_length": 241.07144165039062, "epoch": 3.918426103646833, "grad_norm": 0.10733556747436523, "kl": 0.3976191282272339, "learning_rate": 5.930731817538893e-10, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 4083 }, { "completion_length": 148.92857360839844, "epoch": 3.9193857965451055, "grad_norm": 0.015079192817211151, "kl": 0.43644416332244873, "learning_rate": 5.792059641403224e-10, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 4084 }, { "completion_length": 238.50001525878906, "epoch": 3.920345489443378, "grad_norm": 1.0080676078796387, "kl": 0.26399803161621094, "learning_rate": 5.65502603467577e-10, "loss": 0.0003, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 4085 }, { "completion_length": 163.2857208251953, "epoch": 3.9213051823416505, "grad_norm": 1.303503155708313, "kl": 0.3974023163318634, "learning_rate": 5.51963108738035e-10, "loss": 0.0004, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 4086 }, { "completion_length": 178.6428680419922, "epoch": 3.9222648752399234, "grad_norm": 1.2852158546447754, "kl": 0.3265431523323059, "learning_rate": 5.385874888464704e-10, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 4087 }, { "completion_length": 216.85714721679688, "epoch": 3.923224568138196, "grad_norm": 0.01187992189079523, "kl": 0.3064866065979004, "learning_rate": 5.253757525800207e-10, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 4088 }, { "completion_length": 169.21429443359375, "epoch": 3.9241842610364683, "grad_norm": 1.0781852006912231, "kl": 0.4567772150039673, "learning_rate": 5.123279086181042e-10, "loss": 0.0005, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 4089 }, { "completion_length": 237.50001525878906, "epoch": 3.925143953934741, "grad_norm": 1.736254096031189, "kl": 0.3218550682067871, "learning_rate": 4.994439655324756e-10, "loss": 0.0003, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 4090 }, { "completion_length": 175.21429443359375, "epoch": 3.9261036468330133, "grad_norm": 0.9686532616615295, "kl": 0.4928711950778961, "learning_rate": 4.867239317873085e-10, "loss": 0.0005, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 4091 }, { "completion_length": 202.07144165039062, "epoch": 3.927063339731286, "grad_norm": 0.7351035475730896, "kl": 0.30728811025619507, "learning_rate": 4.741678157389739e-10, "loss": 0.0003, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 4092 }, { "completion_length": 197.71429443359375, "epoch": 3.9280230326295587, "grad_norm": 0.0137623380869627, "kl": 0.3399880528450012, "learning_rate": 4.6177562563623484e-10, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 4093 }, { "completion_length": 203.7857208251953, "epoch": 3.928982725527831, "grad_norm": 0.010432439856231213, "kl": 0.33283740282058716, "learning_rate": 4.495473696201346e-10, "loss": 0.0003, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 4094 }, { "completion_length": 129.35714721679688, "epoch": 3.9299424184261036, "grad_norm": 0.0162770077586174, "kl": 0.4800567924976349, "learning_rate": 4.3748305572399726e-10, "loss": 0.0005, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 4095 }, { "completion_length": 215.42857360839844, "epoch": 3.930902111324376, "grad_norm": 1.2311519384384155, "kl": 0.31952083110809326, "learning_rate": 4.255826918735106e-10, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 4096 }, { "completion_length": 157.6428680419922, "epoch": 3.931861804222649, "grad_norm": 1.2152286767959595, "kl": 0.40719836950302124, "learning_rate": 4.1384628588658766e-10, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 4097 }, { "completion_length": 197.2857208251953, "epoch": 3.9328214971209214, "grad_norm": 0.01325320079922676, "kl": 0.3329126238822937, "learning_rate": 4.0227384547344976e-10, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 4098 }, { "completion_length": 172.6428680419922, "epoch": 3.933781190019194, "grad_norm": 0.028241265565156937, "kl": 0.4034551978111267, "learning_rate": 3.9086537823665443e-10, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 4099 }, { "completion_length": 168.1428680419922, "epoch": 3.9347408829174664, "grad_norm": 0.01642497070133686, "kl": 0.3743857145309448, "learning_rate": 3.7962089167095645e-10, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 4100 }, { "completion_length": 222.1428680419922, "epoch": 3.935700575815739, "grad_norm": 0.014246395789086819, "kl": 0.2821890115737915, "learning_rate": 3.68540393163419e-10, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 4101 }, { "completion_length": 128.6428680419922, "epoch": 3.9366602687140118, "grad_norm": 1.8157674074172974, "kl": 0.5159949660301208, "learning_rate": 3.576238899933859e-10, "loss": 0.0005, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 4102 }, { "completion_length": 191.57144165039062, "epoch": 3.9376199616122842, "grad_norm": 1.92849600315094, "kl": 0.367367684841156, "learning_rate": 3.4687138933245377e-10, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 4103 }, { "completion_length": 137.21429443359375, "epoch": 3.9385796545105567, "grad_norm": 0.012828943319618702, "kl": 0.4268530011177063, "learning_rate": 3.3628289824444434e-10, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 4104 }, { "completion_length": 203.42857360839844, "epoch": 3.939539347408829, "grad_norm": 0.008867306634783745, "kl": 0.2721799910068512, "learning_rate": 3.258584236855155e-10, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 4105 }, { "completion_length": 196.2857208251953, "epoch": 3.9404990403071016, "grad_norm": 0.009807421825826168, "kl": 0.36911436915397644, "learning_rate": 3.1559797250399476e-10, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 4106 }, { "completion_length": 218.57144165039062, "epoch": 3.9414587332053745, "grad_norm": 0.010487117804586887, "kl": 0.27726420760154724, "learning_rate": 3.055015514404624e-10, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 4107 }, { "completion_length": 184.42857360839844, "epoch": 3.9424184261036466, "grad_norm": 0.014175678603351116, "kl": 0.4832308292388916, "learning_rate": 2.955691671278071e-10, "loss": 0.0005, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 4108 }, { "completion_length": 175.57144165039062, "epoch": 3.9433781190019195, "grad_norm": 0.7127887606620789, "kl": 0.39106324315071106, "learning_rate": 2.8580082609108715e-10, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 4109 }, { "completion_length": 212.00001525878906, "epoch": 3.944337811900192, "grad_norm": 0.018025314435362816, "kl": 0.3475330173969269, "learning_rate": 2.7619653474758585e-10, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 4110 }, { "completion_length": 199.21429443359375, "epoch": 3.9452975047984644, "grad_norm": 1.433348298072815, "kl": 0.3209091126918793, "learning_rate": 2.6675629940689504e-10, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 4111 }, { "completion_length": 212.6428680419922, "epoch": 3.946257197696737, "grad_norm": 0.010603510774672031, "kl": 0.3812156021595001, "learning_rate": 2.5748012627072045e-10, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 4112 }, { "completion_length": 201.42857360839844, "epoch": 3.9472168905950094, "grad_norm": 0.011668689548969269, "kl": 0.3586845099925995, "learning_rate": 2.4836802143307636e-10, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 4113 }, { "completion_length": 234.07144165039062, "epoch": 3.9481765834932823, "grad_norm": 0.9082258939743042, "kl": 0.284910649061203, "learning_rate": 2.3941999088014643e-10, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 4114 }, { "completion_length": 181.85714721679688, "epoch": 3.9491362763915547, "grad_norm": 0.012564032338559628, "kl": 0.30218708515167236, "learning_rate": 2.3063604049033957e-10, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 4115 }, { "completion_length": 217.57144165039062, "epoch": 3.950095969289827, "grad_norm": 0.9620859026908875, "kl": 0.3150911033153534, "learning_rate": 2.2201617603426204e-10, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 4116 }, { "completion_length": 227.2857208251953, "epoch": 3.9510556621880997, "grad_norm": 0.020953837782144547, "kl": 0.33179742097854614, "learning_rate": 2.135604031747451e-10, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 4117 }, { "completion_length": 241.4285888671875, "epoch": 3.952015355086372, "grad_norm": 0.010539610870182514, "kl": 0.25658705830574036, "learning_rate": 2.052687274667897e-10, "loss": 0.0003, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 4118 }, { "completion_length": 193.07144165039062, "epoch": 3.952975047984645, "grad_norm": 0.9374324083328247, "kl": 0.33488693833351135, "learning_rate": 1.9714115435764957e-10, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 4119 }, { "completion_length": 199.57144165039062, "epoch": 3.9539347408829175, "grad_norm": 1.8340628147125244, "kl": 0.3202204704284668, "learning_rate": 1.8917768918669253e-10, "loss": 0.0003, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 4120 }, { "completion_length": 180.35714721679688, "epoch": 3.95489443378119, "grad_norm": 0.011134167201817036, "kl": 0.35258471965789795, "learning_rate": 1.8137833718551155e-10, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 4121 }, { "completion_length": 152.7857208251953, "epoch": 3.9558541266794625, "grad_norm": 1.323911190032959, "kl": 0.35279595851898193, "learning_rate": 1.7374310347792464e-10, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 4122 }, { "completion_length": 149.5, "epoch": 3.956813819577735, "grad_norm": 0.04783977195620537, "kl": 0.4397421181201935, "learning_rate": 1.6627199307986394e-10, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 4123 }, { "completion_length": 213.2857208251953, "epoch": 3.957773512476008, "grad_norm": 0.009449991397559643, "kl": 0.29652711749076843, "learning_rate": 1.5896501089948666e-10, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 4124 }, { "completion_length": 198.2857208251953, "epoch": 3.9587332053742803, "grad_norm": 0.014763926155865192, "kl": 0.37000128626823425, "learning_rate": 1.518221617371196e-10, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 4125 }, { "completion_length": 163.42857360839844, "epoch": 3.9596928982725528, "grad_norm": 1.4599202871322632, "kl": 0.3835083842277527, "learning_rate": 1.4484345028525913e-10, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 4126 }, { "completion_length": 204.7857208251953, "epoch": 3.9606525911708252, "grad_norm": 0.7372411489486694, "kl": 0.29175376892089844, "learning_rate": 1.3802888112857125e-10, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 4127 }, { "completion_length": 115.5714340209961, "epoch": 3.9616122840690977, "grad_norm": 1.3507460355758667, "kl": 0.5377938151359558, "learning_rate": 1.31378458743836e-10, "loss": 0.0005, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 4128 }, { "completion_length": 197.85714721679688, "epoch": 3.9625719769673706, "grad_norm": 1.2388498783111572, "kl": 0.3196409344673157, "learning_rate": 1.248921875001141e-10, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 4129 }, { "completion_length": 208.71429443359375, "epoch": 3.963531669865643, "grad_norm": 0.01957286149263382, "kl": 0.32645875215530396, "learning_rate": 1.1857007165852472e-10, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 4130 }, { "completion_length": 171.85714721679688, "epoch": 3.9644913627639156, "grad_norm": 0.03451311215758324, "kl": 0.42489925026893616, "learning_rate": 1.1241211537238449e-10, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 4131 }, { "completion_length": 193.92857360839844, "epoch": 3.965451055662188, "grad_norm": 0.01587684452533722, "kl": 0.35307207703590393, "learning_rate": 1.0641832268717954e-10, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 4132 }, { "completion_length": 178.1428680419922, "epoch": 3.9664107485604605, "grad_norm": 1.429308533668518, "kl": 0.3534053564071655, "learning_rate": 1.0058869754051014e-10, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 4133 }, { "completion_length": 250.21429443359375, "epoch": 3.9673704414587334, "grad_norm": 1.0521749258041382, "kl": 0.27225837111473083, "learning_rate": 9.492324376214611e-11, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 4134 }, { "completion_length": 191.71429443359375, "epoch": 3.968330134357006, "grad_norm": 0.7278079986572266, "kl": 0.36203959584236145, "learning_rate": 8.942196507402688e-11, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 4135 }, { "completion_length": 160.07144165039062, "epoch": 3.9692898272552783, "grad_norm": 1.4300295114517212, "kl": 0.4221406877040863, "learning_rate": 8.40848650901782e-11, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 4136 }, { "completion_length": 193.50001525878906, "epoch": 3.970249520153551, "grad_norm": 1.544615387916565, "kl": 0.41483113169670105, "learning_rate": 7.891194731685091e-11, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 4137 }, { "completion_length": 123.0714340209961, "epoch": 3.9712092130518233, "grad_norm": 1.585552453994751, "kl": 0.4805457592010498, "learning_rate": 7.390321515238218e-11, "loss": 0.0005, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 4138 }, { "completion_length": 191.71429443359375, "epoch": 3.972168905950096, "grad_norm": 1.593056082725525, "kl": 0.3339795768260956, "learning_rate": 6.905867188722326e-11, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 4139 }, { "completion_length": 172.42857360839844, "epoch": 3.9731285988483687, "grad_norm": 0.039267294108867645, "kl": 0.4126411974430084, "learning_rate": 6.437832070402271e-11, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 4140 }, { "completion_length": 170.71429443359375, "epoch": 3.974088291746641, "grad_norm": 0.03345146402716637, "kl": 0.3942766487598419, "learning_rate": 5.986216467754324e-11, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 4141 }, { "completion_length": 199.50001525878906, "epoch": 3.9750479846449136, "grad_norm": 0.018699781969189644, "kl": 0.35364919900894165, "learning_rate": 5.5510206774689314e-11, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 4142 }, { "completion_length": 181.71429443359375, "epoch": 3.976007677543186, "grad_norm": 1.6237471103668213, "kl": 0.37132737040519714, "learning_rate": 5.132244985442402e-11, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 4143 }, { "completion_length": 169.2857208251953, "epoch": 3.976967370441459, "grad_norm": 0.11415563523769379, "kl": 0.44739365577697754, "learning_rate": 4.729889666793552e-11, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 4144 }, { "completion_length": 188.35714721679688, "epoch": 3.9779270633397315, "grad_norm": 0.8571979403495789, "kl": 0.3671565353870392, "learning_rate": 4.3439549858470534e-11, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 4145 }, { "completion_length": 199.07144165039062, "epoch": 3.978886756238004, "grad_norm": 0.02035924233496189, "kl": 0.4092307388782501, "learning_rate": 3.97444119614454e-11, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 4146 }, { "completion_length": 184.00001525878906, "epoch": 3.9798464491362764, "grad_norm": 1.439220666885376, "kl": 0.3756190240383148, "learning_rate": 3.6213485404362757e-11, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 4147 }, { "completion_length": 199.2857208251953, "epoch": 3.980806142034549, "grad_norm": 0.017021549865603447, "kl": 0.32759690284729004, "learning_rate": 3.2846772506894825e-11, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 4148 }, { "completion_length": 181.85714721679688, "epoch": 3.9817658349328218, "grad_norm": 0.9408387541770935, "kl": 0.4439185857772827, "learning_rate": 2.9644275480772416e-11, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 4149 }, { "completion_length": 250.7857208251953, "epoch": 3.982725527831094, "grad_norm": 0.7679616212844849, "kl": 0.3202245831489563, "learning_rate": 2.6605996429895915e-11, "loss": 0.0003, "reward": 0.785714328289032, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.785714328289032, "step": 4150 }, { "completion_length": 217.92857360839844, "epoch": 3.9836852207293667, "grad_norm": 0.013046330772340298, "kl": 0.2954139709472656, "learning_rate": 2.3731937350224273e-11, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 4151 }, { "completion_length": 165.85714721679688, "epoch": 3.984644913627639, "grad_norm": 0.011290816590189934, "kl": 0.3922885060310364, "learning_rate": 2.1022100129941543e-11, "loss": 0.0004, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 4152 }, { "completion_length": 198.57144165039062, "epoch": 3.9856046065259116, "grad_norm": 0.012609419412910938, "kl": 0.3225898742675781, "learning_rate": 1.8476486549207083e-11, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 4153 }, { "completion_length": 266.0, "epoch": 3.986564299424184, "grad_norm": 1.1137069463729858, "kl": 0.2599567770957947, "learning_rate": 1.6095098280377585e-11, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 4154 }, { "completion_length": 192.07144165039062, "epoch": 3.9875239923224566, "grad_norm": 0.014428005553781986, "kl": 0.3342946171760559, "learning_rate": 1.3877936887923825e-11, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 4155 }, { "completion_length": 186.57144165039062, "epoch": 3.9884836852207295, "grad_norm": 1.4576314687728882, "kl": 0.4063270688056946, "learning_rate": 1.1825003828402901e-11, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 4156 }, { "completion_length": 187.7857208251953, "epoch": 3.989443378119002, "grad_norm": 1.3626865148544312, "kl": 0.40077850222587585, "learning_rate": 9.93630045048599e-12, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 4157 }, { "completion_length": 224.9285888671875, "epoch": 3.9904030710172744, "grad_norm": 0.015782756730914116, "kl": 0.3058566451072693, "learning_rate": 8.211827994958342e-12, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 4158 }, { "completion_length": 206.1428680419922, "epoch": 3.991362763915547, "grad_norm": 0.018555253744125366, "kl": 0.31648024916648865, "learning_rate": 6.651587594719288e-12, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 4159 }, { "completion_length": 183.85714721679688, "epoch": 3.9923224568138194, "grad_norm": 0.011525201611220837, "kl": 0.34522733092308044, "learning_rate": 5.255580274726723e-12, "loss": 0.0003, "reward": 1.0, "reward_std": 0.0, "rewards/check_gptzero_func": 1.0, "step": 4160 }, { "completion_length": 202.71429443359375, "epoch": 3.9932821497120923, "grad_norm": 1.2554000616073608, "kl": 0.3758290410041809, "learning_rate": 4.0238069521358884e-12, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.2020305097103119, "rewards/check_gptzero_func": 0.8571429252624512, "step": 4161 }, { "completion_length": 185.71429443359375, "epoch": 3.9942418426103647, "grad_norm": 1.036627173423767, "kl": 0.3417288064956665, "learning_rate": 2.9562684361605916e-12, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 4162 }, { "completion_length": 199.71429443359375, "epoch": 3.995201535508637, "grad_norm": 1.710235357284546, "kl": 0.37269335985183716, "learning_rate": 2.0529654280732055e-12, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 4163 }, { "completion_length": 145.71429443359375, "epoch": 3.9961612284069097, "grad_norm": 1.5672842264175415, "kl": 0.3694833219051361, "learning_rate": 1.3138985213434483e-12, "loss": 0.0004, "reward": 0.6428571939468384, "reward_std": 0.30304574966430664, "rewards/check_gptzero_func": 0.6428571939468384, "step": 4164 }, { "completion_length": 160.42857360839844, "epoch": 3.997120921305182, "grad_norm": 4.099261283874512, "kl": 0.44954952597618103, "learning_rate": 7.390682014718485e-13, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 4165 }, { "completion_length": 219.6428680419922, "epoch": 3.998080614203455, "grad_norm": 1.321685791015625, "kl": 0.28158673644065857, "learning_rate": 3.284748461285236e-13, "loss": 0.0003, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 4166 }, { "completion_length": 213.1428680419922, "epoch": 3.9990403071017275, "grad_norm": 0.017217423766851425, "kl": 0.36850202083587646, "learning_rate": 8.211872501440175e-14, "loss": 0.0004, "reward": 0.8571429252624512, "reward_std": 0.0, "rewards/check_gptzero_func": 0.8571429252624512, "step": 4167 }, { "completion_length": 153.71429443359375, "epoch": 4.0, "grad_norm": 1.348327398300171, "kl": 0.3953126072883606, "learning_rate": 0.0, "loss": 0.0004, "reward": 0.9285714626312256, "reward_std": 0.10101525485515594, "rewards/check_gptzero_func": 0.9285714626312256, "step": 4168 } ], "logging_steps": 1, "max_steps": 4168, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 50, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }