|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.9984, |
|
"eval_steps": 500, |
|
"global_step": 624, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 1090.0166961669922, |
|
"epoch": 0.032, |
|
"grad_norm": 1.3739021378163432, |
|
"kl": 0.011310577392578125, |
|
"learning_rate": 2.951923076923077e-06, |
|
"loss": 0.0005, |
|
"num_tokens": 1218701.0, |
|
"reward": 1.3793484151363373, |
|
"reward_std": 0.24156477563083173, |
|
"rewards/reward_func": 1.379348385334015, |
|
"step": 10 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 1187.3583862304688, |
|
"epoch": 0.064, |
|
"grad_norm": 1.2407170709125623, |
|
"kl": 0.05146484375, |
|
"learning_rate": 2.903846153846154e-06, |
|
"loss": 0.0021, |
|
"num_tokens": 2492712.0, |
|
"reward": 1.4012527942657471, |
|
"reward_std": 0.232261598482728, |
|
"rewards/reward_func": 1.4012527465820312, |
|
"step": 20 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 1193.7833740234375, |
|
"epoch": 0.096, |
|
"grad_norm": 1.6332710755623894, |
|
"kl": 0.0868896484375, |
|
"learning_rate": 2.8557692307692307e-06, |
|
"loss": 0.0035, |
|
"num_tokens": 3763792.0, |
|
"reward": 1.3906624972820283, |
|
"reward_std": 0.2509541690349579, |
|
"rewards/reward_func": 1.3906624555587768, |
|
"step": 30 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 1147.8333679199218, |
|
"epoch": 0.128, |
|
"grad_norm": 1.1957910220069445, |
|
"kl": 0.11298828125, |
|
"learning_rate": 2.807692307692308e-06, |
|
"loss": 0.0045, |
|
"num_tokens": 5019020.0, |
|
"reward": 1.3979449570178986, |
|
"reward_std": 0.24268287047743797, |
|
"rewards/reward_func": 1.3979449152946473, |
|
"step": 40 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 1035.5166961669922, |
|
"epoch": 0.16, |
|
"grad_norm": 1.2514751954183139, |
|
"kl": 0.148388671875, |
|
"learning_rate": 2.7596153846153844e-06, |
|
"loss": 0.0059, |
|
"num_tokens": 6212869.0, |
|
"reward": 1.4058575332164764, |
|
"reward_std": 0.20526219047605992, |
|
"rewards/reward_func": 1.4058574855327606, |
|
"step": 50 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 1127.416696166992, |
|
"epoch": 0.192, |
|
"grad_norm": 1.1179435424314572, |
|
"kl": 0.16640625, |
|
"learning_rate": 2.7115384615384617e-06, |
|
"loss": 0.0067, |
|
"num_tokens": 7461579.0, |
|
"reward": 1.3581139743328094, |
|
"reward_std": 0.20967118199914694, |
|
"rewards/reward_func": 1.3581139326095581, |
|
"step": 60 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 1225.225033569336, |
|
"epoch": 0.224, |
|
"grad_norm": 1.0602733950469436, |
|
"kl": 0.17060546875, |
|
"learning_rate": 2.6634615384615385e-06, |
|
"loss": 0.0068, |
|
"num_tokens": 8750799.0, |
|
"reward": 1.394177681207657, |
|
"reward_std": 0.20445052348077297, |
|
"rewards/reward_func": 1.394177633523941, |
|
"step": 70 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 1162.0917053222656, |
|
"epoch": 0.256, |
|
"grad_norm": 1.1076761293627495, |
|
"kl": 0.19599609375, |
|
"learning_rate": 2.6153846153846154e-06, |
|
"loss": 0.0078, |
|
"num_tokens": 10014725.0, |
|
"reward": 1.406614112854004, |
|
"reward_std": 0.17760551236569883, |
|
"rewards/reward_func": 1.4066140830516816, |
|
"step": 80 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 1248.1833862304688, |
|
"epoch": 0.288, |
|
"grad_norm": 1.0403097644790908, |
|
"kl": 0.204150390625, |
|
"learning_rate": 2.5673076923076923e-06, |
|
"loss": 0.0082, |
|
"num_tokens": 11313051.0, |
|
"reward": 1.3719308912754058, |
|
"reward_std": 0.19940033443272115, |
|
"rewards/reward_func": 1.371930855512619, |
|
"step": 90 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 1225.9083618164063, |
|
"epoch": 0.32, |
|
"grad_norm": 1.0234085541077607, |
|
"kl": 0.215966796875, |
|
"learning_rate": 2.5192307692307695e-06, |
|
"loss": 0.0086, |
|
"num_tokens": 12598432.0, |
|
"reward": 1.328627246618271, |
|
"reward_std": 0.17000198867172003, |
|
"rewards/reward_func": 1.328627210855484, |
|
"step": 100 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 1155.7333679199219, |
|
"epoch": 0.352, |
|
"grad_norm": 0.993996272164919, |
|
"kl": 0.231884765625, |
|
"learning_rate": 2.471153846153846e-06, |
|
"loss": 0.0093, |
|
"num_tokens": 13854596.0, |
|
"reward": 1.3170884191989898, |
|
"reward_std": 0.16620137430727483, |
|
"rewards/reward_func": 1.317088395357132, |
|
"step": 110 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 1179.6333618164062, |
|
"epoch": 0.384, |
|
"grad_norm": 1.0386008286011814, |
|
"kl": 0.24189453125, |
|
"learning_rate": 2.4230769230769233e-06, |
|
"loss": 0.0097, |
|
"num_tokens": 15122610.0, |
|
"reward": 1.3696976065635682, |
|
"reward_std": 0.21136937104165554, |
|
"rewards/reward_func": 1.3696975648403167, |
|
"step": 120 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 1119.6167114257812, |
|
"epoch": 0.416, |
|
"grad_norm": 0.9856388260555886, |
|
"kl": 0.249951171875, |
|
"learning_rate": 2.375e-06, |
|
"loss": 0.01, |
|
"num_tokens": 16352936.0, |
|
"reward": 1.3440082728862763, |
|
"reward_std": 0.20238495394587516, |
|
"rewards/reward_func": 1.3440082371234894, |
|
"step": 130 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 1080.04169921875, |
|
"epoch": 0.448, |
|
"grad_norm": 1.0007193412023812, |
|
"kl": 0.25869140625, |
|
"learning_rate": 2.326923076923077e-06, |
|
"loss": 0.0103, |
|
"num_tokens": 17573812.0, |
|
"reward": 1.3747205495834351, |
|
"reward_std": 0.19062257995828985, |
|
"rewards/reward_func": 1.3747205018997193, |
|
"step": 140 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 1101.4416961669922, |
|
"epoch": 0.48, |
|
"grad_norm": 1.031523753313366, |
|
"kl": 0.263427734375, |
|
"learning_rate": 2.278846153846154e-06, |
|
"loss": 0.0105, |
|
"num_tokens": 18802458.0, |
|
"reward": 1.3720044195652008, |
|
"reward_std": 0.19671934731304647, |
|
"rewards/reward_func": 1.3720043897628784, |
|
"step": 150 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 1075.5250366210937, |
|
"epoch": 0.512, |
|
"grad_norm": 1.0105581618764354, |
|
"kl": 0.270361328125, |
|
"learning_rate": 2.2307692307692307e-06, |
|
"loss": 0.0108, |
|
"num_tokens": 20019060.0, |
|
"reward": 1.3801562547683717, |
|
"reward_std": 0.1729557979851961, |
|
"rewards/reward_func": 1.3801562130451202, |
|
"step": 160 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 1243.0083740234375, |
|
"epoch": 0.544, |
|
"grad_norm": 1.0536982761834348, |
|
"kl": 0.26962890625, |
|
"learning_rate": 2.1826923076923076e-06, |
|
"loss": 0.0108, |
|
"num_tokens": 21314989.0, |
|
"reward": 1.3575429141521453, |
|
"reward_std": 0.19225950334221126, |
|
"rewards/reward_func": 1.357542872428894, |
|
"step": 170 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 1280.0750427246094, |
|
"epoch": 0.576, |
|
"grad_norm": 0.8848699702030035, |
|
"kl": 0.267626953125, |
|
"learning_rate": 2.134615384615385e-06, |
|
"loss": 0.0107, |
|
"num_tokens": 22632769.0, |
|
"reward": 1.4011802554130555, |
|
"reward_std": 0.2013660490512848, |
|
"rewards/reward_func": 1.401180225610733, |
|
"step": 180 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 1198.9833740234376, |
|
"epoch": 0.608, |
|
"grad_norm": 0.991723778598343, |
|
"kl": 0.267822265625, |
|
"learning_rate": 2.0865384615384613e-06, |
|
"loss": 0.0107, |
|
"num_tokens": 23901665.0, |
|
"reward": 1.4138015925884246, |
|
"reward_std": 0.20857114791870118, |
|
"rewards/reward_func": 1.413801556825638, |
|
"step": 190 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 1184.183367919922, |
|
"epoch": 0.64, |
|
"grad_norm": 0.9649670800133836, |
|
"kl": 0.270166015625, |
|
"learning_rate": 2.0384615384615386e-06, |
|
"loss": 0.0108, |
|
"num_tokens": 25164675.0, |
|
"reward": 1.3996073424816131, |
|
"reward_std": 0.16547524519264697, |
|
"rewards/reward_func": 1.3996073186397553, |
|
"step": 200 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 1196.316717529297, |
|
"epoch": 0.672, |
|
"grad_norm": 0.918724273092122, |
|
"kl": 0.2720703125, |
|
"learning_rate": 1.9903846153846155e-06, |
|
"loss": 0.0109, |
|
"num_tokens": 26431763.0, |
|
"reward": 1.3570792913436889, |
|
"reward_std": 0.17546614594757556, |
|
"rewards/reward_func": 1.3570792555809021, |
|
"step": 210 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 1102.4250427246093, |
|
"epoch": 0.704, |
|
"grad_norm": 1.0148202612463983, |
|
"kl": 0.2970703125, |
|
"learning_rate": 1.9423076923076923e-06, |
|
"loss": 0.0119, |
|
"num_tokens": 27658616.0, |
|
"reward": 1.4585702121257782, |
|
"reward_std": 0.17592256814241408, |
|
"rewards/reward_func": 1.4585701644420623, |
|
"step": 220 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 1159.0083740234375, |
|
"epoch": 0.736, |
|
"grad_norm": 0.9394385447945356, |
|
"kl": 0.30498046875, |
|
"learning_rate": 1.8942307692307692e-06, |
|
"loss": 0.0122, |
|
"num_tokens": 28915389.0, |
|
"reward": 1.3902163803577423, |
|
"reward_std": 0.17962961010634898, |
|
"rewards/reward_func": 1.3902163565158845, |
|
"step": 230 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 1130.2417083740233, |
|
"epoch": 0.768, |
|
"grad_norm": 0.9787550227844843, |
|
"kl": 0.28994140625, |
|
"learning_rate": 1.8461538461538462e-06, |
|
"loss": 0.0116, |
|
"num_tokens": 30154961.0, |
|
"reward": 1.4172639489173888, |
|
"reward_std": 0.19173008762300014, |
|
"rewards/reward_func": 1.4172639012336732, |
|
"step": 240 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 1228.6833740234374, |
|
"epoch": 0.8, |
|
"grad_norm": 0.8878239030584107, |
|
"kl": 0.27294921875, |
|
"learning_rate": 1.7980769230769231e-06, |
|
"loss": 0.0109, |
|
"num_tokens": 31447029.0, |
|
"reward": 1.344082099199295, |
|
"reward_std": 0.2040238121524453, |
|
"rewards/reward_func": 1.3440820574760437, |
|
"step": 250 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 1235.8750457763672, |
|
"epoch": 0.832, |
|
"grad_norm": 0.9981329657418295, |
|
"kl": 0.270654296875, |
|
"learning_rate": 1.7500000000000002e-06, |
|
"loss": 0.0108, |
|
"num_tokens": 32737296.0, |
|
"reward": 1.3086235880851746, |
|
"reward_std": 0.16500989422202111, |
|
"rewards/reward_func": 1.3086235344409942, |
|
"step": 260 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 1109.133364868164, |
|
"epoch": 0.864, |
|
"grad_norm": 1.0372489736696113, |
|
"kl": 0.276611328125, |
|
"learning_rate": 1.7019230769230768e-06, |
|
"loss": 0.0111, |
|
"num_tokens": 33967186.0, |
|
"reward": 1.32349790930748, |
|
"reward_std": 0.16945721451193094, |
|
"rewards/reward_func": 1.3234978675842286, |
|
"step": 270 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 1160.8750396728515, |
|
"epoch": 0.896, |
|
"grad_norm": 0.9977790529738992, |
|
"kl": 0.280419921875, |
|
"learning_rate": 1.653846153846154e-06, |
|
"loss": 0.0112, |
|
"num_tokens": 35221063.0, |
|
"reward": 1.4063874542713166, |
|
"reward_std": 0.19618584401905537, |
|
"rewards/reward_func": 1.4063874065876008, |
|
"step": 280 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 1213.883352661133, |
|
"epoch": 0.928, |
|
"grad_norm": 0.8934722066084559, |
|
"kl": 0.28408203125, |
|
"learning_rate": 1.6057692307692308e-06, |
|
"loss": 0.0114, |
|
"num_tokens": 36502115.0, |
|
"reward": 1.3294113516807555, |
|
"reward_std": 0.17417304012924434, |
|
"rewards/reward_func": 1.3294113099575042, |
|
"step": 290 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 1091.5333679199218, |
|
"epoch": 0.96, |
|
"grad_norm": 1.018724425828204, |
|
"kl": 0.28583984375, |
|
"learning_rate": 1.5576923076923078e-06, |
|
"loss": 0.0114, |
|
"num_tokens": 37727916.0, |
|
"reward": 1.3869296073913575, |
|
"reward_std": 0.16371173467487096, |
|
"rewards/reward_func": 1.3869295775890351, |
|
"step": 300 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 1021.2416961669921, |
|
"epoch": 0.992, |
|
"grad_norm": 0.9294425443280302, |
|
"kl": 0.2927734375, |
|
"learning_rate": 1.5096153846153847e-06, |
|
"loss": 0.0117, |
|
"num_tokens": 38912972.0, |
|
"reward": 1.4153659164905548, |
|
"reward_std": 0.15135814435780048, |
|
"rewards/reward_func": 1.4153658747673035, |
|
"step": 310 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 1182.7667175292968, |
|
"epoch": 1.0256, |
|
"grad_norm": 0.8869424312219367, |
|
"kl": 0.27578125, |
|
"learning_rate": 1.4615384615384616e-06, |
|
"loss": 0.011, |
|
"num_tokens": 40172304.0, |
|
"reward": 1.3812768101692199, |
|
"reward_std": 0.16684217527508735, |
|
"rewards/reward_func": 1.3812767565250397, |
|
"step": 320 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 1228.6667022705078, |
|
"epoch": 1.0576, |
|
"grad_norm": 0.8613262556098445, |
|
"kl": 0.28349609375, |
|
"learning_rate": 1.4134615384615384e-06, |
|
"loss": 0.0113, |
|
"num_tokens": 41456270.0, |
|
"reward": 1.3983363509178162, |
|
"reward_std": 0.16395095624029637, |
|
"rewards/reward_func": 1.3983363211154938, |
|
"step": 330 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 1167.2917053222657, |
|
"epoch": 1.0896, |
|
"grad_norm": 0.9390384699267686, |
|
"kl": 0.280078125, |
|
"learning_rate": 1.3653846153846153e-06, |
|
"loss": 0.0112, |
|
"num_tokens": 42712372.0, |
|
"reward": 1.3964947879314422, |
|
"reward_std": 0.1702516170218587, |
|
"rewards/reward_func": 1.3964947640895844, |
|
"step": 340 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 1084.7917053222657, |
|
"epoch": 1.1216, |
|
"grad_norm": 0.9885096589277191, |
|
"kl": 0.2884765625, |
|
"learning_rate": 1.3173076923076924e-06, |
|
"loss": 0.0115, |
|
"num_tokens": 43925166.0, |
|
"reward": 1.4053608894348144, |
|
"reward_std": 0.1990381298586726, |
|
"rewards/reward_func": 1.4053608417510985, |
|
"step": 350 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 1068.3333618164063, |
|
"epoch": 1.1536, |
|
"grad_norm": 1.0735781172914411, |
|
"kl": 0.280126953125, |
|
"learning_rate": 1.2692307692307692e-06, |
|
"loss": 0.0112, |
|
"num_tokens": 45145132.0, |
|
"reward": 1.4313134729862214, |
|
"reward_std": 0.16737409718334675, |
|
"rewards/reward_func": 1.4313134491443633, |
|
"step": 360 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 1042.34169921875, |
|
"epoch": 1.1856, |
|
"grad_norm": 0.9580029006411359, |
|
"kl": 0.27919921875, |
|
"learning_rate": 1.221153846153846e-06, |
|
"loss": 0.0112, |
|
"num_tokens": 46348908.0, |
|
"reward": 1.3674868583679198, |
|
"reward_std": 0.14877260774374007, |
|
"rewards/reward_func": 1.3674868285655974, |
|
"step": 370 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 1031.9250396728517, |
|
"epoch": 1.2176, |
|
"grad_norm": 0.8804020781470968, |
|
"kl": 0.28212890625, |
|
"learning_rate": 1.1730769230769232e-06, |
|
"loss": 0.0113, |
|
"num_tokens": 47539473.0, |
|
"reward": 1.3569320678710937, |
|
"reward_std": 0.2110467005521059, |
|
"rewards/reward_func": 1.3569320380687713, |
|
"step": 380 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 1122.9250366210938, |
|
"epoch": 1.2496, |
|
"grad_norm": 0.9380366433577524, |
|
"kl": 0.265771484375, |
|
"learning_rate": 1.125e-06, |
|
"loss": 0.0106, |
|
"num_tokens": 48783723.0, |
|
"reward": 1.3864129424095153, |
|
"reward_std": 0.1814204862341285, |
|
"rewards/reward_func": 1.3864129185676575, |
|
"step": 390 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 1159.4166931152345, |
|
"epoch": 1.2816, |
|
"grad_norm": 0.8419779230541888, |
|
"kl": 0.256494140625, |
|
"learning_rate": 1.0769230769230769e-06, |
|
"loss": 0.0103, |
|
"num_tokens": 50043956.0, |
|
"reward": 1.3803913414478302, |
|
"reward_std": 0.19982445538043975, |
|
"rewards/reward_func": 1.38039128780365, |
|
"step": 400 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 1137.99169921875, |
|
"epoch": 1.3136, |
|
"grad_norm": 0.9984529373671053, |
|
"kl": 0.2568359375, |
|
"learning_rate": 1.028846153846154e-06, |
|
"loss": 0.0103, |
|
"num_tokens": 51299782.0, |
|
"reward": 1.460522174835205, |
|
"reward_std": 0.17815895490348338, |
|
"rewards/reward_func": 1.4605221152305603, |
|
"step": 410 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 1041.4417053222655, |
|
"epoch": 1.3456000000000001, |
|
"grad_norm": 0.9912374462984505, |
|
"kl": 0.26103515625, |
|
"learning_rate": 9.807692307692308e-07, |
|
"loss": 0.0104, |
|
"num_tokens": 52501749.0, |
|
"reward": 1.3423645079135895, |
|
"reward_std": 0.14929314143955708, |
|
"rewards/reward_func": 1.3423644721508026, |
|
"step": 420 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 1092.66669921875, |
|
"epoch": 1.3776, |
|
"grad_norm": 0.9523986290310165, |
|
"kl": 0.250439453125, |
|
"learning_rate": 9.326923076923077e-07, |
|
"loss": 0.01, |
|
"num_tokens": 53728457.0, |
|
"reward": 1.354611724615097, |
|
"reward_std": 0.13474751003086566, |
|
"rewards/reward_func": 1.3546116948127747, |
|
"step": 430 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 1057.16669921875, |
|
"epoch": 1.4096, |
|
"grad_norm": 1.009051628688743, |
|
"kl": 0.246142578125, |
|
"learning_rate": 8.846153846153847e-07, |
|
"loss": 0.0099, |
|
"num_tokens": 54938749.0, |
|
"reward": 1.4103461623191833, |
|
"reward_std": 0.17336017228662967, |
|
"rewards/reward_func": 1.4103461384773255, |
|
"step": 440 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 1060.3250366210937, |
|
"epoch": 1.4416, |
|
"grad_norm": 0.9353173548774456, |
|
"kl": 0.240380859375, |
|
"learning_rate": 8.365384615384616e-07, |
|
"loss": 0.0096, |
|
"num_tokens": 56141986.0, |
|
"reward": 1.3699217855930328, |
|
"reward_std": 0.1920205257833004, |
|
"rewards/reward_func": 1.369921749830246, |
|
"step": 450 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 1070.8083740234374, |
|
"epoch": 1.4736, |
|
"grad_norm": 1.1879379228096696, |
|
"kl": 0.243603515625, |
|
"learning_rate": 7.884615384615385e-07, |
|
"loss": 0.0098, |
|
"num_tokens": 57349220.0, |
|
"reward": 1.433353638648987, |
|
"reward_std": 0.1875140480697155, |
|
"rewards/reward_func": 1.4333535850048065, |
|
"step": 460 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 1095.0833648681642, |
|
"epoch": 1.5056, |
|
"grad_norm": 0.9303357023393288, |
|
"kl": 0.238525390625, |
|
"learning_rate": 7.403846153846155e-07, |
|
"loss": 0.0095, |
|
"num_tokens": 58565922.0, |
|
"reward": 1.4035043895244599, |
|
"reward_std": 0.1516191340982914, |
|
"rewards/reward_func": 1.403504341840744, |
|
"step": 470 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 1135.9750427246095, |
|
"epoch": 1.5375999999999999, |
|
"grad_norm": 1.0458418760593815, |
|
"kl": 0.2318359375, |
|
"learning_rate": 6.923076923076923e-07, |
|
"loss": 0.0093, |
|
"num_tokens": 59811024.0, |
|
"reward": 1.3473765075206756, |
|
"reward_std": 0.1674899585545063, |
|
"rewards/reward_func": 1.3473764777183532, |
|
"step": 480 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 1108.0083770751953, |
|
"epoch": 1.5695999999999999, |
|
"grad_norm": 0.9490082083884928, |
|
"kl": 0.23408203125, |
|
"learning_rate": 6.442307692307692e-07, |
|
"loss": 0.0094, |
|
"num_tokens": 61041091.0, |
|
"reward": 1.4498742938041687, |
|
"reward_std": 0.16173237282782793, |
|
"rewards/reward_func": 1.4498742520809174, |
|
"step": 490 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 1115.508352661133, |
|
"epoch": 1.6016, |
|
"grad_norm": 1.0783408314068932, |
|
"kl": 0.238330078125, |
|
"learning_rate": 5.961538461538461e-07, |
|
"loss": 0.0095, |
|
"num_tokens": 62274653.0, |
|
"reward": 1.4305694937705993, |
|
"reward_std": 0.18373069278895854, |
|
"rewards/reward_func": 1.4305694699287415, |
|
"step": 500 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 1106.008367919922, |
|
"epoch": 1.6336, |
|
"grad_norm": 1.0485879744752402, |
|
"kl": 0.2328125, |
|
"learning_rate": 5.48076923076923e-07, |
|
"loss": 0.0093, |
|
"num_tokens": 63507003.0, |
|
"reward": 1.4372118294239045, |
|
"reward_std": 0.16331376498565078, |
|
"rewards/reward_func": 1.4372117936611175, |
|
"step": 510 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 1086.8583679199219, |
|
"epoch": 1.6656, |
|
"grad_norm": 0.969087239165111, |
|
"kl": 0.22392578125, |
|
"learning_rate": 5e-07, |
|
"loss": 0.009, |
|
"num_tokens": 64735570.0, |
|
"reward": 1.4018862843513489, |
|
"reward_std": 0.14240890312939883, |
|
"rewards/reward_func": 1.4018862307071687, |
|
"step": 520 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 1181.633383178711, |
|
"epoch": 1.6976, |
|
"grad_norm": 0.930292561029781, |
|
"kl": 0.22138671875, |
|
"learning_rate": 4.519230769230769e-07, |
|
"loss": 0.0089, |
|
"num_tokens": 65998928.0, |
|
"reward": 1.451034414768219, |
|
"reward_std": 0.1665300503373146, |
|
"rewards/reward_func": 1.4510343849658967, |
|
"step": 530 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 1129.3750427246093, |
|
"epoch": 1.7296, |
|
"grad_norm": 0.9485552753279838, |
|
"kl": 0.219775390625, |
|
"learning_rate": 4.0384615384615386e-07, |
|
"loss": 0.0088, |
|
"num_tokens": 67237565.0, |
|
"reward": 1.4116078853607177, |
|
"reward_std": 0.19281931668519975, |
|
"rewards/reward_func": 1.411607849597931, |
|
"step": 540 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 1172.0500396728517, |
|
"epoch": 1.7616, |
|
"grad_norm": 0.9153424916960798, |
|
"kl": 0.222265625, |
|
"learning_rate": 3.557692307692308e-07, |
|
"loss": 0.0089, |
|
"num_tokens": 68499368.0, |
|
"reward": 1.3487315595149993, |
|
"reward_std": 0.16217916831374168, |
|
"rewards/reward_func": 1.3487315356731415, |
|
"step": 550 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 1124.0000396728515, |
|
"epoch": 1.7936, |
|
"grad_norm": 0.9325471561683758, |
|
"kl": 0.224365234375, |
|
"learning_rate": 3.076923076923077e-07, |
|
"loss": 0.009, |
|
"num_tokens": 69738074.0, |
|
"reward": 1.3620680332183839, |
|
"reward_std": 0.16596744321286677, |
|
"rewards/reward_func": 1.3620680034160615, |
|
"step": 560 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 1125.1417053222656, |
|
"epoch": 1.8256000000000001, |
|
"grad_norm": 0.9795359194730576, |
|
"kl": 0.22099609375, |
|
"learning_rate": 2.596153846153846e-07, |
|
"loss": 0.0088, |
|
"num_tokens": 70970701.0, |
|
"reward": 1.4437049567699431, |
|
"reward_std": 0.16041705552488567, |
|
"rewards/reward_func": 1.4437049448490142, |
|
"step": 570 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 1224.0833740234375, |
|
"epoch": 1.8576000000000001, |
|
"grad_norm": 0.9733689502236962, |
|
"kl": 0.21044921875, |
|
"learning_rate": 2.1153846153846155e-07, |
|
"loss": 0.0084, |
|
"num_tokens": 72266813.0, |
|
"reward": 1.3616447150707245, |
|
"reward_std": 0.1523937964811921, |
|
"rewards/reward_func": 1.3616446733474732, |
|
"step": 580 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 1109.3000366210938, |
|
"epoch": 1.8896, |
|
"grad_norm": 0.9591418934816538, |
|
"kl": 0.218115234375, |
|
"learning_rate": 1.6346153846153846e-07, |
|
"loss": 0.0087, |
|
"num_tokens": 73500374.0, |
|
"reward": 1.4193914115428925, |
|
"reward_std": 0.18239253303036093, |
|
"rewards/reward_func": 1.4193913757801055, |
|
"step": 590 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 1144.5250305175782, |
|
"epoch": 1.9216, |
|
"grad_norm": 1.03317829301988, |
|
"kl": 0.223828125, |
|
"learning_rate": 1.1538461538461539e-07, |
|
"loss": 0.009, |
|
"num_tokens": 74746547.0, |
|
"reward": 1.442783957719803, |
|
"reward_std": 0.16291865180246531, |
|
"rewards/reward_func": 1.4427839279174806, |
|
"step": 600 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 1135.000033569336, |
|
"epoch": 1.9536, |
|
"grad_norm": 1.0378878472369144, |
|
"kl": 0.21201171875, |
|
"learning_rate": 6.73076923076923e-08, |
|
"loss": 0.0085, |
|
"num_tokens": 75989744.0, |
|
"reward": 1.4489007711410522, |
|
"reward_std": 0.148934593424201, |
|
"rewards/reward_func": 1.4489007532596587, |
|
"step": 610 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 1140.8000366210938, |
|
"epoch": 1.9856, |
|
"grad_norm": 1.001500512040023, |
|
"kl": 0.21005859375, |
|
"learning_rate": 1.923076923076923e-08, |
|
"loss": 0.0084, |
|
"num_tokens": 77236076.0, |
|
"reward": 1.4212264478206635, |
|
"reward_std": 0.18371385149657726, |
|
"rewards/reward_func": 1.4212264180183412, |
|
"step": 620 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 624, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 312, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|