|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9981298423724285, |
|
"eval_steps": 250, |
|
"global_step": 467, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0021373230029388193, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.0, |
|
"logits": -2.7276527881622314, |
|
"logps": -123.19757843017578, |
|
"loss": 10.6046, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.010686615014694095, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.0, |
|
"logits": -2.8715224266052246, |
|
"logps": -234.59034729003906, |
|
"loss": 10.6046, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.02137323002938819, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.0, |
|
"logits": -2.846045732498169, |
|
"logps": -248.165771484375, |
|
"loss": 10.6046, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.03205984504408229, |
|
"grad_norm": 255.55146866663068, |
|
"learning_rate": 4.25531914893617e-08, |
|
"logits": -2.7775120735168457, |
|
"logps": -229.2094268798828, |
|
"loss": 10.6046, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.04274646005877638, |
|
"grad_norm": 230.48646842792138, |
|
"learning_rate": 8.51063829787234e-08, |
|
"logits": -2.7639544010162354, |
|
"logps": -203.9646453857422, |
|
"loss": 10.5828, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.053433075073470476, |
|
"grad_norm": 232.18379947059884, |
|
"learning_rate": 1.3829787234042553e-07, |
|
"logits": -2.9257798194885254, |
|
"logps": -291.21368408203125, |
|
"loss": 10.3131, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.06411969008816458, |
|
"grad_norm": 196.18922057698444, |
|
"learning_rate": 1.8085106382978725e-07, |
|
"logits": -2.9007389545440674, |
|
"logps": -280.6877746582031, |
|
"loss": 9.7161, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.07480630510285867, |
|
"grad_norm": 149.23598824045513, |
|
"learning_rate": 2.3404255319148937e-07, |
|
"logits": -2.8924500942230225, |
|
"logps": -238.8040008544922, |
|
"loss": 9.1924, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.08549292011755276, |
|
"grad_norm": 161.52328592717336, |
|
"learning_rate": 2.872340425531915e-07, |
|
"logits": -2.906430244445801, |
|
"logps": -238.0535125732422, |
|
"loss": 8.689, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.09617953513224686, |
|
"grad_norm": 164.58690993798479, |
|
"learning_rate": 3.404255319148936e-07, |
|
"logits": -2.8339877128601074, |
|
"logps": -255.1993408203125, |
|
"loss": 8.6734, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.10686615014694095, |
|
"grad_norm": 160.57372707570067, |
|
"learning_rate": 3.9361702127659574e-07, |
|
"logits": -2.683300495147705, |
|
"logps": -267.0218811035156, |
|
"loss": 8.3867, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.11755276516163506, |
|
"grad_norm": 188.48791056446322, |
|
"learning_rate": 4.4680851063829783e-07, |
|
"logits": -2.871129035949707, |
|
"logps": -279.77490234375, |
|
"loss": 8.1179, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.12823938017632916, |
|
"grad_norm": 267.6697231628655, |
|
"learning_rate": 5e-07, |
|
"logits": -2.7084033489227295, |
|
"logps": -250.2171630859375, |
|
"loss": 8.1081, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.13892599519102325, |
|
"grad_norm": 177.79779678409923, |
|
"learning_rate": 4.998251761970996e-07, |
|
"logits": -2.750121593475342, |
|
"logps": -287.4389953613281, |
|
"loss": 7.8328, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.14961261020571734, |
|
"grad_norm": 208.94788258133997, |
|
"learning_rate": 4.993009492952949e-07, |
|
"logits": -2.4973702430725098, |
|
"logps": -269.4114685058594, |
|
"loss": 7.9672, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.16029922522041143, |
|
"grad_norm": 173.26779082554597, |
|
"learning_rate": 4.984280524733107e-07, |
|
"logits": -2.3464930057525635, |
|
"logps": -264.332763671875, |
|
"loss": 7.5605, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.17098584023510552, |
|
"grad_norm": 202.04935826838994, |
|
"learning_rate": 4.972077065562821e-07, |
|
"logits": -2.621952772140503, |
|
"logps": -305.9883117675781, |
|
"loss": 7.637, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.18167245524979964, |
|
"grad_norm": 199.23137245224822, |
|
"learning_rate": 4.959823971496574e-07, |
|
"logits": -2.253603458404541, |
|
"logps": -302.60235595703125, |
|
"loss": 7.5955, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.19235907026449373, |
|
"grad_norm": 218.9891204099018, |
|
"learning_rate": 4.941412689514941e-07, |
|
"logits": -2.4109156131744385, |
|
"logps": -239.48507690429688, |
|
"loss": 7.6334, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.20304568527918782, |
|
"grad_norm": 242.95380700951432, |
|
"learning_rate": 4.919586871126667e-07, |
|
"logits": -2.2712628841400146, |
|
"logps": -297.40130615234375, |
|
"loss": 7.5278, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.2137323002938819, |
|
"grad_norm": 279.78089235463716, |
|
"learning_rate": 4.894377041712326e-07, |
|
"logits": -2.2863192558288574, |
|
"logps": -246.08676147460938, |
|
"loss": 7.4129, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.224418915308576, |
|
"grad_norm": 218.205141996837, |
|
"learning_rate": 4.86581845949791e-07, |
|
"logits": -2.370880603790283, |
|
"logps": -293.19805908203125, |
|
"loss": 7.2449, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.2351055303232701, |
|
"grad_norm": 194.81739436050302, |
|
"learning_rate": 4.833951066243004e-07, |
|
"logits": -2.4095540046691895, |
|
"logps": -306.03082275390625, |
|
"loss": 7.3173, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.2457921453379642, |
|
"grad_norm": 226.30530082559127, |
|
"learning_rate": 4.798819431378626e-07, |
|
"logits": -2.2181053161621094, |
|
"logps": -286.49847412109375, |
|
"loss": 7.526, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.2564787603526583, |
|
"grad_norm": 202.93833816678475, |
|
"learning_rate": 4.7604726896728496e-07, |
|
"logits": -2.2005436420440674, |
|
"logps": -267.14569091796875, |
|
"loss": 7.4251, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.2671653753673524, |
|
"grad_norm": 203.06428453105448, |
|
"learning_rate": 4.718964472511385e-07, |
|
"logits": -2.209239959716797, |
|
"logps": -288.13275146484375, |
|
"loss": 7.1662, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.2778519903820465, |
|
"grad_norm": 215.2104310960483, |
|
"learning_rate": 4.6743528328892384e-07, |
|
"logits": -2.2576992511749268, |
|
"logps": -281.14300537109375, |
|
"loss": 7.3282, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.2885386053967406, |
|
"grad_norm": 238.8531994789955, |
|
"learning_rate": 4.626700164218349e-07, |
|
"logits": -2.2369213104248047, |
|
"logps": -274.5392761230469, |
|
"loss": 7.2793, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.2992252204114347, |
|
"grad_norm": 194.18891480135753, |
|
"learning_rate": 4.576073113064759e-07, |
|
"logits": -2.2322239875793457, |
|
"logps": -306.53765869140625, |
|
"loss": 7.2126, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.30991183542612877, |
|
"grad_norm": 210.99118334067984, |
|
"learning_rate": 4.5225424859373684e-07, |
|
"logits": -1.9125369787216187, |
|
"logps": -301.7188415527344, |
|
"loss": 7.2698, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.32059845044082286, |
|
"grad_norm": 231.166437495367, |
|
"learning_rate": 4.4661831502586244e-07, |
|
"logits": -2.2598278522491455, |
|
"logps": -280.9080505371094, |
|
"loss": 7.3962, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.33128506545551695, |
|
"grad_norm": 208.78382876532186, |
|
"learning_rate": 4.407073929655666e-07, |
|
"logits": -2.394502878189087, |
|
"logps": -311.90313720703125, |
|
"loss": 7.2151, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.34197168047021104, |
|
"grad_norm": 215.66312719335843, |
|
"learning_rate": 4.345297493718352e-07, |
|
"logits": -2.2727105617523193, |
|
"logps": -330.09454345703125, |
|
"loss": 7.1174, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.3526582954849052, |
|
"grad_norm": 264.75577128712837, |
|
"learning_rate": 4.280940242378362e-07, |
|
"logits": -2.451925039291382, |
|
"logps": -306.2353515625, |
|
"loss": 7.1723, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.36334491049959927, |
|
"grad_norm": 228.79614002477874, |
|
"learning_rate": 4.2140921850710855e-07, |
|
"logits": -2.2807087898254395, |
|
"logps": -277.45513916015625, |
|
"loss": 7.138, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.37403152551429336, |
|
"grad_norm": 214.03399197355702, |
|
"learning_rate": 4.1448468148492814e-07, |
|
"logits": -2.1667227745056152, |
|
"logps": -283.96124267578125, |
|
"loss": 7.1358, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.38471814052898745, |
|
"grad_norm": 202.51891762833395, |
|
"learning_rate": 4.0733009776245937e-07, |
|
"logits": -2.267343759536743, |
|
"logps": -294.8756408691406, |
|
"loss": 7.2722, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.39540475554368154, |
|
"grad_norm": 196.58236698857016, |
|
"learning_rate": 3.9995547367197843e-07, |
|
"logits": -2.249849319458008, |
|
"logps": -241.7456817626953, |
|
"loss": 7.0336, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.40609137055837563, |
|
"grad_norm": 242.5442077272206, |
|
"learning_rate": 3.92371123292113e-07, |
|
"logits": -2.30956768989563, |
|
"logps": -291.7135314941406, |
|
"loss": 7.1338, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.4167779855730697, |
|
"grad_norm": 205.7646598262892, |
|
"learning_rate": 3.8458765402267056e-07, |
|
"logits": -2.2470812797546387, |
|
"logps": -344.83697509765625, |
|
"loss": 7.0991, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.4274646005877638, |
|
"grad_norm": 216.7698934838153, |
|
"learning_rate": 3.766159517492307e-07, |
|
"logits": -2.2487077713012695, |
|
"logps": -265.8359375, |
|
"loss": 7.0658, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.4381512156024579, |
|
"grad_norm": 226.71190138381652, |
|
"learning_rate": 3.6846716561824967e-07, |
|
"logits": -2.062194585800171, |
|
"logps": -281.28875732421875, |
|
"loss": 7.0913, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.448837830617152, |
|
"grad_norm": 203.05337694244434, |
|
"learning_rate": 3.601526924439709e-07, |
|
"logits": -1.9705560207366943, |
|
"logps": -292.10076904296875, |
|
"loss": 7.1005, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.45952444563184613, |
|
"grad_norm": 225.29279653900883, |
|
"learning_rate": 3.516841607689501e-07, |
|
"logits": -1.8947185277938843, |
|
"logps": -277.2485656738281, |
|
"loss": 6.8025, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.4702110606465402, |
|
"grad_norm": 201.0670416518307, |
|
"learning_rate": 3.430734146004863e-07, |
|
"logits": -1.8409500122070312, |
|
"logps": -241.2770538330078, |
|
"loss": 6.9224, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.4808976756612343, |
|
"grad_norm": 223.20105131848624, |
|
"learning_rate": 3.343324968457075e-07, |
|
"logits": -2.0675368309020996, |
|
"logps": -319.1631774902344, |
|
"loss": 6.9332, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.4915842906759284, |
|
"grad_norm": 203.4619800698717, |
|
"learning_rate": 3.2547363246847546e-07, |
|
"logits": -2.032095432281494, |
|
"logps": -329.3011169433594, |
|
"loss": 6.959, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.5022709056906225, |
|
"grad_norm": 195.3765181768678, |
|
"learning_rate": 3.1650921139166874e-07, |
|
"logits": -2.065058469772339, |
|
"logps": -264.1317443847656, |
|
"loss": 6.9561, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.5129575207053166, |
|
"grad_norm": 219.86422779627276, |
|
"learning_rate": 3.0927009442887437e-07, |
|
"logits": -2.0100936889648438, |
|
"logps": -313.6205139160156, |
|
"loss": 6.8973, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.5236441357200107, |
|
"grad_norm": 212.81114982295514, |
|
"learning_rate": 3.0197792270443976e-07, |
|
"logits": -2.245912551879883, |
|
"logps": -290.1841735839844, |
|
"loss": 6.8105, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.5343307507347048, |
|
"grad_norm": 187.3147065739447, |
|
"learning_rate": 2.927980480494938e-07, |
|
"logits": -2.414654016494751, |
|
"logps": -277.44146728515625, |
|
"loss": 6.8406, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.5343307507347048, |
|
"eval_logits": -2.2185680866241455, |
|
"eval_logps": -311.0428466796875, |
|
"eval_loss": 6.951282978057861, |
|
"eval_runtime": 698.6834, |
|
"eval_samples_per_second": 2.817, |
|
"eval_steps_per_second": 0.176, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.5450173657493989, |
|
"grad_norm": 227.083154432323, |
|
"learning_rate": 2.8355831645441387e-07, |
|
"logits": -1.9179697036743164, |
|
"logps": -291.5309143066406, |
|
"loss": 6.822, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.555703980764093, |
|
"grad_norm": 245.16541245685002, |
|
"learning_rate": 2.74271650519322e-07, |
|
"logits": -1.9247322082519531, |
|
"logps": -296.0821533203125, |
|
"loss": 6.8422, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.566390595778787, |
|
"grad_norm": 225.15920603486012, |
|
"learning_rate": 2.6495103848625854e-07, |
|
"logits": -2.181062936782837, |
|
"logps": -283.18682861328125, |
|
"loss": 6.7859, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.5770772107934812, |
|
"grad_norm": 299.00671646485944, |
|
"learning_rate": 2.5560951607395126e-07, |
|
"logits": -1.8866008520126343, |
|
"logps": -279.102783203125, |
|
"loss": 7.1417, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.5877638258081752, |
|
"grad_norm": 230.71910708961073, |
|
"learning_rate": 2.4626014824618413e-07, |
|
"logits": -2.221686601638794, |
|
"logps": -287.7673034667969, |
|
"loss": 6.8403, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.5984504408228694, |
|
"grad_norm": 211.41031806648016, |
|
"learning_rate": 2.3691601093926402e-07, |
|
"logits": -1.7728792428970337, |
|
"logps": -321.795654296875, |
|
"loss": 6.8701, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.6091370558375635, |
|
"grad_norm": 253.292924709598, |
|
"learning_rate": 2.2759017277414164e-07, |
|
"logits": -1.8754329681396484, |
|
"logps": -265.4561462402344, |
|
"loss": 6.7639, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.6198236708522575, |
|
"grad_norm": 205.31020938154433, |
|
"learning_rate": 2.1829567677876297e-07, |
|
"logits": -1.9769783020019531, |
|
"logps": -317.8040466308594, |
|
"loss": 6.8172, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.6305102858669517, |
|
"grad_norm": 207.84276096155014, |
|
"learning_rate": 2.0904552214621556e-07, |
|
"logits": -1.676018476486206, |
|
"logps": -265.141357421875, |
|
"loss": 6.7718, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.6411969008816457, |
|
"grad_norm": 204.44859670052526, |
|
"learning_rate": 1.998526460541818e-07, |
|
"logits": -1.9445130825042725, |
|
"logps": -284.47369384765625, |
|
"loss": 6.7747, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.6518835158963399, |
|
"grad_norm": 209.6039341920446, |
|
"learning_rate": 1.9072990557112564e-07, |
|
"logits": -2.0264129638671875, |
|
"logps": -297.75054931640625, |
|
"loss": 6.7677, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.6625701309110339, |
|
"grad_norm": 215.0612799009116, |
|
"learning_rate": 1.8169005967452e-07, |
|
"logits": -1.8567161560058594, |
|
"logps": -280.9984130859375, |
|
"loss": 6.8603, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.673256745925728, |
|
"grad_norm": 197.29312792480752, |
|
"learning_rate": 1.745263675315245e-07, |
|
"logits": -2.0486676692962646, |
|
"logps": -327.4547119140625, |
|
"loss": 6.8465, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.6839433609404221, |
|
"grad_norm": 238.26624940734476, |
|
"learning_rate": 1.656675031542925e-07, |
|
"logits": -2.019059658050537, |
|
"logps": -274.0588073730469, |
|
"loss": 6.6012, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.6946299759551162, |
|
"grad_norm": 220.9919549477957, |
|
"learning_rate": 1.569265853995137e-07, |
|
"logits": -2.256427049636841, |
|
"logps": -325.6917419433594, |
|
"loss": 6.7183, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.7053165909698104, |
|
"grad_norm": 212.1271664375421, |
|
"learning_rate": 1.4831583923104998e-07, |
|
"logits": -2.2320096492767334, |
|
"logps": -313.65789794921875, |
|
"loss": 6.6207, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.7160032059845044, |
|
"grad_norm": 207.7521264933154, |
|
"learning_rate": 1.3984730755602903e-07, |
|
"logits": -1.9610904455184937, |
|
"logps": -343.70404052734375, |
|
"loss": 6.5826, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.7266898209991985, |
|
"grad_norm": 215.3834907746626, |
|
"learning_rate": 1.3153283438175034e-07, |
|
"logits": -2.140193462371826, |
|
"logps": -273.7430114746094, |
|
"loss": 6.7632, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.7373764360138926, |
|
"grad_norm": 256.3727047818064, |
|
"learning_rate": 1.2338404825076935e-07, |
|
"logits": -2.2074012756347656, |
|
"logps": -314.4117431640625, |
|
"loss": 6.7545, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.7480630510285867, |
|
"grad_norm": 201.02797158653888, |
|
"learning_rate": 1.1541234597732947e-07, |
|
"logits": -2.0764918327331543, |
|
"logps": -277.55096435546875, |
|
"loss": 6.591, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.7587496660432808, |
|
"grad_norm": 218.96272655449943, |
|
"learning_rate": 1.0762887670788701e-07, |
|
"logits": -2.157193422317505, |
|
"logps": -280.75982666015625, |
|
"loss": 6.7929, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.7694362810579749, |
|
"grad_norm": 208.78021518365045, |
|
"learning_rate": 1.0004452632802158e-07, |
|
"logits": -1.925675392150879, |
|
"logps": -310.53741455078125, |
|
"loss": 6.7644, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.7801228960726689, |
|
"grad_norm": 216.30827661654823, |
|
"learning_rate": 9.266990223754067e-08, |
|
"logits": -2.0759708881378174, |
|
"logps": -283.6690673828125, |
|
"loss": 6.8653, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.7908095110873631, |
|
"grad_norm": 205.4821190378051, |
|
"learning_rate": 8.551531851507185e-08, |
|
"logits": -1.9833778142929077, |
|
"logps": -288.6242980957031, |
|
"loss": 6.6705, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.8014961261020572, |
|
"grad_norm": 251.26666947633868, |
|
"learning_rate": 7.859078149289144e-08, |
|
"logits": -1.9092228412628174, |
|
"logps": -291.9231872558594, |
|
"loss": 6.8123, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.8121827411167513, |
|
"grad_norm": 189.97241816084082, |
|
"learning_rate": 7.190597576216384e-08, |
|
"logits": -1.845017433166504, |
|
"logps": -301.9135437011719, |
|
"loss": 6.6675, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.8228693561314454, |
|
"grad_norm": 218.01784907983554, |
|
"learning_rate": 6.547025062816486e-08, |
|
"logits": -1.711168885231018, |
|
"logps": -295.16204833984375, |
|
"loss": 6.8118, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.8335559711461394, |
|
"grad_norm": 211.42106624912535, |
|
"learning_rate": 5.929260703443337e-08, |
|
"logits": -1.875091552734375, |
|
"logps": -295.939697265625, |
|
"loss": 6.653, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.8442425861608336, |
|
"grad_norm": 200.5784556196799, |
|
"learning_rate": 5.338168497413756e-08, |
|
"logits": -1.9821618795394897, |
|
"logps": -275.2363586425781, |
|
"loss": 6.7263, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.8549292011755276, |
|
"grad_norm": 205.842593450728, |
|
"learning_rate": 4.774575140626316e-08, |
|
"logits": -2.117987871170044, |
|
"logps": -302.21337890625, |
|
"loss": 6.8007, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.8656158161902218, |
|
"grad_norm": 194.5717778365053, |
|
"learning_rate": 4.2392688693524055e-08, |
|
"logits": -2.10815167427063, |
|
"logps": -312.15777587890625, |
|
"loss": 6.8551, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.8763024312049158, |
|
"grad_norm": 202.9851351806552, |
|
"learning_rate": 3.732998357816514e-08, |
|
"logits": -1.8980505466461182, |
|
"logps": -293.62109375, |
|
"loss": 6.8402, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.88698904621961, |
|
"grad_norm": 195.97476528351888, |
|
"learning_rate": 3.256471671107616e-08, |
|
"logits": -1.9927390813827515, |
|
"logps": -272.60333251953125, |
|
"loss": 6.6945, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.897675661234304, |
|
"grad_norm": 213.77642939097004, |
|
"learning_rate": 2.8103552748861475e-08, |
|
"logits": -2.258983612060547, |
|
"logps": -289.2433166503906, |
|
"loss": 6.8397, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.9083622762489981, |
|
"grad_norm": 205.12665728294334, |
|
"learning_rate": 2.3952731032714973e-08, |
|
"logits": -2.0726983547210693, |
|
"logps": -287.94366455078125, |
|
"loss": 6.8186, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.9190488912636923, |
|
"grad_norm": 327.06404418966537, |
|
"learning_rate": 2.085943603250595e-08, |
|
"logits": -2.1224637031555176, |
|
"logps": -316.38055419921875, |
|
"loss": 6.6469, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.9297355062783863, |
|
"grad_norm": 209.9878479167945, |
|
"learning_rate": 1.7281562838948966e-08, |
|
"logits": -2.207428455352783, |
|
"logps": -287.23980712890625, |
|
"loss": 6.7807, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.9404221212930804, |
|
"grad_norm": 204.6141972372138, |
|
"learning_rate": 1.4029167422908105e-08, |
|
"logits": -2.080173969268799, |
|
"logps": -270.1136779785156, |
|
"loss": 6.5741, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.9511087363077745, |
|
"grad_norm": 262.190215194203, |
|
"learning_rate": 1.1106798553464802e-08, |
|
"logits": -2.0705575942993164, |
|
"logps": -331.4059753417969, |
|
"loss": 6.882, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.9617953513224686, |
|
"grad_norm": 262.4229244994443, |
|
"learning_rate": 8.518543427732949e-09, |
|
"logits": -2.039073944091797, |
|
"logps": -295.8294677734375, |
|
"loss": 6.7283, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.9724819663371627, |
|
"grad_norm": 209.90951283828468, |
|
"learning_rate": 6.268021954544095e-09, |
|
"logits": -1.8816810846328735, |
|
"logps": -292.3460998535156, |
|
"loss": 6.5609, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.9831685813518568, |
|
"grad_norm": 222.97533041380498, |
|
"learning_rate": 4.358381691677931e-09, |
|
"logits": -1.8735195398330688, |
|
"logps": -253.9568328857422, |
|
"loss": 6.7129, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.9938551963665508, |
|
"grad_norm": 238.54526372408858, |
|
"learning_rate": 2.7922934437178692e-09, |
|
"logits": -2.1066086292266846, |
|
"logps": -293.4354248046875, |
|
"loss": 6.7326, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.9981298423724285, |
|
"step": 467, |
|
"total_flos": 0.0, |
|
"train_loss": 1.3275885816815067, |
|
"train_runtime": 8211.0134, |
|
"train_samples_per_second": 7.293, |
|
"train_steps_per_second": 0.057 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 467, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 125, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|