{
  "best_metric": 10.55037784576416,
  "best_model_checkpoint": "miner_id_24/checkpoint-200",
  "epoch": 0.020064205457463884,
  "eval_steps": 50,
  "global_step": 200,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {"epoch": 0.00010032102728731942, "grad_norm": 1.5991774797439575, "learning_rate": 1.009e-05, "loss": 22.2622, "step": 1},
    {"epoch": 0.00010032102728731942, "eval_loss": 11.123811721801758, "eval_runtime": 70.5623, "eval_samples_per_second": 59.493, "eval_steps_per_second": 14.88, "step": 1},
    {"epoch": 0.00020064205457463884, "grad_norm": 1.434697151184082, "learning_rate": 2.018e-05, "loss": 22.2601, "step": 2},
    {"epoch": 0.00030096308186195825, "grad_norm": 1.767243504524231, "learning_rate": 3.027e-05, "loss": 22.2774, "step": 3},
    {"epoch": 0.0004012841091492777, "grad_norm": 1.4239779710769653, "learning_rate": 4.036e-05, "loss": 22.2314, "step": 4},
    {"epoch": 0.0005016051364365971, "grad_norm": 1.3848161697387695, "learning_rate": 5.045e-05, "loss": 22.2259, "step": 5},
    {"epoch": 0.0006019261637239165, "grad_norm": 1.6321865320205688, "learning_rate": 6.054e-05, "loss": 22.2408, "step": 6},
    {"epoch": 0.0007022471910112359, "grad_norm": 1.7302271127700806, "learning_rate": 7.062999999999999e-05, "loss": 22.2005, "step": 7},
    {"epoch": 0.0008025682182985554, "grad_norm": 1.7554914951324463, "learning_rate": 8.072e-05, "loss": 22.1973, "step": 8},
    {"epoch": 0.0009028892455858748, "grad_norm": 1.476845145225525, "learning_rate": 9.081e-05, "loss": 22.1782, "step": 9},
    {"epoch": 0.0010032102728731941, "grad_norm": 1.6162638664245605, "learning_rate": 0.0001009, "loss": 22.1558, "step": 10},
    {"epoch": 0.0011035313001605137, "grad_norm": 1.559161901473999, "learning_rate": 0.00010036894736842106, "loss": 22.1244, "step": 11},
    {"epoch": 0.001203852327447833, "grad_norm": 1.6010931730270386, "learning_rate": 9.98378947368421e-05, "loss": 22.0848, "step": 12},
    {"epoch": 0.0013041733547351525, "grad_norm": 1.7077960968017578, "learning_rate": 9.930684210526315e-05, "loss": 22.0494, "step": 13},
    {"epoch": 0.0014044943820224719, "grad_norm": 1.5489342212677002, "learning_rate": 9.877578947368421e-05, "loss": 22.0367, "step": 14},
    {"epoch": 0.0015048154093097914, "grad_norm": 1.1338545083999634, "learning_rate": 9.824473684210527e-05, "loss": 22.0596, "step": 15},
    {"epoch": 0.0016051364365971107, "grad_norm": 1.444381833076477, "learning_rate": 9.771368421052632e-05, "loss": 21.9966, "step": 16},
    {"epoch": 0.0017054574638844303, "grad_norm": 1.4157575368881226, "learning_rate": 9.718263157894736e-05, "loss": 21.9681, "step": 17},
    {"epoch": 0.0018057784911717496, "grad_norm": 1.5229800939559937, "learning_rate": 9.665157894736842e-05, "loss": 21.9163, "step": 18},
    {"epoch": 0.001906099518459069, "grad_norm": 1.5339312553405762, "learning_rate": 9.612052631578948e-05, "loss": 21.8712, "step": 19},
    {"epoch": 0.0020064205457463883, "grad_norm": 1.4681302309036255, "learning_rate": 9.558947368421052e-05, "loss": 21.9278, "step": 20},
    {"epoch": 0.002106741573033708, "grad_norm": 1.7636168003082275, "learning_rate": 9.505842105263159e-05, "loss": 21.7252, "step": 21},
    {"epoch": 0.0022070626003210273, "grad_norm": 1.4351588487625122, "learning_rate": 9.452736842105263e-05, "loss": 21.7812, "step": 22},
    {"epoch": 0.002307383627608347, "grad_norm": 1.4266163110733032, "learning_rate": 9.399631578947368e-05, "loss": 21.767, "step": 23},
    {"epoch": 0.002407704654895666, "grad_norm": 1.3205598592758179, "learning_rate": 9.346526315789474e-05, "loss": 21.742, "step": 24},
    {"epoch": 0.0025080256821829855, "grad_norm": 1.32651948928833, "learning_rate": 9.293421052631578e-05, "loss": 21.7368, "step": 25},
    {"epoch": 0.002608346709470305, "grad_norm": 1.4364937543869019, "learning_rate": 9.240315789473684e-05, "loss": 21.647, "step": 26},
    {"epoch": 0.002708667736757624, "grad_norm": 1.3655853271484375, "learning_rate": 9.18721052631579e-05, "loss": 21.644, "step": 27},
    {"epoch": 0.0028089887640449437, "grad_norm": 1.3614681959152222, "learning_rate": 9.134105263157895e-05, "loss": 21.5887, "step": 28},
    {"epoch": 0.0029093097913322633, "grad_norm": 1.360560655593872, "learning_rate": 9.081e-05, "loss": 21.555, "step": 29},
    {"epoch": 0.003009630818619583, "grad_norm": 1.2450507879257202, "learning_rate": 9.027894736842105e-05, "loss": 21.5753, "step": 30},
    {"epoch": 0.003109951845906902, "grad_norm": 1.2793240547180176, "learning_rate": 8.97478947368421e-05, "loss": 21.5722, "step": 31},
    {"epoch": 0.0032102728731942215, "grad_norm": 1.0861470699310303, "learning_rate": 8.921684210526316e-05, "loss": 21.5705, "step": 32},
    {"epoch": 0.003310593900481541, "grad_norm": 1.2511563301086426, "learning_rate": 8.86857894736842e-05, "loss": 21.4441, "step": 33},
    {"epoch": 0.0034109149277688606, "grad_norm": 0.9954004287719727, "learning_rate": 8.815473684210527e-05, "loss": 21.6331, "step": 34},
    {"epoch": 0.0035112359550561797, "grad_norm": 1.0669211149215698, "learning_rate": 8.762368421052631e-05, "loss": 21.4872, "step": 35},
    {"epoch": 0.0036115569823434992, "grad_norm": 1.0056594610214233, "learning_rate": 8.709263157894737e-05, "loss": 21.4733, "step": 36},
    {"epoch": 0.0037118780096308188, "grad_norm": 1.016147255897522, "learning_rate": 8.656157894736843e-05, "loss": 21.4347, "step": 37},
    {"epoch": 0.003812199036918138, "grad_norm": 1.1539530754089355, "learning_rate": 8.603052631578947e-05, "loss": 21.3633, "step": 38},
    {"epoch": 0.003912520064205458, "grad_norm": 0.9206962585449219, "learning_rate": 8.549947368421052e-05, "loss": 21.4637, "step": 39},
    {"epoch": 0.0040128410914927765, "grad_norm": 0.833474338054657, "learning_rate": 8.496842105263158e-05, "loss": 21.4302, "step": 40},
    {"epoch": 0.004113162118780096, "grad_norm": 0.8578913807868958, "learning_rate": 8.443736842105264e-05, "loss": 21.4463, "step": 41},
    {"epoch": 0.004213483146067416, "grad_norm": 1.1002817153930664, "learning_rate": 8.390631578947369e-05, "loss": 21.2379, "step": 42},
    {"epoch": 0.004313804173354735, "grad_norm": 0.831058144569397, "learning_rate": 8.337526315789473e-05, "loss": 21.4246, "step": 43},
    {"epoch": 0.004414125200642055, "grad_norm": 0.7859891057014465, "learning_rate": 8.284421052631579e-05, "loss": 21.318, "step": 44},
    {"epoch": 0.004514446227929374, "grad_norm": 0.7401735782623291, "learning_rate": 8.231315789473685e-05, "loss": 21.3669, "step": 45},
    {"epoch": 0.004614767255216694, "grad_norm": 0.7849454879760742, "learning_rate": 8.178210526315789e-05, "loss": 21.3329, "step": 46},
    {"epoch": 0.0047150882825040125, "grad_norm": 0.6906037330627441, "learning_rate": 8.125105263157894e-05, "loss": 21.6264, "step": 47},
    {"epoch": 0.004815409309791332, "grad_norm": 0.7339415550231934, "learning_rate": 8.072e-05, "loss": 21.5479, "step": 48},
    {"epoch": 0.0049157303370786515, "grad_norm": 0.7654588222503662, "learning_rate": 8.018894736842106e-05, "loss": 21.4293, "step": 49},
    {"epoch": 0.005016051364365971, "grad_norm": 0.6686012148857117, "learning_rate": 7.965789473684211e-05, "loss": 21.401, "step": 50},
    {"epoch": 0.005016051364365971, "eval_loss": 10.654853820800781, "eval_runtime": 70.5622, "eval_samples_per_second": 59.494, "eval_steps_per_second": 14.88, "step": 50},
    {"epoch": 0.005116372391653291, "grad_norm": 0.6304632425308228, "learning_rate": 7.912684210526315e-05, "loss": 21.4006, "step": 51},
    {"epoch": 0.00521669341894061, "grad_norm": 0.8711794018745422, "learning_rate": 7.859578947368421e-05, "loss": 21.125, "step": 52},
    {"epoch": 0.00531701444622793, "grad_norm": 0.6818077564239502, "learning_rate": 7.806473684210527e-05, "loss": 21.2897, "step": 53},
    {"epoch": 0.005417335473515248, "grad_norm": 0.7950479388237, "learning_rate": 7.753368421052631e-05, "loss": 21.1913, "step": 54},
    {"epoch": 0.005517656500802568, "grad_norm": 0.6777336597442627, "learning_rate": 7.700263157894738e-05, "loss": 21.3268, "step": 55},
    {"epoch": 0.0056179775280898875, "grad_norm": 0.9595808386802673, "learning_rate": 7.647157894736842e-05, "loss": 21.0679, "step": 56},
    {"epoch": 0.005718298555377207, "grad_norm": 0.7060173153877258, "learning_rate": 7.594052631578948e-05, "loss": 21.2321, "step": 57},
    {"epoch": 0.005818619582664527, "grad_norm": 0.6887240409851074, "learning_rate": 7.540947368421053e-05, "loss": 21.1762, "step": 58},
    {"epoch": 0.005918940609951846, "grad_norm": 0.6797646880149841, "learning_rate": 7.487842105263157e-05, "loss": 21.3383, "step": 59},
    {"epoch": 0.006019261637239166, "grad_norm": 0.7469924092292786, "learning_rate": 7.434736842105263e-05, "loss": 21.1953, "step": 60},
    {"epoch": 0.006119582664526484, "grad_norm": 0.8638947606086731, "learning_rate": 7.381631578947368e-05, "loss": 21.1223, "step": 61},
    {"epoch": 0.006219903691813804, "grad_norm": 0.7506782412528992, "learning_rate": 7.328526315789474e-05, "loss": 21.1332, "step": 62},
    {"epoch": 0.006320224719101123, "grad_norm": 0.7623918652534485, "learning_rate": 7.27542105263158e-05, "loss": 21.2479, "step": 63},
    {"epoch": 0.006420545746388443, "grad_norm": 0.6180063486099243, "learning_rate": 7.222315789473684e-05, "loss": 21.2955, "step": 64},
    {"epoch": 0.0065208667736757625, "grad_norm": 0.7025743722915649, "learning_rate": 7.16921052631579e-05, "loss": 21.0339, "step": 65},
    {"epoch": 0.006621187800963082, "grad_norm": 0.6287188529968262, "learning_rate": 7.116105263157895e-05, "loss": 21.2094, "step": 66},
    {"epoch": 0.006721508828250402, "grad_norm": 0.6974558234214783, "learning_rate": 7.062999999999999e-05, "loss": 21.4861, "step": 67},
    {"epoch": 0.006821829855537721, "grad_norm": 0.6953954696655273, "learning_rate": 7.009894736842106e-05, "loss": 21.3575, "step": 68},
    {"epoch": 0.00692215088282504, "grad_norm": 0.7836357951164246, "learning_rate": 6.95678947368421e-05, "loss": 21.5824, "step": 69},
    {"epoch": 0.007022471910112359, "grad_norm": 0.5490475296974182, "learning_rate": 6.903684210526316e-05, "loss": 21.3933, "step": 70},
    {"epoch": 0.007122792937399679, "grad_norm": 0.5662202835083008, "learning_rate": 6.850578947368422e-05, "loss": 21.1143, "step": 71},
    {"epoch": 0.0072231139646869984, "grad_norm": 0.6373079419136047, "learning_rate": 6.797473684210526e-05, "loss": 21.1235, "step": 72},
    {"epoch": 0.007323434991974318, "grad_norm": 0.7429666519165039, "learning_rate": 6.744368421052631e-05, "loss": 21.1772, "step": 73},
    {"epoch": 0.0074237560192616375, "grad_norm": 0.5452073812484741, "learning_rate": 6.691263157894736e-05, "loss": 21.2562, "step": 74},
    {"epoch": 0.007524077046548957, "grad_norm": 0.5880535244941711, "learning_rate": 6.638157894736843e-05, "loss": 21.2677, "step": 75},
    {"epoch": 0.007624398073836276, "grad_norm": 1.1386655569076538, "learning_rate": 6.585052631578948e-05, "loss": 21.0751, "step": 76},
    {"epoch": 0.007724719101123595, "grad_norm": 0.6015380024909973, "learning_rate": 6.531947368421052e-05, "loss": 21.2662, "step": 77},
    {"epoch": 0.007825040128410916, "grad_norm": 0.6043453216552734, "learning_rate": 6.478842105263158e-05, "loss": 21.0845, "step": 78},
    {"epoch": 0.007925361155698234, "grad_norm": 0.715646505355835, "learning_rate": 6.425736842105264e-05, "loss": 21.035, "step": 79},
    {"epoch": 0.008025682182985553, "grad_norm": 0.4522017538547516, "learning_rate": 6.372631578947368e-05, "loss": 21.2421, "step": 80},
    {"epoch": 0.008126003210272873, "grad_norm": 0.591495156288147, "learning_rate": 6.319526315789473e-05, "loss": 21.2218, "step": 81},
    {"epoch": 0.008226324237560192, "grad_norm": 0.8266862034797668, "learning_rate": 6.266421052631579e-05, "loss": 21.3115, "step": 82},
    {"epoch": 0.008326645264847513, "grad_norm": 0.5965524911880493, "learning_rate": 6.213315789473685e-05, "loss": 21.1308, "step": 83},
    {"epoch": 0.008426966292134831, "grad_norm": 0.5218135118484497, "learning_rate": 6.16021052631579e-05, "loss": 21.2285, "step": 84},
    {"epoch": 0.008527287319422152, "grad_norm": 0.6091591715812683, "learning_rate": 6.107105263157894e-05, "loss": 21.2382, "step": 85},
    {"epoch": 0.00862760834670947, "grad_norm": 0.5717049241065979, "learning_rate": 6.054e-05, "loss": 21.3019, "step": 86},
    {"epoch": 0.008727929373996789, "grad_norm": 0.49166449904441833, "learning_rate": 6.000894736842105e-05, "loss": 21.1254, "step": 87},
    {"epoch": 0.00882825040128411, "grad_norm": 0.6972517967224121, "learning_rate": 5.94778947368421e-05, "loss": 21.1412, "step": 88},
    {"epoch": 0.008928571428571428, "grad_norm": 0.508397102355957, "learning_rate": 5.894684210526316e-05, "loss": 21.1448, "step": 89},
    {"epoch": 0.009028892455858748, "grad_norm": 0.5189328193664551, "learning_rate": 5.841578947368421e-05, "loss": 21.3218, "step": 90},
    {"epoch": 0.009129213483146067, "grad_norm": 0.5766506195068359, "learning_rate": 5.7884736842105265e-05, "loss": 21.1138, "step": 91},
    {"epoch": 0.009229534510433388, "grad_norm": 0.4988974630832672, "learning_rate": 5.7353684210526314e-05, "loss": 21.0883, "step": 92},
    {"epoch": 0.009329855537720706, "grad_norm": 0.6053217053413391, "learning_rate": 5.6822631578947364e-05, "loss": 21.1423, "step": 93},
    {"epoch": 0.009430176565008025, "grad_norm": 0.6934704184532166, "learning_rate": 5.629157894736842e-05, "loss": 21.2583, "step": 94},
    {"epoch": 0.009530497592295345, "grad_norm": 0.5622691512107849, "learning_rate": 5.576052631578948e-05, "loss": 21.2342, "step": 95},
    {"epoch": 0.009630818619582664, "grad_norm": 0.5352026224136353, "learning_rate": 5.522947368421053e-05, "loss": 21.1231, "step": 96},
    {"epoch": 0.009731139646869984, "grad_norm": 0.4841010868549347, "learning_rate": 5.469842105263158e-05, "loss": 21.1777, "step": 97},
    {"epoch": 0.009831460674157303, "grad_norm": 0.5576900839805603, "learning_rate": 5.416736842105263e-05, "loss": 21.1561, "step": 98},
    {"epoch": 0.009931781701444624, "grad_norm": 0.5335264205932617, "learning_rate": 5.3636315789473685e-05, "loss": 21.2953, "step": 99},
    {"epoch": 0.010032102728731942, "grad_norm": 0.5405234098434448, "learning_rate": 5.3105263157894734e-05, "loss": 21.3645, "step": 100},
    {"epoch": 0.010032102728731942, "eval_loss": 10.584715843200684, "eval_runtime": 70.5882, "eval_samples_per_second": 59.472, "eval_steps_per_second": 14.875, "step": 100},
    {"epoch": 0.01013242375601926, "grad_norm": 0.6677088141441345, "learning_rate": 5.257421052631578e-05, "loss": 21.1299, "step": 101},
    {"epoch": 0.010232744783306581, "grad_norm": 0.5807069540023804, "learning_rate": 5.2043157894736846e-05, "loss": 21.0427, "step": 102},
    {"epoch": 0.0103330658105939, "grad_norm": 0.501835823059082, "learning_rate": 5.1512105263157895e-05, "loss": 21.1984, "step": 103},
    {"epoch": 0.01043338683788122, "grad_norm": 0.7399368286132812, "learning_rate": 5.098105263157895e-05, "loss": 21.1552, "step": 104},
    {"epoch": 0.010533707865168539, "grad_norm": 0.8803501725196838, "learning_rate": 5.045e-05, "loss": 20.9362, "step": 105},
    {"epoch": 0.01063402889245586, "grad_norm": 0.6539490818977356, "learning_rate": 4.991894736842105e-05, "loss": 21.3907, "step": 106},
    {"epoch": 0.010734349919743178, "grad_norm": 0.8279363512992859, "learning_rate": 4.9387894736842105e-05, "loss": 21.1683, "step": 107},
    {"epoch": 0.010834670947030497, "grad_norm": 0.7174587845802307, "learning_rate": 4.885684210526316e-05, "loss": 21.0967, "step": 108},
    {"epoch": 0.010934991974317817, "grad_norm": 0.5338941216468811, "learning_rate": 4.832578947368421e-05, "loss": 21.2763, "step": 109},
    {"epoch": 0.011035313001605136, "grad_norm": 0.5613208413124084, "learning_rate": 4.779473684210526e-05, "loss": 21.3178, "step": 110},
    {"epoch": 0.011135634028892456, "grad_norm": 0.5921754240989685, "learning_rate": 4.7263684210526315e-05, "loss": 21.1902, "step": 111},
    {"epoch": 0.011235955056179775, "grad_norm": 0.6494777798652649, "learning_rate": 4.673263157894737e-05, "loss": 21.1112, "step": 112},
    {"epoch": 0.011336276083467095, "grad_norm": 0.6433371901512146, "learning_rate": 4.620157894736842e-05, "loss": 21.1761, "step": 113},
    {"epoch": 0.011436597110754414, "grad_norm": 0.915080726146698, "learning_rate": 4.5670526315789475e-05, "loss": 21.0194, "step": 114},
    {"epoch": 0.011536918138041733, "grad_norm": 1.0387072563171387, "learning_rate": 4.5139473684210524e-05, "loss": 20.9303, "step": 115},
    {"epoch": 0.011637239165329053, "grad_norm": 0.6426976919174194, "learning_rate": 4.460842105263158e-05, "loss": 21.3885, "step": 116},
    {"epoch": 0.011737560192616372, "grad_norm": 0.923250138759613, "learning_rate": 4.4077368421052636e-05, "loss": 20.8614, "step": 117},
    {"epoch": 0.011837881219903692, "grad_norm": 0.5725429058074951, "learning_rate": 4.3546315789473685e-05, "loss": 21.1852, "step": 118},
    {"epoch": 0.011938202247191011, "grad_norm": 0.656847357749939, "learning_rate": 4.3015263157894734e-05, "loss": 21.3078, "step": 119},
    {"epoch": 0.012038523274478331, "grad_norm": 0.5193572640419006, "learning_rate": 4.248421052631579e-05, "loss": 21.1464, "step": 120},
    {"epoch": 0.01213884430176565, "grad_norm": 0.7125611901283264, "learning_rate": 4.1953157894736846e-05, "loss": 21.1635, "step": 121},
    {"epoch": 0.012239165329052969, "grad_norm": 0.523034930229187, "learning_rate": 4.1422105263157895e-05, "loss": 21.1683, "step": 122},
    {"epoch": 0.012339486356340289, "grad_norm": 0.540019154548645, "learning_rate": 4.0891052631578944e-05, "loss": 21.0405, "step": 123},
    {"epoch": 0.012439807383627608, "grad_norm": 0.6048979759216309, "learning_rate": 4.036e-05, "loss": 20.9969, "step": 124},
    {"epoch": 0.012540128410914928, "grad_norm": 0.6617588996887207, "learning_rate": 3.9828947368421056e-05, "loss": 20.9615, "step": 125},
    {"epoch": 0.012640449438202247, "grad_norm": 0.5792971253395081, "learning_rate": 3.9297894736842105e-05, "loss": 21.0814, "step": 126},
    {"epoch": 0.012740770465489567, "grad_norm": 0.8640639185905457, "learning_rate": 3.8766842105263154e-05, "loss": 20.9608, "step": 127},
    {"epoch": 0.012841091492776886, "grad_norm": 0.6493314504623413, "learning_rate": 3.823578947368421e-05, "loss": 21.0183, "step": 128},
    {"epoch": 0.012941412520064205, "grad_norm": 0.5835341811180115, "learning_rate": 3.7704736842105265e-05, "loss": 21.247, "step": 129},
    {"epoch": 0.013041733547351525, "grad_norm": 0.5780165791511536, "learning_rate": 3.7173684210526315e-05, "loss": 21.1309, "step": 130},
    {"epoch": 0.013142054574638844, "grad_norm": 0.603012204170227, "learning_rate": 3.664263157894737e-05, "loss": 21.105, "step": 131},
    {"epoch": 0.013242375601926164, "grad_norm": 0.5957277417182922, "learning_rate": 3.611157894736842e-05, "loss": 21.1341, "step": 132},
    {"epoch": 0.013342696629213483, "grad_norm": 0.5657469034194946, "learning_rate": 3.5580526315789475e-05, "loss": 21.2018, "step": 133},
    {"epoch": 0.013443017656500803, "grad_norm": 0.5668490529060364, "learning_rate": 3.504947368421053e-05, "loss": 21.2215, "step": 134},
    {"epoch": 0.013543338683788122, "grad_norm": 0.6400595903396606, "learning_rate": 3.451842105263158e-05, "loss": 21.1004, "step": 135},
    {"epoch": 0.013643659711075442, "grad_norm": 0.5542194247245789, "learning_rate": 3.398736842105263e-05, "loss": 21.1221, "step": 136},
    {"epoch": 0.013743980738362761, "grad_norm": 0.5827724933624268, "learning_rate": 3.345631578947368e-05, "loss": 21.1213, "step": 137},
    {"epoch": 0.01384430176565008, "grad_norm": 0.5147905945777893, "learning_rate": 3.292526315789474e-05, "loss": 21.1524, "step": 138},
    {"epoch": 0.0139446227929374, "grad_norm": 0.6220738291740417, "learning_rate": 3.239421052631579e-05, "loss": 21.0253, "step": 139},
    {"epoch": 0.014044943820224719, "grad_norm": 0.6777515411376953, "learning_rate": 3.186315789473684e-05, "loss": 21.3317, "step": 140},
    {"epoch": 0.014145264847512039, "grad_norm": 0.6565226912498474, "learning_rate": 3.1332105263157895e-05, "loss": 20.9974, "step": 141},
    {"epoch": 0.014245585874799358, "grad_norm": 0.6572129130363464, "learning_rate": 3.080105263157895e-05, "loss": 21.1355, "step": 142},
    {"epoch": 0.014345906902086678, "grad_norm": 0.6094454526901245, "learning_rate": 3.027e-05, "loss": 21.1024, "step": 143},
    {"epoch": 0.014446227929373997, "grad_norm": 0.5109902024269104, "learning_rate": 2.973894736842105e-05, "loss": 21.224, "step": 144},
    {"epoch": 0.014546548956661316, "grad_norm": 0.7738908529281616, "learning_rate": 2.9207894736842105e-05, "loss": 21.4256, "step": 145},
    {"epoch": 0.014646869983948636, "grad_norm": 0.5986607670783997, "learning_rate": 2.8676842105263157e-05, "loss": 21.2798, "step": 146},
    {"epoch": 0.014747191011235955, "grad_norm": 0.6626487970352173, "learning_rate": 2.814578947368421e-05, "loss": 21.1379, "step": 147},
    {"epoch": 0.014847512038523275, "grad_norm": 0.6779626607894897, "learning_rate": 2.7614736842105266e-05, "loss": 21.0229, "step": 148},
    {"epoch": 0.014947833065810594, "grad_norm": 0.6698882579803467, "learning_rate": 2.7083684210526315e-05, "loss": 21.0818, "step": 149},
    {"epoch": 0.015048154093097914, "grad_norm": 0.8434391021728516, "learning_rate": 2.6552631578947367e-05, "loss": 21.5482, "step": 150},
    {"epoch": 0.015048154093097914, "eval_loss": 10.558324813842773, "eval_runtime": 70.5565, "eval_samples_per_second": 59.498, "eval_steps_per_second": 14.882, "step": 150},
    {"epoch": 0.015148475120385233, "grad_norm": 0.8424636721611023, "learning_rate": 2.6021578947368423e-05, "loss": 20.936, "step": 151},
    {"epoch": 0.015248796147672551, "grad_norm": 0.5448256731033325, "learning_rate": 2.5490526315789475e-05, "loss": 21.1742, "step": 152},
    {"epoch": 0.015349117174959872, "grad_norm": 0.6612237095832825, "learning_rate": 2.4959473684210524e-05, "loss": 21.0709, "step": 153},
    {"epoch": 0.01544943820224719, "grad_norm": 0.550390899181366, "learning_rate": 2.442842105263158e-05, "loss": 20.8933, "step": 154},
    {"epoch": 0.015549759229534511, "grad_norm": 0.6066597700119019, "learning_rate": 2.389736842105263e-05, "loss": 20.9679, "step": 155},
    {"epoch": 0.01565008025682183, "grad_norm": 0.595045804977417, "learning_rate": 2.3366315789473685e-05, "loss": 21.0887, "step": 156},
    {"epoch": 0.01575040128410915, "grad_norm": 0.6363713145256042, "learning_rate": 2.2835263157894738e-05, "loss": 21.3898, "step": 157},
    {"epoch": 0.01585072231139647, "grad_norm": 0.5982836484909058, "learning_rate": 2.230421052631579e-05, "loss": 21.1671, "step": 158},
    {"epoch": 0.015951043338683787, "grad_norm": 0.7963234782218933, "learning_rate": 2.1773157894736843e-05, "loss": 20.9716, "step": 159},
    {"epoch": 0.016051364365971106, "grad_norm": 0.647216796875, "learning_rate": 2.1242105263157895e-05, "loss": 21.1608, "step": 160},
    {"epoch": 0.016151685393258428, "grad_norm": 0.5022075772285461, "learning_rate": 2.0711052631578947e-05, "loss": 21.1819, "step": 161},
    {"epoch": 0.016252006420545747, "grad_norm": 0.5094108581542969, "learning_rate": 2.018e-05, "loss": 21.1568, "step": 162},
    {"epoch": 0.016352327447833066, "grad_norm": 0.5434950590133667, "learning_rate": 1.9648947368421052e-05, "loss": 20.9586, "step": 163},
    {"epoch": 0.016452648475120384, "grad_norm": 0.6874385476112366, "learning_rate": 1.9117894736842105e-05, "loss": 21.0375, "step": 164},
    {"epoch": 0.016552969502407703, "grad_norm": 0.49630945920944214, "learning_rate": 1.8586842105263157e-05, "loss": 21.0352, "step": 165},
    {"epoch": 0.016653290529695025, "grad_norm": 0.6111531257629395, "learning_rate": 1.805578947368421e-05, "loss": 21.2822, "step": 166},
    {"epoch": 0.016753611556982344, "grad_norm": 0.5392615795135498, "learning_rate": 1.7524736842105266e-05, "loss": 21.1524, "step": 167},
    {"epoch": 0.016853932584269662, "grad_norm": 0.5594942569732666, "learning_rate": 1.6993684210526315e-05, "loss": 21.1931, "step": 168},
    {"epoch": 0.01695425361155698, "grad_norm": 0.5756310224533081, "learning_rate": 1.646263157894737e-05, "loss": 21.2089, "step": 169},
    {"epoch": 0.017054574638844303, "grad_norm": 0.783043622970581, "learning_rate": 1.593157894736842e-05, "loss": 20.8715, "step": 170},
    {"epoch": 0.017154895666131622, "grad_norm": 0.4898316562175751, "learning_rate": 1.5400526315789475e-05, "loss": 21.1136, "step": 171},
    {"epoch": 0.01725521669341894, "grad_norm": 0.6354514360427856, "learning_rate": 1.4869473684210524e-05, "loss": 21.089, "step": 172},
    {"epoch": 0.01735553772070626, "grad_norm": 0.6032927632331848, "learning_rate": 1.4338421052631579e-05, "loss": 21.0936, "step": 173},
    {"epoch": 0.017455858747993578, "grad_norm": 0.6571072936058044, "learning_rate": 1.3807368421052633e-05, "loss": 20.9723, "step": 174},
    {"epoch": 0.0175561797752809, "grad_norm": 0.5904538035392761, "learning_rate": 1.3276315789473684e-05, "loss": 21.287, "step": 175},
    {"epoch": 0.01765650080256822, "grad_norm": 0.5847862362861633, "learning_rate": 1.2745263157894738e-05, "loss": 21.1966, "step": 176},
    {"epoch": 0.017756821829855537, "grad_norm": 0.5706862211227417, "learning_rate": 1.221421052631579e-05, "loss": 20.9706, "step": 177},
    {"epoch": 0.017857142857142856, "grad_norm": 0.5410795211791992, "learning_rate": 1.1683157894736843e-05, "loss": 21.2563, "step": 178},
    {"epoch": 0.01795746388443018, "grad_norm": 0.5394900441169739, "learning_rate": 1.1152105263157895e-05, "loss": 20.9992, "step": 179},
    {"epoch": 0.018057784911717497, "grad_norm": 0.45912498235702515, "learning_rate": 1.0621052631578948e-05, "loss": 21.2137, "step": 180},
    {"epoch": 0.018158105939004816, "grad_norm": 0.5860676765441895, "learning_rate": 1.009e-05, "loss": 21.1744, "step": 181},
    {"epoch": 0.018258426966292134, "grad_norm": 0.7394751310348511, "learning_rate": 9.558947368421052e-06, "loss": 20.9416, "step": 182},
    {"epoch": 0.018358747993579453, "grad_norm": 0.6703020334243774, "learning_rate": 9.027894736842105e-06, "loss": 20.9801, "step": 183},
    {"epoch": 0.018459069020866775, "grad_norm": 0.5259845852851868, "learning_rate": 8.496842105263157e-06, "loss": 21.0672, "step": 184},
    {"epoch": 0.018559390048154094, "grad_norm": 0.47938376665115356, "learning_rate": 7.96578947368421e-06, "loss": 21.0379, "step": 185},
    {"epoch": 0.018659711075441412, "grad_norm": 0.6665632128715515, "learning_rate": 7.434736842105262e-06, "loss": 21.3934, "step": 186},
    {"epoch": 0.01876003210272873, "grad_norm": 0.6356412172317505, "learning_rate": 6.903684210526316e-06, "loss": 21.3995, "step": 187},
    {"epoch": 0.01886035313001605, "grad_norm": 0.5531170964241028, "learning_rate": 6.372631578947369e-06, "loss": 21.177, "step": 188},
    {"epoch": 0.018960674157303372, "grad_norm": 0.5264145731925964, "learning_rate": 5.841578947368421e-06, "loss": 21.2467, "step": 189},
    {"epoch": 0.01906099518459069, "grad_norm": 0.5184823870658875, "learning_rate": 5.310526315789474e-06, "loss": 21.0768, "step": 190},
    {"epoch": 0.01916131621187801, "grad_norm": 0.601334810256958, "learning_rate": 4.779473684210526e-06, "loss": 21.0318, "step": 191},
    {"epoch": 0.019261637239165328, "grad_norm": 0.6639525890350342, "learning_rate": 4.248421052631579e-06, "loss": 20.9996, "step": 192},
    {"epoch": 0.01936195826645265, "grad_norm": 0.47377097606658936, "learning_rate": 3.717368421052631e-06, "loss": 21.0164, "step": 193},
    {"epoch": 0.01946227929373997, "grad_norm": 0.6908702254295349, "learning_rate": 3.1863157894736844e-06, "loss": 20.9247, "step": 194},
    {"epoch": 0.019562600321027288, "grad_norm": 0.5369330048561096, "learning_rate": 2.655263157894737e-06, "loss": 21.3861, "step": 195},
    {"epoch": 0.019662921348314606, "grad_norm": 0.6818935871124268, "learning_rate": 2.1242105263157893e-06, "loss": 21.0936, "step": 196},
    {"epoch": 0.019763242375601925, "grad_norm": 0.6107151508331299, "learning_rate": 1.5931578947368422e-06, "loss": 21.0526, "step": 197},
    {"epoch": 0.019863563402889247, "grad_norm": 0.6597663760185242, "learning_rate": 1.0621052631578947e-06, "loss": 21.1064, "step": 198},
    {"epoch": 0.019963884430176566, "grad_norm": 0.6686668992042542, "learning_rate": 5.310526315789473e-07, "loss": 21.0729, "step": 199},
    {"epoch": 0.020064205457463884, "grad_norm": 0.7321626543998718, "learning_rate": 0.0, "loss": 21.4709, "step": 200},
    {"epoch": 0.020064205457463884, "eval_loss": 10.55037784576416, "eval_runtime": 70.588, "eval_samples_per_second": 59.472, "eval_steps_per_second": 14.875, "step": 200}
  ],
  "logging_steps": 1,
  "max_steps": 200,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 11943700070400.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}