{
  "best_metric": 11.038614273071289,
  "best_model_checkpoint": "miner_id_24/checkpoint-100",
  "epoch": 0.09813542688910697,
  "eval_steps": 50,
  "global_step": 100,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0009813542688910696,
      "grad_norm": 4.1202497482299805,
      "learning_rate": 5.000000000000001e-07,
      "loss": 44.3701,
      "step": 1
    },
    {
      "epoch": 0.0009813542688910696,
      "eval_loss": 11.094230651855469,
      "eval_runtime": 5.8483,
      "eval_samples_per_second": 293.417,
      "eval_steps_per_second": 36.763,
      "step": 1
    },
    {
      "epoch": 0.001962708537782139,
      "grad_norm": 4.6952314376831055,
      "learning_rate": 1.0000000000000002e-06,
      "loss": 44.357,
      "step": 2
    },
    {
      "epoch": 0.002944062806673209,
      "grad_norm": 4.975703239440918,
      "learning_rate": 1.5e-06,
      "loss": 44.3399,
      "step": 3
    },
    {
      "epoch": 0.003925417075564278,
      "grad_norm": 4.898032188415527,
      "learning_rate": 2.0000000000000003e-06,
      "loss": 44.3496,
      "step": 4
    },
    {
      "epoch": 0.004906771344455349,
      "grad_norm": 4.699200630187988,
      "learning_rate": 2.5e-06,
      "loss": 44.3727,
      "step": 5
    },
    {
      "epoch": 0.005888125613346418,
      "grad_norm": 4.840972423553467,
      "learning_rate": 3e-06,
      "loss": 44.3562,
      "step": 6
    },
    {
      "epoch": 0.0068694798822374874,
      "grad_norm": 4.837454795837402,
      "learning_rate": 3.5e-06,
      "loss": 44.3481,
      "step": 7
    },
    {
      "epoch": 0.007850834151128557,
      "grad_norm": 4.493252754211426,
      "learning_rate": 4.000000000000001e-06,
      "loss": 44.3513,
      "step": 8
    },
    {
      "epoch": 0.008832188420019628,
      "grad_norm": 4.787064552307129,
      "learning_rate": 4.5e-06,
      "loss": 44.3664,
      "step": 9
    },
    {
      "epoch": 0.009813542688910697,
      "grad_norm": 4.681407928466797,
      "learning_rate": 5e-06,
      "loss": 44.3545,
      "step": 10
    },
    {
      "epoch": 0.010794896957801767,
      "grad_norm": 4.4647321701049805,
      "learning_rate": 4.99847706754774e-06,
      "loss": 44.3586,
      "step": 11
    },
    {
      "epoch": 0.011776251226692836,
      "grad_norm": 4.408224105834961,
      "learning_rate": 4.993910125649561e-06,
      "loss": 44.3444,
      "step": 12
    },
    {
      "epoch": 0.012757605495583905,
      "grad_norm": 4.694049835205078,
      "learning_rate": 4.986304738420684e-06,
      "loss": 44.3296,
      "step": 13
    },
    {
      "epoch": 0.013738959764474975,
      "grad_norm": 4.30063009262085,
      "learning_rate": 4.975670171853926e-06,
      "loss": 44.3525,
      "step": 14
    },
    {
      "epoch": 0.014720314033366046,
      "grad_norm": 4.59287691116333,
      "learning_rate": 4.962019382530521e-06,
      "loss": 44.3156,
      "step": 15
    },
    {
      "epoch": 0.015701668302257114,
      "grad_norm": 4.614467620849609,
      "learning_rate": 4.9453690018345144e-06,
      "loss": 44.3097,
      "step": 16
    },
    {
      "epoch": 0.016683022571148183,
      "grad_norm": 4.312943935394287,
      "learning_rate": 4.925739315689991e-06,
      "loss": 44.3131,
      "step": 17
    },
    {
      "epoch": 0.017664376840039256,
      "grad_norm": 4.630992412567139,
      "learning_rate": 4.903154239845798e-06,
      "loss": 44.3009,
      "step": 18
    },
    {
      "epoch": 0.018645731108930325,
      "grad_norm": 4.261321544647217,
      "learning_rate": 4.8776412907378845e-06,
      "loss": 44.294,
      "step": 19
    },
    {
      "epoch": 0.019627085377821395,
      "grad_norm": 4.367868423461914,
      "learning_rate": 4.849231551964771e-06,
      "loss": 44.3004,
      "step": 20
    },
    {
      "epoch": 0.020608439646712464,
      "grad_norm": 4.4335503578186035,
      "learning_rate": 4.817959636416969e-06,
      "loss": 44.3056,
      "step": 21
    },
    {
      "epoch": 0.021589793915603533,
      "grad_norm": 4.420947074890137,
      "learning_rate": 4.783863644106502e-06,
      "loss": 44.2962,
      "step": 22
    },
    {
      "epoch": 0.022571148184494603,
      "grad_norm": 4.479759693145752,
      "learning_rate": 4.746985115747918e-06,
      "loss": 44.2849,
      "step": 23
    },
    {
      "epoch": 0.023552502453385672,
      "grad_norm": 4.4245805740356445,
      "learning_rate": 4.707368982147318e-06,
      "loss": 44.3022,
      "step": 24
    },
    {
      "epoch": 0.02453385672227674,
      "grad_norm": 4.920063495635986,
      "learning_rate": 4.665063509461098e-06,
      "loss": 44.2652,
      "step": 25
    },
    {
      "epoch": 0.02551521099116781,
      "grad_norm": 4.60530424118042,
      "learning_rate": 4.620120240391065e-06,
      "loss": 44.2587,
      "step": 26
    },
    {
      "epoch": 0.02649656526005888,
      "grad_norm": 4.810507774353027,
      "learning_rate": 4.572593931387604e-06,
      "loss": 44.2725,
      "step": 27
    },
    {
      "epoch": 0.02747791952894995,
      "grad_norm": 4.324153900146484,
      "learning_rate": 4.522542485937369e-06,
      "loss": 44.2904,
      "step": 28
    },
    {
      "epoch": 0.02845927379784102,
      "grad_norm": 4.457505226135254,
      "learning_rate": 4.470026884016805e-06,
      "loss": 44.2651,
      "step": 29
    },
    {
      "epoch": 0.029440628066732092,
      "grad_norm": 4.2836151123046875,
      "learning_rate": 4.415111107797445e-06,
      "loss": 44.272,
      "step": 30
    },
    {
      "epoch": 0.03042198233562316,
      "grad_norm": 4.381106853485107,
      "learning_rate": 4.357862063693486e-06,
      "loss": 44.2687,
      "step": 31
    },
    {
      "epoch": 0.03140333660451423,
      "grad_norm": 4.747635841369629,
      "learning_rate": 4.2983495008466285e-06,
      "loss": 44.2819,
      "step": 32
    },
    {
      "epoch": 0.0323846908734053,
      "grad_norm": 4.578216075897217,
      "learning_rate": 4.236645926147493e-06,
      "loss": 44.2072,
      "step": 33
    },
    {
      "epoch": 0.033366045142296366,
      "grad_norm": 4.499361515045166,
      "learning_rate": 4.172826515897146e-06,
      "loss": 44.2359,
      "step": 34
    },
    {
      "epoch": 0.03434739941118744,
      "grad_norm": 4.1071553230285645,
      "learning_rate": 4.106969024216348e-06,
      "loss": 44.2548,
      "step": 35
    },
    {
      "epoch": 0.03532875368007851,
      "grad_norm": 4.5971550941467285,
      "learning_rate": 4.039153688314146e-06,
      "loss": 44.2519,
      "step": 36
    },
    {
      "epoch": 0.03631010794896958,
      "grad_norm": 4.54329776763916,
      "learning_rate": 3.969463130731183e-06,
      "loss": 44.2257,
      "step": 37
    },
    {
      "epoch": 0.03729146221786065,
      "grad_norm": 4.047177791595459,
      "learning_rate": 3.897982258676867e-06,
      "loss": 44.2495,
      "step": 38
    },
    {
      "epoch": 0.038272816486751716,
      "grad_norm": 3.584639549255371,
      "learning_rate": 3.824798160583012e-06,
      "loss": 44.2784,
      "step": 39
    },
    {
      "epoch": 0.03925417075564279,
      "grad_norm": 3.700026750564575,
      "learning_rate": 3.7500000000000005e-06,
      "loss": 44.2736,
      "step": 40
    },
    {
      "epoch": 0.040235525024533855,
      "grad_norm": 4.082536220550537,
      "learning_rate": 3.6736789069647273e-06,
      "loss": 44.2867,
      "step": 41
    },
    {
      "epoch": 0.04121687929342493,
      "grad_norm": 3.2533934116363525,
      "learning_rate": 3.595927866972694e-06,
      "loss": 44.319,
      "step": 42
    },
    {
      "epoch": 0.042198233562315994,
      "grad_norm": 3.820909261703491,
      "learning_rate": 3.516841607689501e-06,
      "loss": 44.2602,
      "step": 43
    },
    {
      "epoch": 0.04317958783120707,
      "grad_norm": 3.5113043785095215,
      "learning_rate": 3.436516483539781e-06,
      "loss": 44.2805,
      "step": 44
    },
    {
      "epoch": 0.04416094210009813,
      "grad_norm": 3.5770328044891357,
      "learning_rate": 3.3550503583141726e-06,
      "loss": 44.3724,
      "step": 45
    },
    {
      "epoch": 0.045142296368989206,
      "grad_norm": 3.635390043258667,
      "learning_rate": 3.272542485937369e-06,
      "loss": 44.4049,
      "step": 46
    },
    {
      "epoch": 0.04612365063788027,
      "grad_norm": 3.494272232055664,
      "learning_rate": 3.189093389542498e-06,
      "loss": 44.3339,
      "step": 47
    },
    {
      "epoch": 0.047105004906771344,
      "grad_norm": 4.5640668869018555,
      "learning_rate": 3.1048047389991693e-06,
      "loss": 44.3798,
      "step": 48
    },
    {
      "epoch": 0.04808635917566242,
      "grad_norm": 5.724422931671143,
      "learning_rate": 3.019779227044398e-06,
      "loss": 44.29,
      "step": 49
    },
    {
      "epoch": 0.04906771344455348,
      "grad_norm": 6.1241936683654785,
      "learning_rate": 2.9341204441673267e-06,
      "loss": 44.4495,
      "step": 50
    },
    {
      "epoch": 0.04906771344455348,
      "eval_loss": 11.050891876220703,
      "eval_runtime": 5.0876,
      "eval_samples_per_second": 337.292,
      "eval_steps_per_second": 42.26,
      "step": 50
    },
    {
      "epoch": 0.050049067713444556,
      "grad_norm": 4.3872294425964355,
      "learning_rate": 2.847932752400164e-06,
      "loss": 44.1583,
      "step": 51
    },
    {
      "epoch": 0.05103042198233562,
      "grad_norm": 4.958473205566406,
      "learning_rate": 2.761321158169134e-06,
      "loss": 44.1556,
      "step": 52
    },
    {
      "epoch": 0.052011776251226695,
      "grad_norm": 4.641199588775635,
      "learning_rate": 2.6743911843603134e-06,
      "loss": 44.1518,
      "step": 53
    },
    {
      "epoch": 0.05299313052011776,
      "grad_norm": 4.831917762756348,
      "learning_rate": 2.587248741756253e-06,
      "loss": 44.1574,
      "step": 54
    },
    {
      "epoch": 0.053974484789008834,
      "grad_norm": 4.641740322113037,
      "learning_rate": 2.5e-06,
      "loss": 44.177,
      "step": 55
    },
    {
      "epoch": 0.0549558390578999,
      "grad_norm": 5.027724742889404,
      "learning_rate": 2.4127512582437486e-06,
      "loss": 44.1403,
      "step": 56
    },
    {
      "epoch": 0.05593719332679097,
      "grad_norm": 4.93941068649292,
      "learning_rate": 2.325608815639687e-06,
      "loss": 44.1304,
      "step": 57
    },
    {
      "epoch": 0.05691854759568204,
      "grad_norm": 4.8098063468933105,
      "learning_rate": 2.238678841830867e-06,
      "loss": 44.1245,
      "step": 58
    },
    {
      "epoch": 0.05789990186457311,
      "grad_norm": 4.973179340362549,
      "learning_rate": 2.1520672475998374e-06,
      "loss": 44.141,
      "step": 59
    },
    {
      "epoch": 0.058881256133464184,
      "grad_norm": 4.688238620758057,
      "learning_rate": 2.0658795558326745e-06,
      "loss": 44.154,
      "step": 60
    },
    {
      "epoch": 0.05986261040235525,
      "grad_norm": 4.850657939910889,
      "learning_rate": 1.9802207729556023e-06,
      "loss": 44.1483,
      "step": 61
    },
    {
      "epoch": 0.06084396467124632,
      "grad_norm": 4.537477016448975,
      "learning_rate": 1.895195261000831e-06,
      "loss": 44.1491,
      "step": 62
    },
    {
      "epoch": 0.06182531894013739,
      "grad_norm": 4.47567081451416,
      "learning_rate": 1.8109066104575023e-06,
      "loss": 44.1562,
      "step": 63
    },
    {
      "epoch": 0.06280667320902845,
      "grad_norm": 4.615531921386719,
      "learning_rate": 1.7274575140626318e-06,
      "loss": 44.1694,
      "step": 64
    },
    {
      "epoch": 0.06378802747791953,
      "grad_norm": 4.739807605743408,
      "learning_rate": 1.6449496416858285e-06,
      "loss": 44.1339,
      "step": 65
    },
    {
      "epoch": 0.0647693817468106,
      "grad_norm": 4.978174686431885,
      "learning_rate": 1.56348351646022e-06,
      "loss": 44.1016,
      "step": 66
    },
    {
      "epoch": 0.06575073601570167,
      "grad_norm": 4.427630424499512,
      "learning_rate": 1.4831583923105e-06,
      "loss": 44.1453,
      "step": 67
    },
    {
      "epoch": 0.06673209028459273,
      "grad_norm": 4.460508823394775,
      "learning_rate": 1.4040721330273063e-06,
      "loss": 44.1541,
      "step": 68
    },
    {
      "epoch": 0.06771344455348381,
      "grad_norm": 4.592861175537109,
      "learning_rate": 1.3263210930352737e-06,
      "loss": 44.1361,
      "step": 69
    },
    {
      "epoch": 0.06869479882237488,
      "grad_norm": 4.343865394592285,
      "learning_rate": 1.2500000000000007e-06,
      "loss": 44.1732,
      "step": 70
    },
    {
      "epoch": 0.06967615309126594,
      "grad_norm": 4.420068264007568,
      "learning_rate": 1.1752018394169882e-06,
      "loss": 44.1383,
      "step": 71
    },
    {
      "epoch": 0.07065750736015702,
      "grad_norm": 4.614556789398193,
      "learning_rate": 1.1020177413231334e-06,
      "loss": 44.142,
      "step": 72
    },
    {
      "epoch": 0.07163886162904809,
      "grad_norm": 4.291627883911133,
      "learning_rate": 1.0305368692688175e-06,
      "loss": 44.1535,
      "step": 73
    },
    {
      "epoch": 0.07262021589793916,
      "grad_norm": 4.367705345153809,
      "learning_rate": 9.608463116858544e-07,
      "loss": 44.1653,
      "step": 74
    },
    {
      "epoch": 0.07360157016683022,
      "grad_norm": 4.751893997192383,
      "learning_rate": 8.930309757836517e-07,
      "loss": 44.1325,
      "step": 75
    },
    {
      "epoch": 0.0745829244357213,
      "grad_norm": 4.403656959533691,
      "learning_rate": 8.271734841028553e-07,
      "loss": 44.1413,
      "step": 76
    },
    {
      "epoch": 0.07556427870461237,
      "grad_norm": 4.389298915863037,
      "learning_rate": 7.633540738525066e-07,
      "loss": 44.1718,
      "step": 77
    },
    {
      "epoch": 0.07654563297350343,
      "grad_norm": 4.401954174041748,
      "learning_rate": 7.016504991533727e-07,
      "loss": 44.1616,
      "step": 78
    },
    {
      "epoch": 0.0775269872423945,
      "grad_norm": 4.580694198608398,
      "learning_rate": 6.421379363065142e-07,
      "loss": 44.1418,
      "step": 79
    },
    {
      "epoch": 0.07850834151128558,
      "grad_norm": 4.462921619415283,
      "learning_rate": 5.848888922025553e-07,
      "loss": 44.1092,
      "step": 80
    },
    {
      "epoch": 0.07948969578017664,
      "grad_norm": 4.829708099365234,
      "learning_rate": 5.299731159831953e-07,
      "loss": 44.1295,
      "step": 81
    },
    {
      "epoch": 0.08047105004906771,
      "grad_norm": 4.394340991973877,
      "learning_rate": 4.774575140626317e-07,
      "loss": 44.1685,
      "step": 82
    },
    {
      "epoch": 0.08145240431795878,
      "grad_norm": 4.454510688781738,
      "learning_rate": 4.27406068612396e-07,
      "loss": 44.1542,
      "step": 83
    },
    {
      "epoch": 0.08243375858684986,
      "grad_norm": 4.497045040130615,
      "learning_rate": 3.798797596089351e-07,
      "loss": 44.1375,
      "step": 84
    },
    {
      "epoch": 0.08341511285574092,
      "grad_norm": 4.434609889984131,
      "learning_rate": 3.3493649053890325e-07,
      "loss": 44.1345,
      "step": 85
    },
    {
      "epoch": 0.08439646712463199,
      "grad_norm": 4.5244035720825195,
      "learning_rate": 2.9263101785268253e-07,
      "loss": 44.1467,
      "step": 86
    },
    {
      "epoch": 0.08537782139352307,
      "grad_norm": 4.269883632659912,
      "learning_rate": 2.53014884252083e-07,
      "loss": 44.1526,
      "step": 87
    },
    {
      "epoch": 0.08635917566241413,
      "grad_norm": 4.184451103210449,
      "learning_rate": 2.1613635589349756e-07,
      "loss": 44.1822,
      "step": 88
    },
    {
      "epoch": 0.0873405299313052,
      "grad_norm": 4.249412536621094,
      "learning_rate": 1.8204036358303173e-07,
      "loss": 44.1716,
      "step": 89
    },
    {
      "epoch": 0.08832188420019627,
      "grad_norm": 3.827402114868164,
      "learning_rate": 1.507684480352292e-07,
      "loss": 44.1924,
      "step": 90
    },
    {
      "epoch": 0.08930323846908735,
      "grad_norm": 4.114834785461426,
      "learning_rate": 1.223587092621162e-07,
      "loss": 44.2127,
      "step": 91
    },
    {
      "epoch": 0.09028459273797841,
      "grad_norm": 3.819640636444092,
      "learning_rate": 9.684576015420277e-08,
      "loss": 44.2051,
      "step": 92
    },
    {
      "epoch": 0.09126594700686948,
      "grad_norm": 4.212294578552246,
      "learning_rate": 7.426068431000883e-08,
      "loss": 44.1976,
      "step": 93
    },
    {
      "epoch": 0.09224730127576054,
      "grad_norm": 3.624086618423462,
      "learning_rate": 5.463099816548578e-08,
      "loss": 44.2229,
      "step": 94
    },
    {
      "epoch": 0.09322865554465162,
      "grad_norm": 3.4577605724334717,
      "learning_rate": 3.798061746947995e-08,
      "loss": 44.2466,
      "step": 95
    },
    {
      "epoch": 0.09421000981354269,
      "grad_norm": 3.643979072570801,
      "learning_rate": 2.4329828146074096e-08,
      "loss": 44.3109,
      "step": 96
    },
    {
      "epoch": 0.09519136408243375,
      "grad_norm": 4.863288879394531,
      "learning_rate": 1.3695261579316776e-08,
      "loss": 44.1834,
      "step": 97
    },
    {
      "epoch": 0.09617271835132483,
      "grad_norm": 3.8488268852233887,
      "learning_rate": 6.089874350439507e-09,
      "loss": 44.2992,
      "step": 98
    },
    {
      "epoch": 0.0971540726202159,
      "grad_norm": 4.9372148513793945,
      "learning_rate": 1.5229324522605949e-09,
      "loss": 44.3093,
      "step": 99
    },
    {
      "epoch": 0.09813542688910697,
      "grad_norm": 5.151540756225586,
      "learning_rate": 0.0,
      "loss": 44.4666,
      "step": 100
    },
    {
      "epoch": 0.09813542688910697,
      "eval_loss": 11.038614273071289,
      "eval_runtime": 5.0993,
      "eval_samples_per_second": 336.516,
      "eval_steps_per_second": 42.163,
      "step": 100
    }
  ],
  "logging_steps": 1,
  "max_steps": 100,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 15407373090816.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}