{
  "best_metric": 4.415204048156738,
  "best_model_checkpoint": "miner_id_24/checkpoint-100",
  "epoch": 0.0379416646905383,
  "eval_steps": 50,
  "global_step": 100,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.000379416646905383,
      "grad_norm": 4.444539546966553,
      "learning_rate": 5.000000000000001e-07,
      "loss": 3.6119,
      "step": 1
    },
    {
      "epoch": 0.000379416646905383,
      "eval_loss": 5.630036354064941,
      "eval_runtime": 70.5773,
      "eval_samples_per_second": 62.896,
      "eval_steps_per_second": 15.727,
      "step": 1
    },
    {
      "epoch": 0.000758833293810766,
      "grad_norm": 10.298806190490723,
      "learning_rate": 1.0000000000000002e-06,
      "loss": 4.4198,
      "step": 2
    },
    {
      "epoch": 0.0011382499407161489,
      "grad_norm": 10.508999824523926,
      "learning_rate": 1.5e-06,
      "loss": 4.3823,
      "step": 3
    },
    {
      "epoch": 0.001517666587621532,
      "grad_norm": 14.867856979370117,
      "learning_rate": 2.0000000000000003e-06,
      "loss": 5.3149,
      "step": 4
    },
    {
      "epoch": 0.001897083234526915,
      "grad_norm": 11.521822929382324,
      "learning_rate": 2.5e-06,
      "loss": 4.957,
      "step": 5
    },
    {
      "epoch": 0.0022764998814322978,
      "grad_norm": 10.65507984161377,
      "learning_rate": 3e-06,
      "loss": 5.0594,
      "step": 6
    },
    {
      "epoch": 0.002655916528337681,
      "grad_norm": 14.449377059936523,
      "learning_rate": 3.5e-06,
      "loss": 5.4046,
      "step": 7
    },
    {
      "epoch": 0.003035333175243064,
      "grad_norm": 15.257190704345703,
      "learning_rate": 4.000000000000001e-06,
      "loss": 5.4988,
      "step": 8
    },
    {
      "epoch": 0.003414749822148447,
      "grad_norm": 15.488490104675293,
      "learning_rate": 4.5e-06,
      "loss": 4.9476,
      "step": 9
    },
    {
      "epoch": 0.00379416646905383,
      "grad_norm": 15.310659408569336,
      "learning_rate": 5e-06,
      "loss": 5.2732,
      "step": 10
    },
    {
      "epoch": 0.0041735831159592125,
      "grad_norm": 10.856189727783203,
      "learning_rate": 4.99847706754774e-06,
      "loss": 5.0276,
      "step": 11
    },
    {
      "epoch": 0.0045529997628645956,
      "grad_norm": 17.822044372558594,
      "learning_rate": 4.993910125649561e-06,
      "loss": 5.2223,
      "step": 12
    },
    {
      "epoch": 0.004932416409769979,
      "grad_norm": 11.17652702331543,
      "learning_rate": 4.986304738420684e-06,
      "loss": 4.818,
      "step": 13
    },
    {
      "epoch": 0.005311833056675362,
      "grad_norm": 16.337657928466797,
      "learning_rate": 4.975670171853926e-06,
      "loss": 5.4286,
      "step": 14
    },
    {
      "epoch": 0.005691249703580745,
      "grad_norm": 16.31676483154297,
      "learning_rate": 4.962019382530521e-06,
      "loss": 5.0845,
      "step": 15
    },
    {
      "epoch": 0.006070666350486128,
      "grad_norm": 13.25898551940918,
      "learning_rate": 4.9453690018345144e-06,
      "loss": 5.1326,
      "step": 16
    },
    {
      "epoch": 0.006450082997391511,
      "grad_norm": 15.131325721740723,
      "learning_rate": 4.925739315689991e-06,
      "loss": 5.0989,
      "step": 17
    },
    {
      "epoch": 0.006829499644296894,
      "grad_norm": 14.353851318359375,
      "learning_rate": 4.903154239845798e-06,
      "loss": 5.0136,
      "step": 18
    },
    {
      "epoch": 0.007208916291202277,
      "grad_norm": 13.283928871154785,
      "learning_rate": 4.8776412907378845e-06,
      "loss": 5.5043,
      "step": 19
    },
    {
      "epoch": 0.00758833293810766,
      "grad_norm": 13.317709922790527,
      "learning_rate": 4.849231551964771e-06,
      "loss": 5.1231,
      "step": 20
    },
    {
      "epoch": 0.007967749585013043,
      "grad_norm": 10.19308853149414,
      "learning_rate": 4.817959636416969e-06,
      "loss": 5.0222,
      "step": 21
    },
    {
      "epoch": 0.008347166231918425,
      "grad_norm": 12.509997367858887,
      "learning_rate": 4.783863644106502e-06,
      "loss": 5.0395,
      "step": 22
    },
    {
      "epoch": 0.008726582878823809,
      "grad_norm": 13.469011306762695,
      "learning_rate": 4.746985115747918e-06,
      "loss": 4.9736,
      "step": 23
    },
    {
      "epoch": 0.009105999525729191,
      "grad_norm": 13.559428215026855,
      "learning_rate": 4.707368982147318e-06,
      "loss": 5.1422,
      "step": 24
    },
    {
      "epoch": 0.009485416172634575,
      "grad_norm": 12.164199829101562,
      "learning_rate": 4.665063509461098e-06,
      "loss": 5.0459,
      "step": 25
    },
    {
      "epoch": 0.009864832819539957,
      "grad_norm": 12.601916313171387,
      "learning_rate": 4.620120240391065e-06,
      "loss": 5.5161,
      "step": 26
    },
    {
      "epoch": 0.010244249466445341,
      "grad_norm": 10.389106750488281,
      "learning_rate": 4.572593931387604e-06,
      "loss": 4.7012,
      "step": 27
    },
    {
      "epoch": 0.010623666113350723,
      "grad_norm": 11.981220245361328,
      "learning_rate": 4.522542485937369e-06,
      "loss": 5.2306,
      "step": 28
    },
    {
      "epoch": 0.011003082760256105,
      "grad_norm": 8.926562309265137,
      "learning_rate": 4.470026884016805e-06,
      "loss": 5.2015,
      "step": 29
    },
    {
      "epoch": 0.01138249940716149,
      "grad_norm": 11.07724666595459,
      "learning_rate": 4.415111107797445e-06,
      "loss": 5.3189,
      "step": 30
    },
    {
      "epoch": 0.011761916054066872,
      "grad_norm": 23.328981399536133,
      "learning_rate": 4.357862063693486e-06,
      "loss": 5.2879,
      "step": 31
    },
    {
      "epoch": 0.012141332700972255,
      "grad_norm": 9.831049919128418,
      "learning_rate": 4.2983495008466285e-06,
      "loss": 5.2354,
      "step": 32
    },
    {
      "epoch": 0.012520749347877638,
      "grad_norm": 10.883379936218262,
      "learning_rate": 4.236645926147493e-06,
      "loss": 5.101,
      "step": 33
    },
    {
      "epoch": 0.012900165994783021,
      "grad_norm": 13.54520034790039,
      "learning_rate": 4.172826515897146e-06,
      "loss": 5.3399,
      "step": 34
    },
    {
      "epoch": 0.013279582641688404,
      "grad_norm": 9.359816551208496,
      "learning_rate": 4.106969024216348e-06,
      "loss": 5.0393,
      "step": 35
    },
    {
      "epoch": 0.013658999288593788,
      "grad_norm": 8.04438304901123,
      "learning_rate": 4.039153688314146e-06,
      "loss": 5.1888,
      "step": 36
    },
    {
      "epoch": 0.01403841593549917,
      "grad_norm": 7.696422100067139,
      "learning_rate": 3.969463130731183e-06,
      "loss": 5.0935,
      "step": 37
    },
    {
      "epoch": 0.014417832582404554,
      "grad_norm": 9.850431442260742,
      "learning_rate": 3.897982258676867e-06,
      "loss": 5.3628,
      "step": 38
    },
    {
      "epoch": 0.014797249229309936,
      "grad_norm": 17.82948112487793,
      "learning_rate": 3.824798160583012e-06,
      "loss": 5.2962,
      "step": 39
    },
    {
      "epoch": 0.01517666587621532,
      "grad_norm": 8.378523826599121,
      "learning_rate": 3.7500000000000005e-06,
      "loss": 4.873,
      "step": 40
    },
    {
      "epoch": 0.015556082523120702,
      "grad_norm": 9.744098663330078,
      "learning_rate": 3.6736789069647273e-06,
      "loss": 5.1868,
      "step": 41
    },
    {
      "epoch": 0.015935499170026086,
      "grad_norm": 8.492013931274414,
      "learning_rate": 3.595927866972694e-06,
      "loss": 5.3123,
      "step": 42
    },
    {
      "epoch": 0.016314915816931466,
      "grad_norm": 9.203422546386719,
      "learning_rate": 3.516841607689501e-06,
      "loss": 5.1639,
      "step": 43
    },
    {
      "epoch": 0.01669433246383685,
      "grad_norm": 8.369019508361816,
      "learning_rate": 3.436516483539781e-06,
      "loss": 5.0934,
      "step": 44
    },
    {
      "epoch": 0.017073749110742234,
      "grad_norm": 8.947461128234863,
      "learning_rate": 3.3550503583141726e-06,
      "loss": 5.1892,
      "step": 45
    },
    {
      "epoch": 0.017453165757647618,
      "grad_norm": 10.578983306884766,
      "learning_rate": 3.272542485937369e-06,
      "loss": 5.6706,
      "step": 46
    },
    {
      "epoch": 0.017832582404553,
      "grad_norm": 9.356407165527344,
      "learning_rate": 3.189093389542498e-06,
      "loss": 5.6261,
      "step": 47
    },
    {
      "epoch": 0.018211999051458382,
      "grad_norm": 9.004945755004883,
      "learning_rate": 3.1048047389991693e-06,
      "loss": 5.3589,
      "step": 48
    },
    {
      "epoch": 0.018591415698363766,
      "grad_norm": 8.144246101379395,
      "learning_rate": 3.019779227044398e-06,
      "loss": 5.4095,
      "step": 49
    },
    {
      "epoch": 0.01897083234526915,
      "grad_norm": 9.677080154418945,
      "learning_rate": 2.9341204441673267e-06,
      "loss": 5.7475,
      "step": 50
    },
    {
      "epoch": 0.01897083234526915,
      "eval_loss": 4.675076484680176,
      "eval_runtime": 70.0014,
      "eval_samples_per_second": 63.413,
      "eval_steps_per_second": 15.857,
      "step": 50
    },
    {
      "epoch": 0.01935024899217453,
      "grad_norm": 3.7669692039489746,
      "learning_rate": 2.847932752400164e-06,
      "loss": 4.0896,
      "step": 51
    },
    {
      "epoch": 0.019729665639079914,
      "grad_norm": 4.779418468475342,
      "learning_rate": 2.761321158169134e-06,
      "loss": 3.8922,
      "step": 52
    },
    {
      "epoch": 0.0201090822859853,
      "grad_norm": 5.031223297119141,
      "learning_rate": 2.6743911843603134e-06,
      "loss": 4.1811,
      "step": 53
    },
    {
      "epoch": 0.020488498932890682,
      "grad_norm": 4.953243255615234,
      "learning_rate": 2.587248741756253e-06,
      "loss": 3.7881,
      "step": 54
    },
    {
      "epoch": 0.020867915579796063,
      "grad_norm": 5.7064208984375,
      "learning_rate": 2.5e-06,
      "loss": 4.0777,
      "step": 55
    },
    {
      "epoch": 0.021247332226701447,
      "grad_norm": 5.718916893005371,
      "learning_rate": 2.4127512582437486e-06,
      "loss": 4.1119,
      "step": 56
    },
    {
      "epoch": 0.02162674887360683,
      "grad_norm": 6.402388572692871,
      "learning_rate": 2.325608815639687e-06,
      "loss": 4.0965,
      "step": 57
    },
    {
      "epoch": 0.02200616552051221,
      "grad_norm": 6.114238739013672,
      "learning_rate": 2.238678841830867e-06,
      "loss": 4.1586,
      "step": 58
    },
    {
      "epoch": 0.022385582167417595,
      "grad_norm": 5.461117267608643,
      "learning_rate": 2.1520672475998374e-06,
      "loss": 3.6969,
      "step": 59
    },
    {
      "epoch": 0.02276499881432298,
      "grad_norm": 6.01952600479126,
      "learning_rate": 2.0658795558326745e-06,
      "loss": 4.0374,
      "step": 60
    },
    {
      "epoch": 0.023144415461228363,
      "grad_norm": 6.108837604522705,
      "learning_rate": 1.9802207729556023e-06,
      "loss": 4.3124,
      "step": 61
    },
    {
      "epoch": 0.023523832108133743,
      "grad_norm": 6.704663276672363,
      "learning_rate": 1.895195261000831e-06,
      "loss": 3.8151,
      "step": 62
    },
    {
      "epoch": 0.023903248755039127,
      "grad_norm": 6.462284088134766,
      "learning_rate": 1.8109066104575023e-06,
      "loss": 4.3228,
      "step": 63
    },
    {
      "epoch": 0.02428266540194451,
      "grad_norm": 7.066595554351807,
      "learning_rate": 1.7274575140626318e-06,
      "loss": 4.1603,
      "step": 64
    },
    {
      "epoch": 0.024662082048849895,
      "grad_norm": 6.231473922729492,
      "learning_rate": 1.6449496416858285e-06,
      "loss": 4.3381,
      "step": 65
    },
    {
      "epoch": 0.025041498695755275,
      "grad_norm": 6.865777492523193,
      "learning_rate": 1.56348351646022e-06,
      "loss": 3.7183,
      "step": 66
    },
    {
      "epoch": 0.02542091534266066,
      "grad_norm": 6.609790802001953,
      "learning_rate": 1.4831583923105e-06,
      "loss": 4.5205,
      "step": 67
    },
    {
      "epoch": 0.025800331989566043,
      "grad_norm": 6.334497928619385,
      "learning_rate": 1.4040721330273063e-06,
      "loss": 4.3837,
      "step": 68
    },
    {
      "epoch": 0.026179748636471427,
      "grad_norm": 7.466740608215332,
      "learning_rate": 1.3263210930352737e-06,
      "loss": 4.3184,
      "step": 69
    },
    {
      "epoch": 0.026559165283376807,
      "grad_norm": 7.18544340133667,
      "learning_rate": 1.2500000000000007e-06,
      "loss": 4.3352,
      "step": 70
    },
    {
      "epoch": 0.02693858193028219,
      "grad_norm": 6.762673377990723,
      "learning_rate": 1.1752018394169882e-06,
      "loss": 4.504,
      "step": 71
    },
    {
      "epoch": 0.027317998577187575,
      "grad_norm": 5.527563095092773,
      "learning_rate": 1.1020177413231334e-06,
      "loss": 4.1255,
      "step": 72
    },
    {
      "epoch": 0.027697415224092956,
      "grad_norm": 6.735006332397461,
      "learning_rate": 1.0305368692688175e-06,
      "loss": 4.3378,
      "step": 73
    },
    {
      "epoch": 0.02807683187099834,
      "grad_norm": 6.313520431518555,
      "learning_rate": 9.608463116858544e-07,
      "loss": 3.8856,
      "step": 74
    },
    {
      "epoch": 0.028456248517903723,
      "grad_norm": 6.748076915740967,
      "learning_rate": 8.930309757836517e-07,
      "loss": 4.1941,
      "step": 75
    },
    {
      "epoch": 0.028835665164809107,
      "grad_norm": 7.661444187164307,
      "learning_rate": 8.271734841028553e-07,
      "loss": 4.2481,
      "step": 76
    },
    {
      "epoch": 0.029215081811714488,
      "grad_norm": 7.217004299163818,
      "learning_rate": 7.633540738525066e-07,
      "loss": 4.3925,
      "step": 77
    },
    {
      "epoch": 0.02959449845861987,
      "grad_norm": 7.152622222900391,
      "learning_rate": 7.016504991533727e-07,
      "loss": 3.8941,
      "step": 78
    },
    {
      "epoch": 0.029973915105525255,
      "grad_norm": 6.765809059143066,
      "learning_rate": 6.421379363065142e-07,
      "loss": 4.5282,
      "step": 79
    },
    {
      "epoch": 0.03035333175243064,
      "grad_norm": 6.723090171813965,
      "learning_rate": 5.848888922025553e-07,
      "loss": 4.3739,
      "step": 80
    },
    {
      "epoch": 0.03073274839933602,
      "grad_norm": 7.00840950012207,
      "learning_rate": 5.299731159831953e-07,
      "loss": 4.2958,
      "step": 81
    },
    {
      "epoch": 0.031112165046241404,
      "grad_norm": 6.291935443878174,
      "learning_rate": 4.774575140626317e-07,
      "loss": 3.9951,
      "step": 82
    },
    {
      "epoch": 0.03149158169314679,
      "grad_norm": 6.735934734344482,
      "learning_rate": 4.27406068612396e-07,
      "loss": 4.4475,
      "step": 83
    },
    {
      "epoch": 0.03187099834005217,
      "grad_norm": 7.057698726654053,
      "learning_rate": 3.798797596089351e-07,
      "loss": 4.248,
      "step": 84
    },
    {
      "epoch": 0.032250414986957555,
      "grad_norm": 7.530543804168701,
      "learning_rate": 3.3493649053890325e-07,
      "loss": 4.8054,
      "step": 85
    },
    {
      "epoch": 0.03262983163386293,
      "grad_norm": 11.086931228637695,
      "learning_rate": 2.9263101785268253e-07,
      "loss": 4.5513,
      "step": 86
    },
    {
      "epoch": 0.033009248280768316,
      "grad_norm": 6.664738655090332,
      "learning_rate": 2.53014884252083e-07,
      "loss": 4.3811,
      "step": 87
    },
    {
      "epoch": 0.0333886649276737,
      "grad_norm": 7.505542278289795,
      "learning_rate": 2.1613635589349756e-07,
      "loss": 4.5977,
      "step": 88
    },
    {
      "epoch": 0.033768081574579084,
      "grad_norm": 7.716992378234863,
      "learning_rate": 1.8204036358303173e-07,
      "loss": 4.8648,
      "step": 89
    },
    {
      "epoch": 0.03414749822148447,
      "grad_norm": 9.030020713806152,
      "learning_rate": 1.507684480352292e-07,
      "loss": 4.5738,
      "step": 90
    },
    {
      "epoch": 0.03452691486838985,
      "grad_norm": 7.875784873962402,
      "learning_rate": 1.223587092621162e-07,
      "loss": 4.5702,
      "step": 91
    },
    {
      "epoch": 0.034906331515295236,
      "grad_norm": 6.821437358856201,
      "learning_rate": 9.684576015420277e-08,
      "loss": 4.7577,
      "step": 92
    },
    {
      "epoch": 0.03528574816220062,
      "grad_norm": 7.495672225952148,
      "learning_rate": 7.426068431000883e-08,
      "loss": 4.5927,
      "step": 93
    },
    {
      "epoch": 0.035665164809106,
      "grad_norm": 8.056374549865723,
      "learning_rate": 5.463099816548578e-08,
      "loss": 4.8237,
      "step": 94
    },
    {
      "epoch": 0.03604458145601138,
      "grad_norm": 8.059727668762207,
      "learning_rate": 3.798061746947995e-08,
      "loss": 5.0902,
      "step": 95
    },
    {
      "epoch": 0.036423998102916764,
      "grad_norm": 8.202387809753418,
      "learning_rate": 2.4329828146074096e-08,
      "loss": 4.8697,
      "step": 96
    },
    {
      "epoch": 0.03680341474982215,
      "grad_norm": 8.158020973205566,
      "learning_rate": 1.3695261579316776e-08,
      "loss": 4.7059,
      "step": 97
    },
    {
      "epoch": 0.03718283139672753,
      "grad_norm": 8.748244285583496,
      "learning_rate": 6.089874350439507e-09,
      "loss": 4.7764,
      "step": 98
    },
    {
      "epoch": 0.037562248043632916,
      "grad_norm": 8.805473327636719,
      "learning_rate": 1.5229324522605949e-09,
      "loss": 5.3301,
      "step": 99
    },
    {
      "epoch": 0.0379416646905383,
      "grad_norm": 8.97905158996582,
      "learning_rate": 0.0,
      "loss": 5.5488,
      "step": 100
    },
    {
      "epoch": 0.0379416646905383,
      "eval_loss": 4.415204048156738,
      "eval_runtime": 70.0282,
      "eval_samples_per_second": 63.389,
      "eval_steps_per_second": 15.851,
      "step": 100
    }
  ],
  "logging_steps": 1,
  "max_steps": 100,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 6212314516684800.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}