{
  "best_metric": 11.789237976074219,
  "best_model_checkpoint": "miner_id_24/checkpoint-200",
  "epoch": 0.07582938388625593,
  "eval_steps": 50,
  "global_step": 200,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0003791469194312796,
      "grad_norm": 0.28769180178642273,
      "learning_rate": 1e-05,
      "loss": 11.9341,
      "step": 1
    },
    {
      "epoch": 0.0003791469194312796,
      "eval_loss": 11.931600570678711,
      "eval_runtime": 43.8048,
      "eval_samples_per_second": 101.404,
      "eval_steps_per_second": 25.363,
      "step": 1
    },
    {
      "epoch": 0.0007582938388625592,
      "grad_norm": 0.26498857140541077,
      "learning_rate": 2e-05,
      "loss": 11.9335,
      "step": 2
    },
    {
      "epoch": 0.001137440758293839,
      "grad_norm": 0.2662431299686432,
      "learning_rate": 3e-05,
      "loss": 11.9364,
      "step": 3
    },
    {
      "epoch": 0.0015165876777251184,
      "grad_norm": 0.301140695810318,
      "learning_rate": 4e-05,
      "loss": 11.9313,
      "step": 4
    },
    {
      "epoch": 0.0018957345971563982,
      "grad_norm": 0.26981547474861145,
      "learning_rate": 5e-05,
      "loss": 11.9242,
      "step": 5
    },
    {
      "epoch": 0.002274881516587678,
      "grad_norm": 0.2758404612541199,
      "learning_rate": 6e-05,
      "loss": 11.9395,
      "step": 6
    },
    {
      "epoch": 0.002654028436018957,
      "grad_norm": 0.3056615889072418,
      "learning_rate": 7e-05,
      "loss": 11.9271,
      "step": 7
    },
    {
      "epoch": 0.003033175355450237,
      "grad_norm": 0.28484514355659485,
      "learning_rate": 8e-05,
      "loss": 11.9274,
      "step": 8
    },
    {
      "epoch": 0.0034123222748815166,
      "grad_norm": 0.27682986855506897,
      "learning_rate": 9e-05,
      "loss": 11.932,
      "step": 9
    },
    {
      "epoch": 0.0037914691943127963,
      "grad_norm": 0.3237661123275757,
      "learning_rate": 0.0001,
      "loss": 11.9229,
      "step": 10
    },
    {
      "epoch": 0.004170616113744076,
      "grad_norm": 0.3356820046901703,
      "learning_rate": 9.999316524962345e-05,
      "loss": 11.92,
      "step": 11
    },
    {
      "epoch": 0.004549763033175356,
      "grad_norm": 0.3805263042449951,
      "learning_rate": 9.997266286704631e-05,
      "loss": 11.9183,
      "step": 12
    },
    {
      "epoch": 0.0049289099526066355,
      "grad_norm": 0.4529215395450592,
      "learning_rate": 9.993849845741524e-05,
      "loss": 11.9161,
      "step": 13
    },
    {
      "epoch": 0.005308056872037914,
      "grad_norm": 0.4107781648635864,
      "learning_rate": 9.989068136093873e-05,
      "loss": 11.9106,
      "step": 14
    },
    {
      "epoch": 0.005687203791469194,
      "grad_norm": 0.4610343277454376,
      "learning_rate": 9.98292246503335e-05,
      "loss": 11.91,
      "step": 15
    },
    {
      "epoch": 0.006066350710900474,
      "grad_norm": 0.45946910977363586,
      "learning_rate": 9.975414512725057e-05,
      "loss": 11.9126,
      "step": 16
    },
    {
      "epoch": 0.0064454976303317535,
      "grad_norm": 0.5091995000839233,
      "learning_rate": 9.966546331768191e-05,
      "loss": 11.9019,
      "step": 17
    },
    {
      "epoch": 0.006824644549763033,
      "grad_norm": 0.47413066029548645,
      "learning_rate": 9.956320346634876e-05,
      "loss": 11.9032,
      "step": 18
    },
    {
      "epoch": 0.007203791469194313,
      "grad_norm": 0.47228190302848816,
      "learning_rate": 9.944739353007344e-05,
      "loss": 11.8996,
      "step": 19
    },
    {
      "epoch": 0.007582938388625593,
      "grad_norm": 0.4501458406448364,
      "learning_rate": 9.931806517013612e-05,
      "loss": 11.8936,
      "step": 20
    },
    {
      "epoch": 0.007962085308056872,
      "grad_norm": 0.41325676441192627,
      "learning_rate": 9.917525374361912e-05,
      "loss": 11.8854,
      "step": 21
    },
    {
      "epoch": 0.008341232227488152,
      "grad_norm": 0.43374261260032654,
      "learning_rate": 9.901899829374047e-05,
      "loss": 11.8845,
      "step": 22
    },
    {
      "epoch": 0.008720379146919432,
      "grad_norm": 0.36916017532348633,
      "learning_rate": 9.884934153917997e-05,
      "loss": 11.8804,
      "step": 23
    },
    {
      "epoch": 0.009099526066350712,
      "grad_norm": 0.36391785740852356,
      "learning_rate": 9.86663298624003e-05,
      "loss": 11.8814,
      "step": 24
    },
    {
      "epoch": 0.009478672985781991,
      "grad_norm": 0.3365619480609894,
      "learning_rate": 9.847001329696653e-05,
      "loss": 11.8797,
      "step": 25
    },
    {
      "epoch": 0.009857819905213271,
      "grad_norm": 0.3295334279537201,
      "learning_rate": 9.826044551386744e-05,
      "loss": 11.8717,
      "step": 26
    },
    {
      "epoch": 0.010236966824644549,
      "grad_norm": 0.29351353645324707,
      "learning_rate": 9.803768380684242e-05,
      "loss": 11.8652,
      "step": 27
    },
    {
      "epoch": 0.010616113744075829,
      "grad_norm": 0.3394353687763214,
      "learning_rate": 9.780178907671789e-05,
      "loss": 11.8688,
      "step": 28
    },
    {
      "epoch": 0.010995260663507108,
      "grad_norm": 0.3891104459762573,
      "learning_rate": 9.755282581475769e-05,
      "loss": 11.8692,
      "step": 29
    },
    {
      "epoch": 0.011374407582938388,
      "grad_norm": 0.26279541850090027,
      "learning_rate": 9.729086208503174e-05,
      "loss": 11.8697,
      "step": 30
    },
    {
      "epoch": 0.011753554502369668,
      "grad_norm": 0.29191040992736816,
      "learning_rate": 9.701596950580806e-05,
      "loss": 11.8641,
      "step": 31
    },
    {
      "epoch": 0.012132701421800948,
      "grad_norm": 0.23644578456878662,
      "learning_rate": 9.672822322997305e-05,
      "loss": 11.8608,
      "step": 32
    },
    {
      "epoch": 0.012511848341232227,
      "grad_norm": 0.2428828924894333,
      "learning_rate": 9.642770192448536e-05,
      "loss": 11.8616,
      "step": 33
    },
    {
      "epoch": 0.012890995260663507,
      "grad_norm": 0.20023727416992188,
      "learning_rate": 9.611448774886924e-05,
      "loss": 11.8546,
      "step": 34
    },
    {
      "epoch": 0.013270142180094787,
      "grad_norm": 0.2178952544927597,
      "learning_rate": 9.578866633275288e-05,
      "loss": 11.8558,
      "step": 35
    },
    {
      "epoch": 0.013649289099526066,
      "grad_norm": 0.2382510006427765,
      "learning_rate": 9.545032675245813e-05,
      "loss": 11.8543,
      "step": 36
    },
    {
      "epoch": 0.014028436018957346,
      "grad_norm": 0.2123047560453415,
      "learning_rate": 9.509956150664796e-05,
      "loss": 11.8577,
      "step": 37
    },
    {
      "epoch": 0.014407582938388626,
      "grad_norm": 0.1820491999387741,
      "learning_rate": 9.473646649103818e-05,
      "loss": 11.8507,
      "step": 38
    },
    {
      "epoch": 0.014786729857819906,
      "grad_norm": 0.2487359642982483,
      "learning_rate": 9.43611409721806e-05,
      "loss": 11.8598,
      "step": 39
    },
    {
      "epoch": 0.015165876777251185,
      "grad_norm": 0.3035540282726288,
      "learning_rate": 9.397368756032445e-05,
      "loss": 11.8501,
      "step": 40
    },
    {
      "epoch": 0.015545023696682465,
      "grad_norm": 0.38344883918762207,
      "learning_rate": 9.357421218136386e-05,
      "loss": 11.8533,
      "step": 41
    },
    {
      "epoch": 0.015924170616113745,
      "grad_norm": 0.17882512509822845,
      "learning_rate": 9.316282404787871e-05,
      "loss": 11.8508,
      "step": 42
    },
    {
      "epoch": 0.016303317535545023,
      "grad_norm": 0.16976627707481384,
      "learning_rate": 9.273963562927695e-05,
      "loss": 11.8501,
      "step": 43
    },
    {
      "epoch": 0.016682464454976304,
      "grad_norm": 0.1657288819551468,
      "learning_rate": 9.230476262104677e-05,
      "loss": 11.838,
      "step": 44
    },
    {
      "epoch": 0.017061611374407582,
      "grad_norm": 0.17109397053718567,
      "learning_rate": 9.185832391312644e-05,
      "loss": 11.8438,
      "step": 45
    },
    {
      "epoch": 0.017440758293838864,
      "grad_norm": 0.17634600400924683,
      "learning_rate": 9.140044155740101e-05,
      "loss": 11.8416,
      "step": 46
    },
    {
      "epoch": 0.01781990521327014,
      "grad_norm": 0.18844150006771088,
      "learning_rate": 9.093124073433463e-05,
      "loss": 11.8413,
      "step": 47
    },
    {
      "epoch": 0.018199052132701423,
      "grad_norm": 0.28906017541885376,
      "learning_rate": 9.045084971874738e-05,
      "loss": 11.85,
      "step": 48
    },
    {
      "epoch": 0.0185781990521327,
      "grad_norm": 0.20972122251987457,
      "learning_rate": 8.995939984474624e-05,
      "loss": 11.8434,
      "step": 49
    },
    {
      "epoch": 0.018957345971563982,
      "grad_norm": 0.24533000588417053,
      "learning_rate": 8.945702546981969e-05,
      "loss": 11.8367,
      "step": 50
    },
    {
      "epoch": 0.018957345971563982,
      "eval_loss": 11.85046672821045,
      "eval_runtime": 44.0024,
      "eval_samples_per_second": 100.949,
      "eval_steps_per_second": 25.249,
      "step": 50
    },
    {
      "epoch": 0.01933649289099526,
      "grad_norm": 0.21670155227184296,
      "learning_rate": 8.894386393810563e-05,
      "loss": 11.8483,
      "step": 51
    },
    {
      "epoch": 0.019715639810426542,
      "grad_norm": 0.1768616884946823,
      "learning_rate": 8.842005554284296e-05,
      "loss": 11.8614,
      "step": 52
    },
    {
      "epoch": 0.02009478672985782,
      "grad_norm": 0.14872843027114868,
      "learning_rate": 8.788574348801675e-05,
      "loss": 11.8544,
      "step": 53
    },
    {
      "epoch": 0.020473933649289098,
      "grad_norm": 0.26231178641319275,
      "learning_rate": 8.73410738492077e-05,
      "loss": 11.8611,
      "step": 54
    },
    {
      "epoch": 0.02085308056872038,
      "grad_norm": 0.1851011961698532,
      "learning_rate": 8.678619553365659e-05,
      "loss": 11.8575,
      "step": 55
    },
    {
      "epoch": 0.021232227488151657,
      "grad_norm": 0.16626429557800293,
      "learning_rate": 8.622126023955446e-05,
      "loss": 11.8542,
      "step": 56
    },
    {
      "epoch": 0.02161137440758294,
      "grad_norm": 0.1764294058084488,
      "learning_rate": 8.564642241456986e-05,
      "loss": 11.8449,
      "step": 57
    },
    {
      "epoch": 0.021990521327014217,
      "grad_norm": 0.19001427292823792,
      "learning_rate": 8.506183921362443e-05,
      "loss": 11.8618,
      "step": 58
    },
    {
      "epoch": 0.022369668246445498,
      "grad_norm": 0.20055465400218964,
      "learning_rate": 8.44676704559283e-05,
      "loss": 11.8561,
      "step": 59
    },
    {
      "epoch": 0.022748815165876776,
      "grad_norm": 0.1839049756526947,
      "learning_rate": 8.386407858128706e-05,
      "loss": 11.8538,
      "step": 60
    },
    {
      "epoch": 0.023127962085308058,
      "grad_norm": 0.16656097769737244,
      "learning_rate": 8.32512286056924e-05,
      "loss": 11.8499,
      "step": 61
    },
    {
      "epoch": 0.023507109004739336,
      "grad_norm": 0.15637564659118652,
      "learning_rate": 8.262928807620843e-05,
      "loss": 11.8478,
      "step": 62
    },
    {
      "epoch": 0.023886255924170617,
      "grad_norm": 0.1506025791168213,
      "learning_rate": 8.199842702516583e-05,
      "loss": 11.8496,
      "step": 63
    },
    {
      "epoch": 0.024265402843601895,
      "grad_norm": 0.18512433767318726,
      "learning_rate": 8.135881792367686e-05,
      "loss": 11.8521,
      "step": 64
    },
    {
      "epoch": 0.024644549763033177,
      "grad_norm": 0.19561633467674255,
      "learning_rate": 8.07106356344834e-05,
      "loss": 11.8488,
      "step": 65
    },
    {
      "epoch": 0.025023696682464455,
      "grad_norm": 0.15633133053779602,
      "learning_rate": 8.005405736415126e-05,
      "loss": 11.8452,
      "step": 66
    },
    {
      "epoch": 0.025402843601895736,
      "grad_norm": 0.15324749052524567,
      "learning_rate": 7.938926261462366e-05,
      "loss": 11.8427,
      "step": 67
    },
    {
      "epoch": 0.025781990521327014,
      "grad_norm": 0.16857261955738068,
      "learning_rate": 7.871643313414718e-05,
      "loss": 11.845,
      "step": 68
    },
    {
      "epoch": 0.026161137440758295,
      "grad_norm": 0.19223137199878693,
      "learning_rate": 7.803575286758364e-05,
      "loss": 11.8425,
      "step": 69
    },
    {
      "epoch": 0.026540284360189573,
      "grad_norm": 0.19688811898231506,
      "learning_rate": 7.734740790612136e-05,
      "loss": 11.8418,
      "step": 70
    },
    {
      "epoch": 0.02691943127962085,
      "grad_norm": 0.16625766456127167,
      "learning_rate": 7.66515864363997e-05,
      "loss": 11.8403,
      "step": 71
    },
    {
      "epoch": 0.027298578199052133,
      "grad_norm": 0.1805586814880371,
      "learning_rate": 7.594847868906076e-05,
      "loss": 11.8356,
      "step": 72
    },
    {
      "epoch": 0.02767772511848341,
      "grad_norm": 0.18211770057678223,
      "learning_rate": 7.52382768867422e-05,
      "loss": 11.8378,
      "step": 73
    },
    {
      "epoch": 0.028056872037914692,
      "grad_norm": 0.1834656447172165,
      "learning_rate": 7.452117519152542e-05,
      "loss": 11.8259,
      "step": 74
    },
    {
      "epoch": 0.02843601895734597,
      "grad_norm": 0.16003456711769104,
      "learning_rate": 7.379736965185368e-05,
      "loss": 11.8347,
      "step": 75
    },
    {
      "epoch": 0.02881516587677725,
      "grad_norm": 0.1905023753643036,
      "learning_rate": 7.30670581489344e-05,
      "loss": 11.8263,
      "step": 76
    },
    {
      "epoch": 0.02919431279620853,
      "grad_norm": 0.14761149883270264,
      "learning_rate": 7.233044034264034e-05,
      "loss": 11.8357,
      "step": 77
    },
    {
      "epoch": 0.02957345971563981,
      "grad_norm": 0.1856553852558136,
      "learning_rate": 7.158771761692464e-05,
      "loss": 11.8237,
      "step": 78
    },
    {
      "epoch": 0.02995260663507109,
      "grad_norm": 0.14715948700904846,
      "learning_rate": 7.083909302476453e-05,
      "loss": 11.8252,
      "step": 79
    },
    {
      "epoch": 0.03033175355450237,
      "grad_norm": 0.1759246587753296,
      "learning_rate": 7.008477123264848e-05,
      "loss": 11.8251,
      "step": 80
    },
    {
      "epoch": 0.03071090047393365,
      "grad_norm": 0.13867217302322388,
      "learning_rate": 6.932495846462261e-05,
      "loss": 11.8188,
      "step": 81
    },
    {
      "epoch": 0.03109004739336493,
      "grad_norm": 0.12833894789218903,
      "learning_rate": 6.855986244591104e-05,
      "loss": 11.8229,
      "step": 82
    },
    {
      "epoch": 0.03146919431279621,
      "grad_norm": 0.12929992377758026,
      "learning_rate": 6.778969234612584e-05,
      "loss": 11.8198,
      "step": 83
    },
    {
      "epoch": 0.03184834123222749,
      "grad_norm": 0.1879982054233551,
      "learning_rate": 6.701465872208216e-05,
      "loss": 11.8211,
      "step": 84
    },
    {
      "epoch": 0.03222748815165877,
      "grad_norm": 0.13484127819538116,
      "learning_rate": 6.623497346023418e-05,
      "loss": 11.8133,
      "step": 85
    },
    {
      "epoch": 0.032606635071090045,
      "grad_norm": 0.16426604986190796,
      "learning_rate": 6.545084971874738e-05,
      "loss": 11.8192,
      "step": 86
    },
    {
      "epoch": 0.03298578199052133,
      "grad_norm": 0.15773631632328033,
      "learning_rate": 6.466250186922325e-05,
      "loss": 11.8171,
      "step": 87
    },
    {
      "epoch": 0.03336492890995261,
      "grad_norm": 0.1757354736328125,
      "learning_rate": 6.387014543809223e-05,
      "loss": 11.8124,
      "step": 88
    },
    {
      "epoch": 0.033744075829383886,
      "grad_norm": 0.1542123258113861,
      "learning_rate": 6.307399704769099e-05,
      "loss": 11.8166,
      "step": 89
    },
    {
      "epoch": 0.034123222748815164,
      "grad_norm": 0.16527996957302094,
      "learning_rate": 6.227427435703997e-05,
      "loss": 11.8206,
      "step": 90
    },
    {
      "epoch": 0.03450236966824644,
      "grad_norm": 0.16867831349372864,
      "learning_rate": 6.147119600233758e-05,
      "loss": 11.8075,
      "step": 91
    },
    {
      "epoch": 0.03488151658767773,
      "grad_norm": 0.15023113787174225,
      "learning_rate": 6.066498153718735e-05,
      "loss": 11.8108,
      "step": 92
    },
    {
      "epoch": 0.035260663507109005,
      "grad_norm": 0.14039337635040283,
      "learning_rate": 5.985585137257401e-05,
      "loss": 11.7986,
      "step": 93
    },
    {
      "epoch": 0.03563981042654028,
      "grad_norm": 0.18728798627853394,
      "learning_rate": 5.90440267166055e-05,
      "loss": 11.8061,
      "step": 94
    },
    {
      "epoch": 0.03601895734597156,
      "grad_norm": 0.21996043622493744,
      "learning_rate": 5.8229729514036705e-05,
      "loss": 11.8135,
      "step": 95
    },
    {
      "epoch": 0.036398104265402846,
      "grad_norm": 0.20277753472328186,
      "learning_rate": 5.74131823855921e-05,
      "loss": 11.8046,
      "step": 96
    },
    {
      "epoch": 0.036777251184834124,
      "grad_norm": 0.18947215378284454,
      "learning_rate": 5.6594608567103456e-05,
      "loss": 11.8009,
      "step": 97
    },
    {
      "epoch": 0.0371563981042654,
      "grad_norm": 0.19484518468379974,
      "learning_rate": 5.577423184847932e-05,
      "loss": 11.8014,
      "step": 98
    },
    {
      "epoch": 0.03753554502369668,
      "grad_norm": 0.2266094982624054,
      "learning_rate": 5.495227651252315e-05,
      "loss": 11.7963,
      "step": 99
    },
    {
      "epoch": 0.037914691943127965,
      "grad_norm": 0.23149214684963226,
      "learning_rate": 5.4128967273616625e-05,
      "loss": 11.7938,
      "step": 100
    },
    {
      "epoch": 0.037914691943127965,
      "eval_loss": 11.812297821044922,
      "eval_runtime": 43.9061,
      "eval_samples_per_second": 101.17,
      "eval_steps_per_second": 25.304,
      "step": 100
    },
    {
      "epoch": 0.03829383886255924,
      "grad_norm": 0.16850636899471283,
      "learning_rate": 5.330452921628497e-05,
      "loss": 11.8059,
      "step": 101
    },
    {
      "epoch": 0.03867298578199052,
      "grad_norm": 0.1942441463470459,
      "learning_rate": 5.247918773366112e-05,
      "loss": 11.815,
      "step": 102
    },
    {
      "epoch": 0.0390521327014218,
      "grad_norm": 0.19246946275234222,
      "learning_rate": 5.165316846586541e-05,
      "loss": 11.8159,
      "step": 103
    },
    {
      "epoch": 0.039431279620853084,
      "grad_norm": 0.18047946691513062,
      "learning_rate": 5.0826697238317935e-05,
      "loss": 11.8168,
      "step": 104
    },
    {
      "epoch": 0.03981042654028436,
      "grad_norm": 0.16079886257648468,
      "learning_rate": 5e-05,
      "loss": 11.813,
      "step": 105
    },
    {
      "epoch": 0.04018957345971564,
      "grad_norm": 0.17524497210979462,
      "learning_rate": 4.917330276168208e-05,
      "loss": 11.8219,
      "step": 106
    },
    {
      "epoch": 0.04056872037914692,
      "grad_norm": 0.14713990688323975,
      "learning_rate": 4.834683153413459e-05,
      "loss": 11.8148,
      "step": 107
    },
    {
      "epoch": 0.040947867298578196,
      "grad_norm": 0.127065971493721,
      "learning_rate": 4.7520812266338885e-05,
      "loss": 11.821,
      "step": 108
    },
    {
      "epoch": 0.04132701421800948,
      "grad_norm": 0.10191242396831512,
      "learning_rate": 4.669547078371504e-05,
      "loss": 11.8177,
      "step": 109
    },
    {
      "epoch": 0.04170616113744076,
      "grad_norm": 0.12673281133174896,
      "learning_rate": 4.5871032726383386e-05,
      "loss": 11.8152,
      "step": 110
    },
    {
      "epoch": 0.04208530805687204,
      "grad_norm": 0.14365102350711823,
      "learning_rate": 4.504772348747687e-05,
      "loss": 11.8153,
      "step": 111
    },
    {
      "epoch": 0.042464454976303315,
      "grad_norm": 0.13751044869422913,
      "learning_rate": 4.4225768151520694e-05,
      "loss": 11.8114,
      "step": 112
    },
    {
      "epoch": 0.0428436018957346,
      "grad_norm": 0.14924171566963196,
      "learning_rate": 4.3405391432896555e-05,
      "loss": 11.8159,
      "step": 113
    },
    {
      "epoch": 0.04322274881516588,
      "grad_norm": 0.10858994722366333,
      "learning_rate": 4.2586817614407895e-05,
      "loss": 11.8082,
      "step": 114
    },
    {
      "epoch": 0.043601895734597156,
      "grad_norm": 0.1294364482164383,
      "learning_rate": 4.17702704859633e-05,
      "loss": 11.8152,
      "step": 115
    },
    {
      "epoch": 0.043981042654028434,
      "grad_norm": 0.1143607646226883,
      "learning_rate": 4.095597328339452e-05,
      "loss": 11.8159,
      "step": 116
    },
    {
      "epoch": 0.04436018957345972,
      "grad_norm": 0.10796434432268143,
      "learning_rate": 4.0144148627425993e-05,
      "loss": 11.8102,
      "step": 117
    },
    {
      "epoch": 0.044739336492890996,
      "grad_norm": 0.16013963520526886,
      "learning_rate": 3.933501846281267e-05,
      "loss": 11.8237,
      "step": 118
    },
    {
      "epoch": 0.045118483412322274,
      "grad_norm": 0.10867973417043686,
      "learning_rate": 3.852880399766243e-05,
      "loss": 11.807,
      "step": 119
    },
    {
      "epoch": 0.04549763033175355,
      "grad_norm": 0.14788459241390228,
      "learning_rate": 3.772572564296005e-05,
      "loss": 11.8125,
      "step": 120
    },
    {
      "epoch": 0.04587677725118484,
      "grad_norm": 0.14955635368824005,
      "learning_rate": 3.6926002952309016e-05,
      "loss": 11.8122,
      "step": 121
    },
    {
      "epoch": 0.046255924170616115,
      "grad_norm": 0.1232515349984169,
      "learning_rate": 3.612985456190778e-05,
      "loss": 11.81,
      "step": 122
    },
    {
      "epoch": 0.04663507109004739,
      "grad_norm": 0.12674307823181152,
      "learning_rate": 3.533749813077677e-05,
      "loss": 11.8097,
      "step": 123
    },
    {
      "epoch": 0.04701421800947867,
      "grad_norm": 0.10738085210323334,
      "learning_rate": 3.4549150281252636e-05,
      "loss": 11.8108,
      "step": 124
    },
    {
      "epoch": 0.04739336492890995,
      "grad_norm": 0.11420100927352905,
      "learning_rate": 3.3765026539765834e-05,
      "loss": 11.7998,
      "step": 125
    },
    {
      "epoch": 0.047772511848341234,
      "grad_norm": 0.1361931413412094,
      "learning_rate": 3.298534127791785e-05,
      "loss": 11.8028,
      "step": 126
    },
    {
      "epoch": 0.04815165876777251,
      "grad_norm": 0.12988242506980896,
      "learning_rate": 3.221030765387417e-05,
      "loss": 11.7993,
      "step": 127
    },
    {
      "epoch": 0.04853080568720379,
      "grad_norm": 0.18020643293857574,
      "learning_rate": 3.144013755408895e-05,
      "loss": 11.8009,
      "step": 128
    },
    {
      "epoch": 0.04890995260663507,
      "grad_norm": 0.1143268421292305,
      "learning_rate": 3.0675041535377405e-05,
      "loss": 11.8097,
      "step": 129
    },
    {
      "epoch": 0.04928909952606635,
      "grad_norm": 0.13046760857105255,
      "learning_rate": 2.991522876735154e-05,
      "loss": 11.8051,
      "step": 130
    },
    {
      "epoch": 0.04966824644549763,
      "grad_norm": 0.12067389488220215,
      "learning_rate": 2.916090697523549e-05,
      "loss": 11.8046,
      "step": 131
    },
    {
      "epoch": 0.05004739336492891,
      "grad_norm": 0.13390123844146729,
      "learning_rate": 2.8412282383075363e-05,
      "loss": 11.8001,
      "step": 132
    },
    {
      "epoch": 0.05042654028436019,
      "grad_norm": 0.1690846085548401,
      "learning_rate": 2.766955965735968e-05,
      "loss": 11.7989,
      "step": 133
    },
    {
      "epoch": 0.05080568720379147,
      "grad_norm": 0.1262759566307068,
      "learning_rate": 2.693294185106562e-05,
      "loss": 11.7955,
      "step": 134
    },
    {
      "epoch": 0.05118483412322275,
      "grad_norm": 0.1195095032453537,
      "learning_rate": 2.6202630348146324e-05,
      "loss": 11.7831,
      "step": 135
    },
    {
      "epoch": 0.05156398104265403,
      "grad_norm": 0.1531498283147812,
      "learning_rate": 2.547882480847461e-05,
      "loss": 11.7925,
      "step": 136
    },
    {
      "epoch": 0.051943127962085306,
      "grad_norm": 0.1306382119655609,
      "learning_rate": 2.476172311325783e-05,
      "loss": 11.7897,
      "step": 137
    },
    {
      "epoch": 0.05232227488151659,
      "grad_norm": 0.17050689458847046,
      "learning_rate": 2.405152131093926e-05,
      "loss": 11.7833,
      "step": 138
    },
    {
      "epoch": 0.05270142180094787,
      "grad_norm": 0.1543818861246109,
      "learning_rate": 2.3348413563600325e-05,
      "loss": 11.7778,
      "step": 139
    },
    {
      "epoch": 0.05308056872037915,
      "grad_norm": 0.1629064679145813,
      "learning_rate": 2.2652592093878666e-05,
      "loss": 11.7884,
      "step": 140
    },
    {
      "epoch": 0.053459715639810425,
      "grad_norm": 0.15135161578655243,
      "learning_rate": 2.196424713241637e-05,
      "loss": 11.7921,
      "step": 141
    },
    {
      "epoch": 0.0538388625592417,
      "grad_norm": 0.13913819193840027,
      "learning_rate": 2.128356686585282e-05,
      "loss": 11.7866,
      "step": 142
    },
    {
      "epoch": 0.05421800947867299,
      "grad_norm": 0.14062654972076416,
      "learning_rate": 2.061073738537635e-05,
      "loss": 11.7869,
      "step": 143
    },
    {
      "epoch": 0.054597156398104266,
      "grad_norm": 0.18874435126781464,
      "learning_rate": 1.9945942635848748e-05,
      "loss": 11.7835,
      "step": 144
    },
    {
      "epoch": 0.054976303317535544,
      "grad_norm": 0.15453287959098816,
      "learning_rate": 1.928936436551661e-05,
      "loss": 11.7864,
      "step": 145
    },
    {
      "epoch": 0.05535545023696682,
      "grad_norm": 0.1882849633693695,
      "learning_rate": 1.8641182076323148e-05,
      "loss": 11.7744,
      "step": 146
    },
    {
      "epoch": 0.05573459715639811,
      "grad_norm": 0.19366946816444397,
      "learning_rate": 1.800157297483417e-05,
      "loss": 11.7785,
      "step": 147
    },
    {
      "epoch": 0.056113744075829385,
      "grad_norm": 0.20455895364284515,
      "learning_rate": 1.7370711923791567e-05,
      "loss": 11.7727,
      "step": 148
    },
    {
      "epoch": 0.05649289099526066,
      "grad_norm": 0.23381587862968445,
      "learning_rate": 1.6748771394307585e-05,
      "loss": 11.7837,
      "step": 149
    },
    {
      "epoch": 0.05687203791469194,
      "grad_norm": 0.21379578113555908,
      "learning_rate": 1.6135921418712956e-05,
      "loss": 11.7728,
      "step": 150
    },
    {
      "epoch": 0.05687203791469194,
      "eval_loss": 11.7927827835083,
      "eval_runtime": 43.8842,
      "eval_samples_per_second": 101.221,
      "eval_steps_per_second": 25.317,
      "step": 150
    },
    {
      "epoch": 0.057251184834123225,
      "grad_norm": 0.16750890016555786,
      "learning_rate": 1.553232954407171e-05,
      "loss": 11.7841,
      "step": 151
    },
    {
      "epoch": 0.0576303317535545,
      "grad_norm": 0.15428869426250458,
      "learning_rate": 1.4938160786375572e-05,
      "loss": 11.7955,
      "step": 152
    },
    {
      "epoch": 0.05800947867298578,
      "grad_norm": 0.11902543902397156,
      "learning_rate": 1.435357758543015e-05,
      "loss": 11.7913,
      "step": 153
    },
    {
      "epoch": 0.05838862559241706,
      "grad_norm": 0.13620445132255554,
      "learning_rate": 1.3778739760445552e-05,
      "loss": 11.8091,
      "step": 154
    },
    {
      "epoch": 0.058767772511848344,
      "grad_norm": 0.14771781861782074,
      "learning_rate": 1.3213804466343421e-05,
      "loss": 11.7953,
      "step": 155
    },
    {
      "epoch": 0.05914691943127962,
      "grad_norm": 0.12858200073242188,
      "learning_rate": 1.2658926150792322e-05,
      "loss": 11.8072,
      "step": 156
    },
    {
      "epoch": 0.0595260663507109,
      "grad_norm": 0.12467218935489655,
      "learning_rate": 1.2114256511983274e-05,
      "loss": 11.8091,
      "step": 157
    },
    {
      "epoch": 0.05990521327014218,
      "grad_norm": 0.11657291650772095,
      "learning_rate": 1.157994445715706e-05,
      "loss": 11.7938,
      "step": 158
    },
    {
      "epoch": 0.060284360189573456,
      "grad_norm": 0.13835157454013824,
      "learning_rate": 1.1056136061894384e-05,
      "loss": 11.808,
      "step": 159
    },
    {
      "epoch": 0.06066350710900474,
      "grad_norm": 0.1299125999212265,
      "learning_rate": 1.0542974530180327e-05,
      "loss": 11.8075,
      "step": 160
    },
    {
      "epoch": 0.06104265402843602,
      "grad_norm": 0.12044967710971832,
      "learning_rate": 1.0040600155253765e-05,
      "loss": 11.8016,
      "step": 161
    },
    {
      "epoch": 0.0614218009478673,
      "grad_norm": 0.1253618597984314,
      "learning_rate": 9.549150281252633e-06,
      "loss": 11.7931,
      "step": 162
    },
    {
      "epoch": 0.061800947867298575,
      "grad_norm": 0.1312033236026764,
      "learning_rate": 9.068759265665384e-06,
      "loss": 11.798,
      "step": 163
    },
    {
      "epoch": 0.06218009478672986,
      "grad_norm": 0.14857697486877441,
      "learning_rate": 8.599558442598998e-06,
      "loss": 11.7953,
      "step": 164
    },
    {
      "epoch": 0.06255924170616113,
      "grad_norm": 0.15264755487442017,
      "learning_rate": 8.141676086873572e-06,
      "loss": 11.7924,
      "step": 165
    },
    {
      "epoch": 0.06293838862559242,
      "grad_norm": 0.11737260222434998,
      "learning_rate": 7.695237378953223e-06,
      "loss": 11.8009,
      "step": 166
    },
    {
      "epoch": 0.0633175355450237,
      "grad_norm": 0.11783807724714279,
      "learning_rate": 7.260364370723044e-06,
      "loss": 11.7974,
      "step": 167
    },
    {
      "epoch": 0.06369668246445498,
      "grad_norm": 0.10636231303215027,
      "learning_rate": 6.837175952121306e-06,
      "loss": 11.7982,
      "step": 168
    },
    {
      "epoch": 0.06407582938388626,
      "grad_norm": 0.12794683873653412,
      "learning_rate": 6.425787818636131e-06,
      "loss": 11.8013,
      "step": 169
    },
    {
      "epoch": 0.06445497630331753,
      "grad_norm": 0.1254904866218567,
      "learning_rate": 6.026312439675552e-06,
      "loss": 11.7946,
      "step": 170
    },
    {
      "epoch": 0.06483412322274881,
      "grad_norm": 0.1281624734401703,
      "learning_rate": 5.6388590278194096e-06,
      "loss": 11.8008,
      "step": 171
    },
    {
      "epoch": 0.06521327014218009,
      "grad_norm": 0.10411489754915237,
      "learning_rate": 5.263533508961827e-06,
      "loss": 11.7897,
      "step": 172
    },
    {
      "epoch": 0.06559241706161137,
      "grad_norm": 0.11528629809617996,
      "learning_rate": 4.900438493352055e-06,
      "loss": 11.7978,
      "step": 173
    },
    {
      "epoch": 0.06597156398104266,
      "grad_norm": 0.130488783121109,
      "learning_rate": 4.549673247541875e-06,
      "loss": 11.7993,
      "step": 174
    },
    {
      "epoch": 0.06635071090047394,
      "grad_norm": 0.10437383502721786,
      "learning_rate": 4.2113336672471245e-06,
      "loss": 11.7945,
      "step": 175
    },
    {
      "epoch": 0.06672985781990522,
      "grad_norm": 0.1090942770242691,
      "learning_rate": 3.885512251130763e-06,
      "loss": 11.7843,
      "step": 176
    },
    {
      "epoch": 0.0671090047393365,
      "grad_norm": 0.12790200114250183,
      "learning_rate": 3.5722980755146517e-06,
      "loss": 11.785,
      "step": 177
    },
    {
      "epoch": 0.06748815165876777,
      "grad_norm": 0.14767682552337646,
      "learning_rate": 3.271776770026963e-06,
      "loss": 11.7857,
      "step": 178
    },
    {
      "epoch": 0.06786729857819905,
      "grad_norm": 0.15827041864395142,
      "learning_rate": 2.9840304941919415e-06,
      "loss": 11.8018,
      "step": 179
    },
    {
      "epoch": 0.06824644549763033,
      "grad_norm": 0.13997766375541687,
      "learning_rate": 2.7091379149682685e-06,
      "loss": 11.7856,
      "step": 180
    },
    {
      "epoch": 0.0686255924170616,
      "grad_norm": 0.1263304501771927,
      "learning_rate": 2.4471741852423237e-06,
      "loss": 11.7963,
      "step": 181
    },
    {
      "epoch": 0.06900473933649288,
      "grad_norm": 0.14048518240451813,
      "learning_rate": 2.1982109232821178e-06,
      "loss": 11.7878,
      "step": 182
    },
    {
      "epoch": 0.06938388625592418,
      "grad_norm": 0.1328687071800232,
      "learning_rate": 1.962316193157593e-06,
      "loss": 11.7846,
      "step": 183
    },
    {
      "epoch": 0.06976303317535545,
      "grad_norm": 0.1481953263282776,
      "learning_rate": 1.7395544861325718e-06,
      "loss": 11.7851,
      "step": 184
    },
    {
      "epoch": 0.07014218009478673,
      "grad_norm": 0.14546702802181244,
      "learning_rate": 1.5299867030334814e-06,
      "loss": 11.788,
      "step": 185
    },
    {
      "epoch": 0.07052132701421801,
      "grad_norm": 0.12582269310951233,
      "learning_rate": 1.333670137599713e-06,
      "loss": 11.7925,
      "step": 186
    },
    {
      "epoch": 0.07090047393364929,
      "grad_norm": 0.14950110018253326,
      "learning_rate": 1.1506584608200367e-06,
      "loss": 11.7883,
      "step": 187
    },
    {
      "epoch": 0.07127962085308057,
      "grad_norm": 0.12403721362352371,
      "learning_rate": 9.810017062595322e-07,
      "loss": 11.7848,
      "step": 188
    },
    {
      "epoch": 0.07165876777251184,
      "grad_norm": 0.1594034880399704,
      "learning_rate": 8.247462563808817e-07,
      "loss": 11.774,
      "step": 189
    },
    {
      "epoch": 0.07203791469194312,
      "grad_norm": 0.15566158294677734,
      "learning_rate": 6.819348298638839e-07,
      "loss": 11.7819,
      "step": 190
    },
    {
      "epoch": 0.07241706161137441,
      "grad_norm": 0.14381428062915802,
      "learning_rate": 5.526064699265753e-07,
      "loss": 11.7861,
      "step": 191
    },
    {
      "epoch": 0.07279620853080569,
      "grad_norm": 0.1404116153717041,
      "learning_rate": 4.367965336512403e-07,
      "loss": 11.778,
      "step": 192
    },
    {
      "epoch": 0.07317535545023697,
      "grad_norm": 0.14419354498386383,
      "learning_rate": 3.3453668231809286e-07,
      "loss": 11.7789,
      "step": 193
    },
    {
      "epoch": 0.07355450236966825,
      "grad_norm": 0.18890376389026642,
      "learning_rate": 2.458548727494292e-07,
      "loss": 11.774,
      "step": 194
    },
    {
      "epoch": 0.07393364928909953,
      "grad_norm": 0.1897961050271988,
      "learning_rate": 1.7077534966650766e-07,
      "loss": 11.774,
      "step": 195
    },
    {
      "epoch": 0.0743127962085308,
      "grad_norm": 0.16154845058918,
      "learning_rate": 1.0931863906127327e-07,
      "loss": 11.7702,
      "step": 196
    },
    {
      "epoch": 0.07469194312796208,
      "grad_norm": 0.15937910974025726,
      "learning_rate": 6.150154258476315e-08,
      "loss": 11.7803,
      "step": 197
    },
    {
      "epoch": 0.07507109004739336,
      "grad_norm": 0.212022602558136,
      "learning_rate": 2.7337132953697554e-08,
      "loss": 11.7682,
      "step": 198
    },
    {
      "epoch": 0.07545023696682464,
      "grad_norm": 0.21921372413635254,
      "learning_rate": 6.834750376549792e-09,
      "loss": 11.7774,
      "step": 199
    },
    {
      "epoch": 0.07582938388625593,
      "grad_norm": 0.2652130424976349,
      "learning_rate": 0.0,
      "loss": 11.7693,
      "step": 200
    },
    {
      "epoch": 0.07582938388625593,
      "eval_loss": 11.789237976074219,
      "eval_runtime": 43.9011,
      "eval_samples_per_second": 101.182,
      "eval_steps_per_second": 25.307,
      "step": 200
    }
  ],
  "logging_steps": 1,
  "max_steps": 200,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 226142650368000.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}