|
{
  "best_metric": 1.97798752784729,
  "best_model_checkpoint": "miner_id_24/checkpoint-100",
  "epoch": 0.007456287514446557,
  "eval_steps": 50,
  "global_step": 100,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 7.456287514446557e-05,
      "grad_norm": 0.43125054240226746,
      "learning_rate": 3.3333333333333333e-06,
      "loss": 1.764,
      "step": 1
    },
    {
      "epoch": 7.456287514446557e-05,
      "eval_loss": 2.176044464111328,
      "eval_runtime": 1967.9509,
      "eval_samples_per_second": 11.478,
      "eval_steps_per_second": 5.739,
      "step": 1
    },
    {
      "epoch": 0.00014912575028893114,
      "grad_norm": 0.4944254457950592,
      "learning_rate": 6.666666666666667e-06,
      "loss": 2.0124,
      "step": 2
    },
    {
      "epoch": 0.0002236886254333967,
      "grad_norm": 0.5164911150932312,
      "learning_rate": 1e-05,
      "loss": 2.0545,
      "step": 3
    },
    {
      "epoch": 0.0002982515005778623,
      "grad_norm": 0.5371212363243103,
      "learning_rate": 1.3333333333333333e-05,
      "loss": 2.1354,
      "step": 4
    },
    {
      "epoch": 0.00037281437572232783,
      "grad_norm": 0.5677233338356018,
      "learning_rate": 1.6666666666666667e-05,
      "loss": 2.0822,
      "step": 5
    },
    {
      "epoch": 0.0004473772508667934,
      "grad_norm": 0.5842496156692505,
      "learning_rate": 2e-05,
      "loss": 2.1827,
      "step": 6
    },
    {
      "epoch": 0.000521940126011259,
      "grad_norm": 0.5665204524993896,
      "learning_rate": 2.3333333333333336e-05,
      "loss": 2.1415,
      "step": 7
    },
    {
      "epoch": 0.0005965030011557246,
      "grad_norm": 0.5413942337036133,
      "learning_rate": 2.6666666666666667e-05,
      "loss": 2.0118,
      "step": 8
    },
    {
      "epoch": 0.0006710658763001902,
      "grad_norm": 0.5594425201416016,
      "learning_rate": 3e-05,
      "loss": 2.0801,
      "step": 9
    },
    {
      "epoch": 0.0007456287514446557,
      "grad_norm": 0.553996741771698,
      "learning_rate": 3.3333333333333335e-05,
      "loss": 2.0225,
      "step": 10
    },
    {
      "epoch": 0.0008201916265891213,
      "grad_norm": 0.5754431486129761,
      "learning_rate": 3.6666666666666666e-05,
      "loss": 2.064,
      "step": 11
    },
    {
      "epoch": 0.0008947545017335869,
      "grad_norm": 0.6240922808647156,
      "learning_rate": 4e-05,
      "loss": 2.126,
      "step": 12
    },
    {
      "epoch": 0.0009693173768780525,
      "grad_norm": 0.5755719542503357,
      "learning_rate": 4.3333333333333334e-05,
      "loss": 2.0683,
      "step": 13
    },
    {
      "epoch": 0.001043880252022518,
      "grad_norm": 0.6122748851776123,
      "learning_rate": 4.666666666666667e-05,
      "loss": 2.0893,
      "step": 14
    },
    {
      "epoch": 0.0011184431271669835,
      "grad_norm": 0.6614731550216675,
      "learning_rate": 5e-05,
      "loss": 2.1551,
      "step": 15
    },
    {
      "epoch": 0.0011930060023114491,
      "grad_norm": 0.6139146685600281,
      "learning_rate": 5.333333333333333e-05,
      "loss": 1.959,
      "step": 16
    },
    {
      "epoch": 0.0012675688774559147,
      "grad_norm": 0.7065208554267883,
      "learning_rate": 5.666666666666667e-05,
      "loss": 2.015,
      "step": 17
    },
    {
      "epoch": 0.0013421317526003803,
      "grad_norm": 0.706400990486145,
      "learning_rate": 6e-05,
      "loss": 2.0351,
      "step": 18
    },
    {
      "epoch": 0.001416694627744846,
      "grad_norm": 0.6575025916099548,
      "learning_rate": 6.333333333333333e-05,
      "loss": 2.0483,
      "step": 19
    },
    {
      "epoch": 0.0014912575028893113,
      "grad_norm": 0.6933155059814453,
      "learning_rate": 6.666666666666667e-05,
      "loss": 2.0612,
      "step": 20
    },
    {
      "epoch": 0.001565820378033777,
      "grad_norm": 0.6782702207565308,
      "learning_rate": 7e-05,
      "loss": 2.0763,
      "step": 21
    },
    {
      "epoch": 0.0016403832531782425,
      "grad_norm": 0.671043872833252,
      "learning_rate": 7.333333333333333e-05,
      "loss": 2.076,
      "step": 22
    },
    {
      "epoch": 0.001714946128322708,
      "grad_norm": 0.6524346470832825,
      "learning_rate": 7.666666666666667e-05,
      "loss": 2.0047,
      "step": 23
    },
    {
      "epoch": 0.0017895090034671737,
      "grad_norm": 0.71756511926651,
      "learning_rate": 8e-05,
      "loss": 2.1351,
      "step": 24
    },
    {
      "epoch": 0.0018640718786116393,
      "grad_norm": 0.6905089020729065,
      "learning_rate": 8.333333333333334e-05,
      "loss": 2.0644,
      "step": 25
    },
    {
      "epoch": 0.001938634753756105,
      "grad_norm": 0.6620098352432251,
      "learning_rate": 8.666666666666667e-05,
      "loss": 2.1028,
      "step": 26
    },
    {
      "epoch": 0.0020131976289005705,
      "grad_norm": 0.6542001962661743,
      "learning_rate": 9e-05,
      "loss": 2.0178,
      "step": 27
    },
    {
      "epoch": 0.002087760504045036,
      "grad_norm": 0.725846529006958,
      "learning_rate": 9.333333333333334e-05,
      "loss": 2.1445,
      "step": 28
    },
    {
      "epoch": 0.0021623233791895017,
      "grad_norm": 0.6756733655929565,
      "learning_rate": 9.666666666666667e-05,
      "loss": 2.0946,
      "step": 29
    },
    {
      "epoch": 0.002236886254333967,
      "grad_norm": 0.6713683605194092,
      "learning_rate": 0.0001,
      "loss": 2.0383,
      "step": 30
    },
    {
      "epoch": 0.002311449129478433,
      "grad_norm": 0.8826857805252075,
      "learning_rate": 9.994965332706573e-05,
      "loss": 2.1636,
      "step": 31
    },
    {
      "epoch": 0.0023860120046228983,
      "grad_norm": 0.7072741985321045,
      "learning_rate": 9.979871469976196e-05,
      "loss": 2.0969,
      "step": 32
    },
    {
      "epoch": 0.0024605748797673637,
      "grad_norm": 0.6709433197975159,
      "learning_rate": 9.954748808839674e-05,
      "loss": 2.0807,
      "step": 33
    },
    {
      "epoch": 0.0025351377549118295,
      "grad_norm": 0.6410077810287476,
      "learning_rate": 9.919647942993148e-05,
      "loss": 1.9685,
      "step": 34
    },
    {
      "epoch": 0.002609700630056295,
      "grad_norm": 0.7483009696006775,
      "learning_rate": 9.874639560909117e-05,
      "loss": 2.1124,
      "step": 35
    },
    {
      "epoch": 0.0026842635052007607,
      "grad_norm": 0.667682409286499,
      "learning_rate": 9.819814303479267e-05,
      "loss": 1.9806,
      "step": 36
    },
    {
      "epoch": 0.002758826380345226,
      "grad_norm": 0.6942345499992371,
      "learning_rate": 9.755282581475769e-05,
      "loss": 2.0423,
      "step": 37
    },
    {
      "epoch": 0.002833389255489692,
      "grad_norm": 0.7064806222915649,
      "learning_rate": 9.681174353198687e-05,
      "loss": 2.1869,
      "step": 38
    },
    {
      "epoch": 0.0029079521306341572,
      "grad_norm": 0.6997663974761963,
      "learning_rate": 9.597638862757255e-05,
      "loss": 2.1582,
      "step": 39
    },
    {
      "epoch": 0.0029825150057786226,
      "grad_norm": 0.6768945455551147,
      "learning_rate": 9.504844339512095e-05,
      "loss": 2.1976,
      "step": 40
    },
    {
      "epoch": 0.0030570778809230884,
      "grad_norm": 0.7602648138999939,
      "learning_rate": 9.40297765928369e-05,
      "loss": 2.0887,
      "step": 41
    },
    {
      "epoch": 0.003131640756067554,
      "grad_norm": 0.6888823509216309,
      "learning_rate": 9.292243968009331e-05,
      "loss": 2.0271,
      "step": 42
    },
    {
      "epoch": 0.0032062036312120196,
      "grad_norm": 0.694747805595398,
      "learning_rate": 9.172866268606513e-05,
      "loss": 2.081,
      "step": 43
    },
    {
      "epoch": 0.003280766506356485,
      "grad_norm": 0.6592668890953064,
      "learning_rate": 9.045084971874738e-05,
      "loss": 1.9375,
      "step": 44
    },
    {
      "epoch": 0.003355329381500951,
      "grad_norm": 0.7744700312614441,
      "learning_rate": 8.90915741234015e-05,
      "loss": 2.1293,
      "step": 45
    },
    {
      "epoch": 0.003429892256645416,
      "grad_norm": 0.7216829657554626,
      "learning_rate": 8.765357330018056e-05,
      "loss": 2.0884,
      "step": 46
    },
    {
      "epoch": 0.003504455131789882,
      "grad_norm": 0.7232696413993835,
      "learning_rate": 8.613974319136958e-05,
      "loss": 2.0312,
      "step": 47
    },
    {
      "epoch": 0.0035790180069343474,
      "grad_norm": 0.964648425579071,
      "learning_rate": 8.455313244934324e-05,
      "loss": 1.9873,
      "step": 48
    },
    {
      "epoch": 0.003653580882078813,
      "grad_norm": 0.8183997869491577,
      "learning_rate": 8.289693629698564e-05,
      "loss": 2.0139,
      "step": 49
    },
    {
      "epoch": 0.0037281437572232786,
      "grad_norm": 0.970118522644043,
      "learning_rate": 8.117449009293668e-05,
      "loss": 2.1238,
      "step": 50
    },
    {
      "epoch": 0.0037281437572232786,
      "eval_loss": 2.0290143489837646,
      "eval_runtime": 1975.9168,
      "eval_samples_per_second": 11.432,
      "eval_steps_per_second": 5.716,
      "step": 50
    },
    {
      "epoch": 0.003802706632367744,
      "grad_norm": 0.5115548968315125,
      "learning_rate": 7.938926261462366e-05,
      "loss": 1.6365,
      "step": 51
    },
    {
      "epoch": 0.00387726950751221,
      "grad_norm": 0.7051359415054321,
      "learning_rate": 7.754484907260513e-05,
      "loss": 2.0046,
      "step": 52
    },
    {
      "epoch": 0.003951832382656675,
      "grad_norm": 0.46429726481437683,
      "learning_rate": 7.564496387029532e-05,
      "loss": 1.8634,
      "step": 53
    },
    {
      "epoch": 0.004026395257801141,
      "grad_norm": 0.40492793917655945,
      "learning_rate": 7.369343312364993e-05,
      "loss": 1.9598,
      "step": 54
    },
    {
      "epoch": 0.004100958132945607,
      "grad_norm": 0.39108383655548096,
      "learning_rate": 7.169418695587791e-05,
      "loss": 1.8478,
      "step": 55
    },
    {
      "epoch": 0.004175521008090072,
      "grad_norm": 0.42470911145210266,
      "learning_rate": 6.965125158269619e-05,
      "loss": 2.0799,
      "step": 56
    },
    {
      "epoch": 0.004250083883234538,
      "grad_norm": 0.45118194818496704,
      "learning_rate": 6.756874120406714e-05,
      "loss": 2.045,
      "step": 57
    },
    {
      "epoch": 0.004324646758379003,
      "grad_norm": 0.4153003990650177,
      "learning_rate": 6.545084971874738e-05,
      "loss": 1.8896,
      "step": 58
    },
    {
      "epoch": 0.004399209633523468,
      "grad_norm": 0.4770011305809021,
      "learning_rate": 6.330184227833376e-05,
      "loss": 2.0487,
      "step": 59
    },
    {
      "epoch": 0.004473772508667934,
      "grad_norm": 0.46756961941719055,
      "learning_rate": 6.112604669781572e-05,
      "loss": 1.9976,
      "step": 60
    },
    {
      "epoch": 0.0045483353838124,
      "grad_norm": 0.47102442383766174,
      "learning_rate": 5.8927844739931834e-05,
      "loss": 1.9983,
      "step": 61
    },
    {
      "epoch": 0.004622898258956866,
      "grad_norm": 0.5105215907096863,
      "learning_rate": 5.6711663290882776e-05,
      "loss": 2.0145,
      "step": 62
    },
    {
      "epoch": 0.004697461134101331,
      "grad_norm": 0.48268643021583557,
      "learning_rate": 5.448196544517168e-05,
      "loss": 2.0284,
      "step": 63
    },
    {
      "epoch": 0.0047720240092457965,
      "grad_norm": 0.49653950333595276,
      "learning_rate": 5.2243241517525754e-05,
      "loss": 2.0299,
      "step": 64
    },
    {
      "epoch": 0.004846586884390262,
      "grad_norm": 0.4842401444911957,
      "learning_rate": 5e-05,
      "loss": 1.9628,
      "step": 65
    },
    {
      "epoch": 0.004921149759534727,
      "grad_norm": 0.5133103728294373,
      "learning_rate": 4.775675848247427e-05,
      "loss": 1.9677,
      "step": 66
    },
    {
      "epoch": 0.004995712634679193,
      "grad_norm": 0.5002983808517456,
      "learning_rate": 4.551803455482833e-05,
      "loss": 2.0314,
      "step": 67
    },
    {
      "epoch": 0.005070275509823659,
      "grad_norm": 0.5741726756095886,
      "learning_rate": 4.328833670911724e-05,
      "loss": 2.007,
      "step": 68
    },
    {
      "epoch": 0.005144838384968125,
      "grad_norm": 0.5411697626113892,
      "learning_rate": 4.107215526006817e-05,
      "loss": 1.954,
      "step": 69
    },
    {
      "epoch": 0.00521940126011259,
      "grad_norm": 0.5465304255485535,
      "learning_rate": 3.887395330218429e-05,
      "loss": 1.9437,
      "step": 70
    },
    {
      "epoch": 0.0052939641352570555,
      "grad_norm": 0.5512676239013672,
      "learning_rate": 3.6698157721666246e-05,
      "loss": 1.8954,
      "step": 71
    },
    {
      "epoch": 0.005368527010401521,
      "grad_norm": 0.5813472270965576,
      "learning_rate": 3.4549150281252636e-05,
      "loss": 2.0268,
      "step": 72
    },
    {
      "epoch": 0.005443089885545986,
      "grad_norm": 0.5529889464378357,
      "learning_rate": 3.243125879593286e-05,
      "loss": 1.9832,
      "step": 73
    },
    {
      "epoch": 0.005517652760690452,
      "grad_norm": 0.5440077781677246,
      "learning_rate": 3.0348748417303823e-05,
      "loss": 1.9148,
      "step": 74
    },
    {
      "epoch": 0.005592215635834918,
      "grad_norm": 0.5623196363449097,
      "learning_rate": 2.8305813044122097e-05,
      "loss": 2.0762,
      "step": 75
    },
    {
      "epoch": 0.005666778510979384,
      "grad_norm": 0.5488706827163696,
      "learning_rate": 2.630656687635007e-05,
      "loss": 1.9115,
      "step": 76
    },
    {
      "epoch": 0.005741341386123849,
      "grad_norm": 0.5504707098007202,
      "learning_rate": 2.43550361297047e-05,
      "loss": 1.9987,
      "step": 77
    },
    {
      "epoch": 0.0058159042612683145,
      "grad_norm": 0.544782817363739,
      "learning_rate": 2.245515092739488e-05,
      "loss": 2.0636,
      "step": 78
    },
    {
      "epoch": 0.00589046713641278,
      "grad_norm": 0.5880534648895264,
      "learning_rate": 2.061073738537635e-05,
      "loss": 1.9786,
      "step": 79
    },
    {
      "epoch": 0.005965030011557245,
      "grad_norm": 0.526584267616272,
      "learning_rate": 1.8825509907063327e-05,
      "loss": 1.9431,
      "step": 80
    },
    {
      "epoch": 0.006039592886701711,
      "grad_norm": 0.5774157643318176,
      "learning_rate": 1.7103063703014372e-05,
      "loss": 2.0321,
      "step": 81
    },
    {
      "epoch": 0.006114155761846177,
      "grad_norm": 0.5706311464309692,
      "learning_rate": 1.544686755065677e-05,
      "loss": 2.0225,
      "step": 82
    },
    {
      "epoch": 0.006188718636990643,
      "grad_norm": 0.5908026099205017,
      "learning_rate": 1.3860256808630428e-05,
      "loss": 1.9953,
      "step": 83
    },
    {
      "epoch": 0.006263281512135108,
      "grad_norm": 0.6149851679801941,
      "learning_rate": 1.2346426699819458e-05,
      "loss": 1.9548,
      "step": 84
    },
    {
      "epoch": 0.0063378443872795735,
      "grad_norm": 0.6206725835800171,
      "learning_rate": 1.090842587659851e-05,
      "loss": 2.006,
      "step": 85
    },
    {
      "epoch": 0.006412407262424039,
      "grad_norm": 0.610331118106842,
      "learning_rate": 9.549150281252633e-06,
      "loss": 2.0609,
      "step": 86
    },
    {
      "epoch": 0.006486970137568504,
      "grad_norm": 0.5870972275733948,
      "learning_rate": 8.271337313934869e-06,
      "loss": 1.9884,
      "step": 87
    },
    {
      "epoch": 0.00656153301271297,
      "grad_norm": 0.641563892364502,
      "learning_rate": 7.077560319906695e-06,
      "loss": 2.1203,
      "step": 88
    },
    {
      "epoch": 0.006636095887857436,
      "grad_norm": 0.7119360566139221,
      "learning_rate": 5.9702234071631e-06,
      "loss": 2.0064,
      "step": 89
    },
    {
      "epoch": 0.006710658763001902,
      "grad_norm": 0.6424351930618286,
      "learning_rate": 4.951556604879048e-06,
      "loss": 2.1295,
      "step": 90
    },
    {
      "epoch": 0.006785221638146367,
      "grad_norm": 0.7995273470878601,
      "learning_rate": 4.023611372427471e-06,
      "loss": 1.9488,
      "step": 91
    },
    {
      "epoch": 0.006859784513290832,
      "grad_norm": 0.6758121848106384,
      "learning_rate": 3.18825646801314e-06,
      "loss": 2.0611,
      "step": 92
    },
    {
      "epoch": 0.006934347388435298,
      "grad_norm": 0.6477658152580261,
      "learning_rate": 2.4471741852423237e-06,
      "loss": 1.9928,
      "step": 93
    },
    {
      "epoch": 0.007008910263579764,
      "grad_norm": 0.6778663396835327,
      "learning_rate": 1.8018569652073381e-06,
      "loss": 2.0962,
      "step": 94
    },
    {
      "epoch": 0.007083473138724229,
      "grad_norm": 0.6519765853881836,
      "learning_rate": 1.2536043909088191e-06,
      "loss": 2.0006,
      "step": 95
    },
    {
      "epoch": 0.007158036013868695,
      "grad_norm": 0.7212676405906677,
      "learning_rate": 8.035205700685167e-07,
      "loss": 2.0475,
      "step": 96
    },
    {
      "epoch": 0.007232598889013161,
      "grad_norm": 0.7102315425872803,
      "learning_rate": 4.52511911603265e-07,
      "loss": 2.0604,
      "step": 97
    },
    {
      "epoch": 0.007307161764157626,
      "grad_norm": 0.7387847900390625,
      "learning_rate": 2.012853002380466e-07,
      "loss": 2.0218,
      "step": 98
    },
    {
      "epoch": 0.007381724639302091,
      "grad_norm": 0.8174800276756287,
      "learning_rate": 5.0346672934270534e-08,
      "loss": 2.0107,
      "step": 99
    },
    {
      "epoch": 0.007456287514446557,
      "grad_norm": 0.9243033528327942,
      "learning_rate": 0.0,
      "loss": 1.9478,
      "step": 100
    },
    {
      "epoch": 0.007456287514446557,
      "eval_loss": 1.97798752784729,
      "eval_runtime": 1976.5858,
      "eval_samples_per_second": 11.428,
      "eval_steps_per_second": 5.714,
      "step": 100
    }
  ],
  "logging_steps": 1,
  "max_steps": 100,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.591481825576878e+17,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}