|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.0, |
|
"eval_steps": 500, |
|
"global_step": 510, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.00392156862745098, |
|
"grad_norm": 16.544786089270207, |
|
"learning_rate": 3.921568627450981e-07, |
|
"loss": 1.6547, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0196078431372549, |
|
"grad_norm": 14.762176989308287, |
|
"learning_rate": 1.96078431372549e-06, |
|
"loss": 1.617, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.0392156862745098, |
|
"grad_norm": 5.1165863939415654, |
|
"learning_rate": 3.92156862745098e-06, |
|
"loss": 1.4078, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.058823529411764705, |
|
"grad_norm": 2.2787880179843194, |
|
"learning_rate": 5.882352941176471e-06, |
|
"loss": 1.2895, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.0784313725490196, |
|
"grad_norm": 1.832234050305862, |
|
"learning_rate": 7.84313725490196e-06, |
|
"loss": 1.2313, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.09803921568627451, |
|
"grad_norm": 1.2251164606770868, |
|
"learning_rate": 9.803921568627451e-06, |
|
"loss": 1.1832, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.11764705882352941, |
|
"grad_norm": 1.51446787757622, |
|
"learning_rate": 1.1764705882352942e-05, |
|
"loss": 1.1558, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.13725490196078433, |
|
"grad_norm": 1.1054530221391963, |
|
"learning_rate": 1.3725490196078432e-05, |
|
"loss": 1.1678, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.1568627450980392, |
|
"grad_norm": 1.1955722975383631, |
|
"learning_rate": 1.568627450980392e-05, |
|
"loss": 1.162, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.17647058823529413, |
|
"grad_norm": 1.1601166405223855, |
|
"learning_rate": 1.7647058823529414e-05, |
|
"loss": 1.1303, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.19607843137254902, |
|
"grad_norm": 1.2824519717268024, |
|
"learning_rate": 1.9607843137254903e-05, |
|
"loss": 1.1272, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.21568627450980393, |
|
"grad_norm": 1.1682993723393238, |
|
"learning_rate": 1.999625253802851e-05, |
|
"loss": 1.1209, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.23529411764705882, |
|
"grad_norm": 1.302075491367261, |
|
"learning_rate": 1.9981033287370443e-05, |
|
"loss": 1.0887, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.2549019607843137, |
|
"grad_norm": 1.4556930650649318, |
|
"learning_rate": 1.9954125840299165e-05, |
|
"loss": 1.1226, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.27450980392156865, |
|
"grad_norm": 1.289748942480489, |
|
"learning_rate": 1.9915561706530882e-05, |
|
"loss": 1.1327, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.29411764705882354, |
|
"grad_norm": 1.1271075422611054, |
|
"learning_rate": 1.9865386046236597e-05, |
|
"loss": 1.1238, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.3137254901960784, |
|
"grad_norm": 1.2436214858595933, |
|
"learning_rate": 1.9803657617157693e-05, |
|
"loss": 1.08, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.3333333333333333, |
|
"grad_norm": 1.1690258085683884, |
|
"learning_rate": 1.973044870579824e-05, |
|
"loss": 1.111, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.35294117647058826, |
|
"grad_norm": 1.0669536039386904, |
|
"learning_rate": 1.9645845042774555e-05, |
|
"loss": 1.1074, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.37254901960784315, |
|
"grad_norm": 1.2617056965745426, |
|
"learning_rate": 1.9549945702421144e-05, |
|
"loss": 1.1073, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.39215686274509803, |
|
"grad_norm": 1.0667234116768218, |
|
"learning_rate": 1.9442862986770645e-05, |
|
"loss": 1.1018, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.4117647058823529, |
|
"grad_norm": 1.1310556155759037, |
|
"learning_rate": 1.932472229404356e-05, |
|
"loss": 1.1108, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.43137254901960786, |
|
"grad_norm": 1.0707842591727832, |
|
"learning_rate": 1.9195661971801825e-05, |
|
"loss": 1.0978, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.45098039215686275, |
|
"grad_norm": 1.1964147323589545, |
|
"learning_rate": 1.9055833154938208e-05, |
|
"loss": 1.0952, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.47058823529411764, |
|
"grad_norm": 1.0532587111658636, |
|
"learning_rate": 1.8905399588691165e-05, |
|
"loss": 1.0901, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.49019607843137253, |
|
"grad_norm": 1.0534305550044942, |
|
"learning_rate": 1.8744537436892517e-05, |
|
"loss": 1.0739, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.5098039215686274, |
|
"grad_norm": 1.032737951798708, |
|
"learning_rate": 1.8573435075672422e-05, |
|
"loss": 1.0871, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.5294117647058824, |
|
"grad_norm": 1.0758071447152808, |
|
"learning_rate": 1.839229287286327e-05, |
|
"loss": 1.0945, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.5490196078431373, |
|
"grad_norm": 1.109162566034754, |
|
"learning_rate": 1.8201322953360758e-05, |
|
"loss": 1.0708, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.5686274509803921, |
|
"grad_norm": 1.317192816494283, |
|
"learning_rate": 1.800074895071704e-05, |
|
"loss": 1.0764, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.5882352941176471, |
|
"grad_norm": 1.0795334451744183, |
|
"learning_rate": 1.7790805745256703e-05, |
|
"loss": 1.0791, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.6078431372549019, |
|
"grad_norm": 1.0411588891000574, |
|
"learning_rate": 1.7571739189022365e-05, |
|
"loss": 1.091, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.6274509803921569, |
|
"grad_norm": 1.0108445010492009, |
|
"learning_rate": 1.7343805817871885e-05, |
|
"loss": 1.0809, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.6470588235294118, |
|
"grad_norm": 1.0901231094990715, |
|
"learning_rate": 1.710727255106447e-05, |
|
"loss": 1.0602, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.6666666666666666, |
|
"grad_norm": 1.0945695101274928, |
|
"learning_rate": 1.686241637868734e-05, |
|
"loss": 1.0777, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.6862745098039216, |
|
"grad_norm": 1.193395829329031, |
|
"learning_rate": 1.660952403728902e-05, |
|
"loss": 1.0444, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.7058823529411765, |
|
"grad_norm": 1.1047822048211582, |
|
"learning_rate": 1.634889167409923e-05, |
|
"loss": 1.0534, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.7254901960784313, |
|
"grad_norm": 1.114058959698183, |
|
"learning_rate": 1.6080824500228367e-05, |
|
"loss": 1.0529, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.7450980392156863, |
|
"grad_norm": 0.9904799394022883, |
|
"learning_rate": 1.5805636433252892e-05, |
|
"loss": 1.0793, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.7647058823529411, |
|
"grad_norm": 1.2525818789945302, |
|
"learning_rate": 1.552364972960506e-05, |
|
"loss": 1.0708, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.7843137254901961, |
|
"grad_norm": 1.1154866308925613, |
|
"learning_rate": 1.5235194607197508e-05, |
|
"loss": 1.0241, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.803921568627451, |
|
"grad_norm": 1.0785210608261893, |
|
"learning_rate": 1.494060885872464e-05, |
|
"loss": 1.0733, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.8235294117647058, |
|
"grad_norm": 1.1856668561642758, |
|
"learning_rate": 1.4640237456093636e-05, |
|
"loss": 1.0704, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.8431372549019608, |
|
"grad_norm": 1.1422476630742018, |
|
"learning_rate": 1.4334432146448272e-05, |
|
"loss": 1.0655, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.8627450980392157, |
|
"grad_norm": 0.9839617489985488, |
|
"learning_rate": 1.4023551040258726e-05, |
|
"loss": 1.0551, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.8823529411764706, |
|
"grad_norm": 1.0486919008457822, |
|
"learning_rate": 1.3707958191959609e-05, |
|
"loss": 1.0479, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.9019607843137255, |
|
"grad_norm": 1.0279452846404875, |
|
"learning_rate": 1.3388023173627413e-05, |
|
"loss": 1.0466, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.9215686274509803, |
|
"grad_norm": 1.0670615822796066, |
|
"learning_rate": 1.3064120642196549e-05, |
|
"loss": 1.0554, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.9411764705882353, |
|
"grad_norm": 1.0549543392774092, |
|
"learning_rate": 1.2736629900720832e-05, |
|
"loss": 1.0648, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.9607843137254902, |
|
"grad_norm": 0.9674738336064063, |
|
"learning_rate": 1.2405934454194146e-05, |
|
"loss": 1.0197, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.9803921568627451, |
|
"grad_norm": 0.9981821541554068, |
|
"learning_rate": 1.2072421560450497e-05, |
|
"loss": 1.0355, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.9908436174645702, |
|
"learning_rate": 1.1736481776669307e-05, |
|
"loss": 1.0409, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 1.0343828201293945, |
|
"eval_runtime": 4.2299, |
|
"eval_samples_per_second": 37.826, |
|
"eval_steps_per_second": 0.709, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 1.0196078431372548, |
|
"grad_norm": 2.78732105665186, |
|
"learning_rate": 1.1398508502017047e-05, |
|
"loss": 0.7588, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.0392156862745099, |
|
"grad_norm": 1.5405892203726659, |
|
"learning_rate": 1.1058897516960817e-05, |
|
"loss": 0.7294, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 1.0588235294117647, |
|
"grad_norm": 1.1503472595942978, |
|
"learning_rate": 1.0718046519793276e-05, |
|
"loss": 0.7224, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.0784313725490196, |
|
"grad_norm": 1.1159446663960584, |
|
"learning_rate": 1.0376354660911772e-05, |
|
"loss": 0.7354, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 1.0980392156862746, |
|
"grad_norm": 1.0502332790639606, |
|
"learning_rate": 1.0034222075396954e-05, |
|
"loss": 0.7255, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.1176470588235294, |
|
"grad_norm": 0.9509421475398706, |
|
"learning_rate": 9.692049414438298e-06, |
|
"loss": 0.7154, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 1.1372549019607843, |
|
"grad_norm": 0.988479903928271, |
|
"learning_rate": 9.350237376155269e-06, |
|
"loss": 0.7218, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.156862745098039, |
|
"grad_norm": 1.0085376248525226, |
|
"learning_rate": 9.00918623636349e-06, |
|
"loss": 0.7366, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 1.1764705882352942, |
|
"grad_norm": 0.9961601641040383, |
|
"learning_rate": 8.669295379835467e-06, |
|
"loss": 0.7184, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.196078431372549, |
|
"grad_norm": 0.9921390197234601, |
|
"learning_rate": 8.330962832604747e-06, |
|
"loss": 0.7313, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 1.215686274509804, |
|
"grad_norm": 1.0706453846667212, |
|
"learning_rate": 7.994584795861248e-06, |
|
"loss": 0.7257, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.2352941176470589, |
|
"grad_norm": 1.0297863168482826, |
|
"learning_rate": 7.660555181983517e-06, |
|
"loss": 0.7468, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 1.2549019607843137, |
|
"grad_norm": 0.9995332244709463, |
|
"learning_rate": 7.329265153251285e-06, |
|
"loss": 0.7439, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.2745098039215685, |
|
"grad_norm": 1.0186580227806559, |
|
"learning_rate": 7.001102663778533e-06, |
|
"loss": 0.7172, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 1.2941176470588236, |
|
"grad_norm": 1.014034224454933, |
|
"learning_rate": 6.6764520052034054e-06, |
|
"loss": 0.7309, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.3137254901960784, |
|
"grad_norm": 1.0151160383199134, |
|
"learning_rate": 6.3556933566670656e-06, |
|
"loss": 0.719, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 1.3333333333333333, |
|
"grad_norm": 0.9563996745028378, |
|
"learning_rate": 6.039202339608432e-06, |
|
"loss": 0.7448, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.3529411764705883, |
|
"grad_norm": 0.9430025085055084, |
|
"learning_rate": 5.727349577896194e-06, |
|
"loss": 0.715, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 1.3725490196078431, |
|
"grad_norm": 0.9520104411677209, |
|
"learning_rate": 5.420500263813141e-06, |
|
"loss": 0.7423, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.392156862745098, |
|
"grad_norm": 0.9533462340832225, |
|
"learning_rate": 5.119013730401152e-06, |
|
"loss": 0.7206, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 1.4117647058823528, |
|
"grad_norm": 0.944013138090968, |
|
"learning_rate": 4.823243030667576e-06, |
|
"loss": 0.7112, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.4313725490196079, |
|
"grad_norm": 0.9654739136152124, |
|
"learning_rate": 4.533534524145756e-06, |
|
"loss": 0.7092, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 1.4509803921568627, |
|
"grad_norm": 1.0325541079170395, |
|
"learning_rate": 4.2502274712939355e-06, |
|
"loss": 0.7218, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 1.4705882352941178, |
|
"grad_norm": 0.945807759954701, |
|
"learning_rate": 3.973653636207437e-06, |
|
"loss": 0.7304, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 1.4901960784313726, |
|
"grad_norm": 0.9339565355678595, |
|
"learning_rate": 3.704136898109403e-06, |
|
"loss": 0.723, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.5098039215686274, |
|
"grad_norm": 0.9680921345423967, |
|
"learning_rate": 3.4419928720750274e-06, |
|
"loss": 0.7382, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 1.5294117647058822, |
|
"grad_norm": 0.9738097860413395, |
|
"learning_rate": 3.1875285394334575e-06, |
|
"loss": 0.7129, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.5490196078431373, |
|
"grad_norm": 0.9541739057939187, |
|
"learning_rate": 2.9410418882801682e-06, |
|
"loss": 0.7392, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 1.5686274509803921, |
|
"grad_norm": 0.9315613334411681, |
|
"learning_rate": 2.702821564520732e-06, |
|
"loss": 0.7323, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.5882352941176472, |
|
"grad_norm": 0.9959745037027158, |
|
"learning_rate": 2.4731465338547556e-06, |
|
"loss": 0.7501, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 1.607843137254902, |
|
"grad_norm": 0.9359344044277552, |
|
"learning_rate": 2.252285755095652e-06, |
|
"loss": 0.733, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.6274509803921569, |
|
"grad_norm": 0.9222172843750491, |
|
"learning_rate": 2.0404978652089325e-06, |
|
"loss": 0.6955, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 1.6470588235294117, |
|
"grad_norm": 0.9295279337365954, |
|
"learning_rate": 1.8380308764377841e-06, |
|
"loss": 0.7209, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.6666666666666665, |
|
"grad_norm": 0.9978286311378533, |
|
"learning_rate": 1.6451218858706374e-06, |
|
"loss": 0.7229, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 1.6862745098039216, |
|
"grad_norm": 0.9673841209469143, |
|
"learning_rate": 1.4619967977908157e-06, |
|
"loss": 0.7197, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.7058823529411766, |
|
"grad_norm": 0.9537359526264119, |
|
"learning_rate": 1.2888700591334225e-06, |
|
"loss": 0.7144, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 1.7254901960784315, |
|
"grad_norm": 0.9009373079713732, |
|
"learning_rate": 1.1259444083592585e-06, |
|
"loss": 0.7078, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.7450980392156863, |
|
"grad_norm": 0.9618775347330766, |
|
"learning_rate": 9.734106380398022e-07, |
|
"loss": 0.743, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 1.7647058823529411, |
|
"grad_norm": 0.951269838967216, |
|
"learning_rate": 8.31447371431372e-07, |
|
"loss": 0.7278, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.784313725490196, |
|
"grad_norm": 0.912258612261103, |
|
"learning_rate": 7.002208532999933e-07, |
|
"loss": 0.7241, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 1.803921568627451, |
|
"grad_norm": 0.9276161963363886, |
|
"learning_rate": 5.798847552420184e-07, |
|
"loss": 0.7143, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.8235294117647058, |
|
"grad_norm": 0.9666247110758565, |
|
"learning_rate": 4.7057999572843516e-07, |
|
"loss": 0.7165, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 1.843137254901961, |
|
"grad_norm": 0.9040376476718873, |
|
"learning_rate": 3.7243457508358784e-07, |
|
"loss": 0.7192, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.8627450980392157, |
|
"grad_norm": 0.9529298475220304, |
|
"learning_rate": 2.8556342559159513e-07, |
|
"loss": 0.7213, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 1.8823529411764706, |
|
"grad_norm": 0.9373675816598103, |
|
"learning_rate": 2.1006827690595478e-07, |
|
"loss": 0.7247, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.9019607843137254, |
|
"grad_norm": 0.9049110221925867, |
|
"learning_rate": 1.4603753691998735e-07, |
|
"loss": 0.7217, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 1.9215686274509802, |
|
"grad_norm": 1.0200136766624364, |
|
"learning_rate": 9.354618823758654e-08, |
|
"loss": 0.712, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 1.9411764705882353, |
|
"grad_norm": 1.0048113834797883, |
|
"learning_rate": 5.265570036553813e-08, |
|
"loss": 0.7337, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 1.9607843137254903, |
|
"grad_norm": 0.9199685223066211, |
|
"learning_rate": 2.3413957730226144e-08, |
|
"loss": 0.7121, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.9803921568627452, |
|
"grad_norm": 0.8894954442490418, |
|
"learning_rate": 5.855203603017945e-09, |
|
"loss": 0.7124, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 0.9330570772451235, |
|
"learning_rate": 0.0, |
|
"loss": 0.7276, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 1.0487182140350342, |
|
"eval_runtime": 5.9345, |
|
"eval_samples_per_second": 26.961, |
|
"eval_steps_per_second": 0.506, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"step": 510, |
|
"total_flos": 106783624396800.0, |
|
"train_loss": 0.9181881465163886, |
|
"train_runtime": 3659.086, |
|
"train_samples_per_second": 8.919, |
|
"train_steps_per_second": 0.139 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 510, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 106783624396800.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|