|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.9994856848962796, |
|
"eval_steps": 500, |
|
"global_step": 4374, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0006857534716269502, |
|
"grad_norm": 5.500612367122994e+18, |
|
"learning_rate": 2.2831050228310502e-08, |
|
"loss": 1.1976, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0034287673581347507, |
|
"grad_norm": 324.38059468358585, |
|
"learning_rate": 1.1415525114155251e-07, |
|
"loss": 1.175, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.006857534716269501, |
|
"grad_norm": 265.1750103761022, |
|
"learning_rate": 2.2831050228310502e-07, |
|
"loss": 1.1601, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.010286302074404252, |
|
"grad_norm": 88.79558048967972, |
|
"learning_rate": 3.4246575342465755e-07, |
|
"loss": 1.1645, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.013715069432539003, |
|
"grad_norm": 997.1728503325202, |
|
"learning_rate": 4.5662100456621004e-07, |
|
"loss": 1.1547, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.01714383679067375, |
|
"grad_norm": 138.0901381156419, |
|
"learning_rate": 5.707762557077626e-07, |
|
"loss": 1.1146, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.020572604148808505, |
|
"grad_norm": 3.6970703101396913, |
|
"learning_rate": 6.849315068493151e-07, |
|
"loss": 1.0849, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.024001371506943255, |
|
"grad_norm": 2.837595372313584, |
|
"learning_rate": 7.990867579908676e-07, |
|
"loss": 1.0569, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.027430138865078006, |
|
"grad_norm": 6.412430536312489, |
|
"learning_rate": 9.132420091324201e-07, |
|
"loss": 1.0229, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.030858906223212756, |
|
"grad_norm": 1.429707929152891, |
|
"learning_rate": 1.0273972602739727e-06, |
|
"loss": 0.9816, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.0342876735813475, |
|
"grad_norm": 1.1893517204804187, |
|
"learning_rate": 1.1415525114155251e-06, |
|
"loss": 0.9501, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.03771644093948225, |
|
"grad_norm": 1.316763932645065, |
|
"learning_rate": 1.2557077625570776e-06, |
|
"loss": 0.9212, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.04114520829761701, |
|
"grad_norm": 1.2662857030430075, |
|
"learning_rate": 1.3698630136986302e-06, |
|
"loss": 0.9013, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.04457397565575176, |
|
"grad_norm": 1.1097798063025857, |
|
"learning_rate": 1.4840182648401826e-06, |
|
"loss": 0.8951, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.04800274301388651, |
|
"grad_norm": 1.1990919640253435, |
|
"learning_rate": 1.5981735159817353e-06, |
|
"loss": 0.8941, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.05143151037202126, |
|
"grad_norm": 1.154454828366115, |
|
"learning_rate": 1.7123287671232877e-06, |
|
"loss": 0.8742, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.05486027773015601, |
|
"grad_norm": 1.1710287337749217, |
|
"learning_rate": 1.8264840182648401e-06, |
|
"loss": 0.8719, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.05828904508829076, |
|
"grad_norm": 1.1235385190661975, |
|
"learning_rate": 1.9406392694063926e-06, |
|
"loss": 0.8612, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.06171781244642551, |
|
"grad_norm": 1.3849808686622438, |
|
"learning_rate": 2.0547945205479454e-06, |
|
"loss": 0.8373, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.06514657980456026, |
|
"grad_norm": 1.2705169052299314, |
|
"learning_rate": 2.168949771689498e-06, |
|
"loss": 0.846, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.068575347162695, |
|
"grad_norm": 1.2257177989089036, |
|
"learning_rate": 2.2831050228310503e-06, |
|
"loss": 0.8327, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.07200411452082976, |
|
"grad_norm": 1.3499311190223524, |
|
"learning_rate": 2.3972602739726027e-06, |
|
"loss": 0.8223, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.0754328818789645, |
|
"grad_norm": 1.1551811574767, |
|
"learning_rate": 2.511415525114155e-06, |
|
"loss": 0.8075, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.07886164923709926, |
|
"grad_norm": 1.1152359835046965, |
|
"learning_rate": 2.625570776255708e-06, |
|
"loss": 0.8072, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.08229041659523402, |
|
"grad_norm": 1.128721063171254, |
|
"learning_rate": 2.7397260273972604e-06, |
|
"loss": 0.799, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.08571918395336876, |
|
"grad_norm": 1.0606734214287294, |
|
"learning_rate": 2.8538812785388133e-06, |
|
"loss": 0.8028, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.08914795131150352, |
|
"grad_norm": 1.0207053836629467, |
|
"learning_rate": 2.9680365296803653e-06, |
|
"loss": 0.7793, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.09257671866963826, |
|
"grad_norm": 0.9678836166894507, |
|
"learning_rate": 3.082191780821918e-06, |
|
"loss": 0.7818, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.09600548602777302, |
|
"grad_norm": 0.958558687009426, |
|
"learning_rate": 3.1963470319634706e-06, |
|
"loss": 0.78, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.09943425338590776, |
|
"grad_norm": 0.9103829103840835, |
|
"learning_rate": 3.310502283105023e-06, |
|
"loss": 0.7745, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.10286302074404252, |
|
"grad_norm": 1.072709628680254, |
|
"learning_rate": 3.4246575342465754e-06, |
|
"loss": 0.76, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.10629178810217726, |
|
"grad_norm": 0.87005113272475, |
|
"learning_rate": 3.5388127853881283e-06, |
|
"loss": 0.7583, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.10972055546031202, |
|
"grad_norm": 0.7285413995681538, |
|
"learning_rate": 3.6529680365296803e-06, |
|
"loss": 0.7624, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.11314932281844677, |
|
"grad_norm": 0.8949714023624847, |
|
"learning_rate": 3.767123287671233e-06, |
|
"loss": 0.7475, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.11657809017658152, |
|
"grad_norm": 0.721003961805146, |
|
"learning_rate": 3.881278538812785e-06, |
|
"loss": 0.7564, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.12000685753471627, |
|
"grad_norm": 0.7605946035325595, |
|
"learning_rate": 3.995433789954338e-06, |
|
"loss": 0.7383, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.12343562489285102, |
|
"grad_norm": 1.0401397098538707, |
|
"learning_rate": 4.109589041095891e-06, |
|
"loss": 0.7488, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.12686439225098578, |
|
"grad_norm": 0.9619516801856884, |
|
"learning_rate": 4.223744292237444e-06, |
|
"loss": 0.7361, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.13029315960912052, |
|
"grad_norm": 0.7114806408995153, |
|
"learning_rate": 4.337899543378996e-06, |
|
"loss": 0.7413, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.13372192696725527, |
|
"grad_norm": 0.7788966170765217, |
|
"learning_rate": 4.4520547945205486e-06, |
|
"loss": 0.7288, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.13715069432539, |
|
"grad_norm": 0.7788077365287874, |
|
"learning_rate": 4.566210045662101e-06, |
|
"loss": 0.7241, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.14057946168352478, |
|
"grad_norm": 0.713495426998104, |
|
"learning_rate": 4.6803652968036534e-06, |
|
"loss": 0.7335, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.14400822904165952, |
|
"grad_norm": 0.7672558645856348, |
|
"learning_rate": 4.7945205479452054e-06, |
|
"loss": 0.7262, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.14743699639979427, |
|
"grad_norm": 0.7374473797133105, |
|
"learning_rate": 4.908675799086758e-06, |
|
"loss": 0.721, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.150865763757929, |
|
"grad_norm": 0.7501307226424111, |
|
"learning_rate": 5.02283105022831e-06, |
|
"loss": 0.7186, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.15429453111606378, |
|
"grad_norm": 0.6971360520708836, |
|
"learning_rate": 5.136986301369864e-06, |
|
"loss": 0.7194, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.15772329847419853, |
|
"grad_norm": 0.6367123001071819, |
|
"learning_rate": 5.251141552511416e-06, |
|
"loss": 0.719, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.16115206583233327, |
|
"grad_norm": 1.6312076754734648, |
|
"learning_rate": 5.365296803652969e-06, |
|
"loss": 0.7139, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.16458083319046804, |
|
"grad_norm": 0.8262096198704073, |
|
"learning_rate": 5.479452054794521e-06, |
|
"loss": 0.7089, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.16800960054860278, |
|
"grad_norm": 0.9448491152723366, |
|
"learning_rate": 5.593607305936074e-06, |
|
"loss": 0.7103, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.17143836790673753, |
|
"grad_norm": 0.8493857772402849, |
|
"learning_rate": 5.7077625570776266e-06, |
|
"loss": 0.7011, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.17486713526487227, |
|
"grad_norm": 0.7340850975823853, |
|
"learning_rate": 5.821917808219179e-06, |
|
"loss": 0.704, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.17829590262300704, |
|
"grad_norm": 0.713122055338853, |
|
"learning_rate": 5.936073059360731e-06, |
|
"loss": 0.7025, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.18172466998114178, |
|
"grad_norm": 0.7862099070131026, |
|
"learning_rate": 6.050228310502284e-06, |
|
"loss": 0.7023, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.18515343733927653, |
|
"grad_norm": 0.8003249221177992, |
|
"learning_rate": 6.164383561643836e-06, |
|
"loss": 0.6985, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.18858220469741127, |
|
"grad_norm": 0.72870713699412, |
|
"learning_rate": 6.278538812785388e-06, |
|
"loss": 0.6896, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.19201097205554604, |
|
"grad_norm": 0.68892271391433, |
|
"learning_rate": 6.392694063926941e-06, |
|
"loss": 0.6937, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.19543973941368079, |
|
"grad_norm": 0.8188477395535939, |
|
"learning_rate": 6.506849315068494e-06, |
|
"loss": 0.6927, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.19886850677181553, |
|
"grad_norm": 0.7692820637205647, |
|
"learning_rate": 6.621004566210046e-06, |
|
"loss": 0.7009, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.20229727412995027, |
|
"grad_norm": 0.6627944117356517, |
|
"learning_rate": 6.735159817351599e-06, |
|
"loss": 0.6876, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.20572604148808504, |
|
"grad_norm": 0.7266007369078403, |
|
"learning_rate": 6.849315068493151e-06, |
|
"loss": 0.6853, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.2091548088462198, |
|
"grad_norm": 0.6424680301484417, |
|
"learning_rate": 6.9634703196347046e-06, |
|
"loss": 0.6938, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.21258357620435453, |
|
"grad_norm": 0.7129680875859563, |
|
"learning_rate": 7.077625570776257e-06, |
|
"loss": 0.6909, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.21601234356248927, |
|
"grad_norm": 0.9106015540335944, |
|
"learning_rate": 7.191780821917809e-06, |
|
"loss": 0.6916, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.21944111092062404, |
|
"grad_norm": 0.6760706144438374, |
|
"learning_rate": 7.305936073059361e-06, |
|
"loss": 0.6851, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.2228698782787588, |
|
"grad_norm": 0.6512553623989397, |
|
"learning_rate": 7.420091324200914e-06, |
|
"loss": 0.6897, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.22629864563689353, |
|
"grad_norm": 0.7850669023798967, |
|
"learning_rate": 7.534246575342466e-06, |
|
"loss": 0.6835, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.22972741299502827, |
|
"grad_norm": 1.3429850514334625, |
|
"learning_rate": 7.648401826484018e-06, |
|
"loss": 0.6862, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.23315618035316305, |
|
"grad_norm": 1.0601962354732333, |
|
"learning_rate": 7.76255707762557e-06, |
|
"loss": 0.6865, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.2365849477112978, |
|
"grad_norm": 0.7310633594389766, |
|
"learning_rate": 7.876712328767124e-06, |
|
"loss": 0.6796, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.24001371506943253, |
|
"grad_norm": 1.2791540029610797, |
|
"learning_rate": 7.990867579908676e-06, |
|
"loss": 0.678, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.2434424824275673, |
|
"grad_norm": 1.0514021597488898, |
|
"learning_rate": 8.105022831050228e-06, |
|
"loss": 0.6773, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.24687124978570205, |
|
"grad_norm": 0.6590297903145794, |
|
"learning_rate": 8.219178082191782e-06, |
|
"loss": 0.6787, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.2503000171438368, |
|
"grad_norm": 1.4407668581292676, |
|
"learning_rate": 8.333333333333334e-06, |
|
"loss": 0.6747, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.25372878450197156, |
|
"grad_norm": 0.5783708488581324, |
|
"learning_rate": 8.447488584474887e-06, |
|
"loss": 0.6761, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.2571575518601063, |
|
"grad_norm": 0.7196205508845748, |
|
"learning_rate": 8.56164383561644e-06, |
|
"loss": 0.6741, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.26058631921824105, |
|
"grad_norm": 0.8426560938281862, |
|
"learning_rate": 8.675799086757991e-06, |
|
"loss": 0.6783, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.2640150865763758, |
|
"grad_norm": 0.6248407108995583, |
|
"learning_rate": 8.789954337899545e-06, |
|
"loss": 0.6812, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.26744385393451053, |
|
"grad_norm": 0.625929290973614, |
|
"learning_rate": 8.904109589041097e-06, |
|
"loss": 0.6722, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.2708726212926453, |
|
"grad_norm": 0.8071769890986592, |
|
"learning_rate": 9.01826484018265e-06, |
|
"loss": 0.6777, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.27430138865078, |
|
"grad_norm": 0.9295641422813844, |
|
"learning_rate": 9.132420091324201e-06, |
|
"loss": 0.6744, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.2777301560089148, |
|
"grad_norm": 0.7634843175176039, |
|
"learning_rate": 9.246575342465755e-06, |
|
"loss": 0.6718, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.28115892336704956, |
|
"grad_norm": 0.8056800028036205, |
|
"learning_rate": 9.360730593607307e-06, |
|
"loss": 0.6685, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.2845876907251843, |
|
"grad_norm": 0.7359796110052708, |
|
"learning_rate": 9.474885844748859e-06, |
|
"loss": 0.6686, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.28801645808331905, |
|
"grad_norm": 0.8275083411284923, |
|
"learning_rate": 9.589041095890411e-06, |
|
"loss": 0.6713, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.2914452254414538, |
|
"grad_norm": 0.9767666036041072, |
|
"learning_rate": 9.703196347031965e-06, |
|
"loss": 0.6644, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.29487399279958854, |
|
"grad_norm": 0.6970783265751548, |
|
"learning_rate": 9.817351598173517e-06, |
|
"loss": 0.6692, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.2983027601577233, |
|
"grad_norm": 0.584165558382207, |
|
"learning_rate": 9.931506849315069e-06, |
|
"loss": 0.6659, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.301731527515858, |
|
"grad_norm": 0.6245810626396433, |
|
"learning_rate": 9.999993629265979e-06, |
|
"loss": 0.6647, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.3051602948739928, |
|
"grad_norm": 0.7335612100903357, |
|
"learning_rate": 9.999921958694681e-06, |
|
"loss": 0.667, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.30858906223212756, |
|
"grad_norm": 0.6843530498214124, |
|
"learning_rate": 9.999770655279843e-06, |
|
"loss": 0.6683, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.3120178295902623, |
|
"grad_norm": 0.6124751321352965, |
|
"learning_rate": 9.999539721431253e-06, |
|
"loss": 0.6619, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.31544659694839705, |
|
"grad_norm": 0.8614237317784202, |
|
"learning_rate": 9.999229160826947e-06, |
|
"loss": 0.6637, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.3188753643065318, |
|
"grad_norm": 0.6836210220640591, |
|
"learning_rate": 9.998838978413167e-06, |
|
"loss": 0.664, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.32230413166466654, |
|
"grad_norm": 0.7142979223973136, |
|
"learning_rate": 9.998369180404283e-06, |
|
"loss": 0.6682, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.3257328990228013, |
|
"grad_norm": 0.6970601000506839, |
|
"learning_rate": 9.997819774282676e-06, |
|
"loss": 0.6654, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.3291616663809361, |
|
"grad_norm": 0.7855321640617767, |
|
"learning_rate": 9.997190768798639e-06, |
|
"loss": 0.6628, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.3325904337390708, |
|
"grad_norm": 0.8068114924796952, |
|
"learning_rate": 9.996482173970227e-06, |
|
"loss": 0.6532, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.33601920109720557, |
|
"grad_norm": 0.6190829521241801, |
|
"learning_rate": 9.995694001083103e-06, |
|
"loss": 0.6625, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.3394479684553403, |
|
"grad_norm": 0.6559745100197821, |
|
"learning_rate": 9.994826262690347e-06, |
|
"loss": 0.6601, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.34287673581347505, |
|
"grad_norm": 0.6960576238421567, |
|
"learning_rate": 9.993878972612276e-06, |
|
"loss": 0.6575, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.3463055031716098, |
|
"grad_norm": 0.8092838621716005, |
|
"learning_rate": 9.992852145936202e-06, |
|
"loss": 0.6562, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.34973427052974454, |
|
"grad_norm": 0.7436128976237549, |
|
"learning_rate": 9.991745799016206e-06, |
|
"loss": 0.6514, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.3531630378878793, |
|
"grad_norm": 0.754160742003698, |
|
"learning_rate": 9.990559949472875e-06, |
|
"loss": 0.6579, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.3565918052460141, |
|
"grad_norm": 0.6596249438947815, |
|
"learning_rate": 9.989294616193018e-06, |
|
"loss": 0.6619, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.3600205726041488, |
|
"grad_norm": 0.7268392727602604, |
|
"learning_rate": 9.987949819329366e-06, |
|
"loss": 0.6569, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.36344933996228357, |
|
"grad_norm": 0.7908918422796353, |
|
"learning_rate": 9.986525580300253e-06, |
|
"loss": 0.6602, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.3668781073204183, |
|
"grad_norm": 0.731825676141184, |
|
"learning_rate": 9.985021921789274e-06, |
|
"loss": 0.659, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.37030687467855306, |
|
"grad_norm": 0.6826536905758885, |
|
"learning_rate": 9.983438867744923e-06, |
|
"loss": 0.6501, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.3737356420366878, |
|
"grad_norm": 0.6873425512137389, |
|
"learning_rate": 9.981776443380214e-06, |
|
"loss": 0.653, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.37716440939482254, |
|
"grad_norm": 0.6087207671115116, |
|
"learning_rate": 9.980034675172274e-06, |
|
"loss": 0.6501, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.3805931767529573, |
|
"grad_norm": 0.6758600678083865, |
|
"learning_rate": 9.97821359086193e-06, |
|
"loss": 0.6457, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.3840219441110921, |
|
"grad_norm": 0.6996994398315339, |
|
"learning_rate": 9.976313219453255e-06, |
|
"loss": 0.6516, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.3874507114692268, |
|
"grad_norm": 0.5956097888192097, |
|
"learning_rate": 9.97433359121312e-06, |
|
"loss": 0.6561, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.39087947882736157, |
|
"grad_norm": 0.5809586858657639, |
|
"learning_rate": 9.972274737670702e-06, |
|
"loss": 0.6515, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.3943082461854963, |
|
"grad_norm": 0.6234744336805654, |
|
"learning_rate": 9.970136691616985e-06, |
|
"loss": 0.6567, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.39773701354363106, |
|
"grad_norm": 0.7019194825681323, |
|
"learning_rate": 9.967919487104237e-06, |
|
"loss": 0.6407, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.4011657809017658, |
|
"grad_norm": 0.6464500473501631, |
|
"learning_rate": 9.965623159445471e-06, |
|
"loss": 0.6478, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.40459454825990054, |
|
"grad_norm": 0.6397619177468029, |
|
"learning_rate": 9.963247745213876e-06, |
|
"loss": 0.6438, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.40802331561803534, |
|
"grad_norm": 0.6910890699251881, |
|
"learning_rate": 9.960793282242243e-06, |
|
"loss": 0.649, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.4114520829761701, |
|
"grad_norm": 0.7163950332830918, |
|
"learning_rate": 9.958259809622353e-06, |
|
"loss": 0.6455, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.41488085033430483, |
|
"grad_norm": 0.8206374824286882, |
|
"learning_rate": 9.955647367704362e-06, |
|
"loss": 0.6569, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.4183096176924396, |
|
"grad_norm": 0.7207309045108575, |
|
"learning_rate": 9.952955998096155e-06, |
|
"loss": 0.6446, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.4217383850505743, |
|
"grad_norm": 0.5651258904128029, |
|
"learning_rate": 9.950185743662685e-06, |
|
"loss": 0.6444, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.42516715240870906, |
|
"grad_norm": 0.5443692863606336, |
|
"learning_rate": 9.94733664852529e-06, |
|
"loss": 0.6418, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.4285959197668438, |
|
"grad_norm": 0.6042279487491544, |
|
"learning_rate": 9.944408758060982e-06, |
|
"loss": 0.6463, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.43202468712497855, |
|
"grad_norm": 0.5656088824328167, |
|
"learning_rate": 9.941402118901743e-06, |
|
"loss": 0.6407, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.43545345448311334, |
|
"grad_norm": 0.658014001778976, |
|
"learning_rate": 9.938316778933763e-06, |
|
"loss": 0.6468, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.4388822218412481, |
|
"grad_norm": 0.6639637664489146, |
|
"learning_rate": 9.935152787296689e-06, |
|
"loss": 0.6494, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.44231098919938283, |
|
"grad_norm": 0.5783566760560125, |
|
"learning_rate": 9.931910194382836e-06, |
|
"loss": 0.6463, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.4457397565575176, |
|
"grad_norm": 0.6490847428513987, |
|
"learning_rate": 9.928589051836392e-06, |
|
"loss": 0.6438, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.4491685239156523, |
|
"grad_norm": 0.5672865850934122, |
|
"learning_rate": 9.925189412552585e-06, |
|
"loss": 0.6333, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.45259729127378706, |
|
"grad_norm": 0.569087352718797, |
|
"learning_rate": 9.921711330676848e-06, |
|
"loss": 0.6478, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.4560260586319218, |
|
"grad_norm": 0.6207854393319554, |
|
"learning_rate": 9.918154861603959e-06, |
|
"loss": 0.6431, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.45945482599005655, |
|
"grad_norm": 0.586346598273814, |
|
"learning_rate": 9.91452006197715e-06, |
|
"loss": 0.6383, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.46288359334819135, |
|
"grad_norm": 0.5045583675197564, |
|
"learning_rate": 9.910806989687207e-06, |
|
"loss": 0.6365, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.4663123607063261, |
|
"grad_norm": 0.6674046560237406, |
|
"learning_rate": 9.907015703871558e-06, |
|
"loss": 0.6446, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.46974112806446083, |
|
"grad_norm": 0.6966429050410609, |
|
"learning_rate": 9.903146264913318e-06, |
|
"loss": 0.6416, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.4731698954225956, |
|
"grad_norm": 0.6302591461200223, |
|
"learning_rate": 9.899198734440335e-06, |
|
"loss": 0.6371, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.4765986627807303, |
|
"grad_norm": 0.8144573920281895, |
|
"learning_rate": 9.895173175324205e-06, |
|
"loss": 0.6342, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.48002743013886506, |
|
"grad_norm": 0.7886436043083611, |
|
"learning_rate": 9.891069651679273e-06, |
|
"loss": 0.6355, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.4834561974969998, |
|
"grad_norm": 0.8615238941842659, |
|
"learning_rate": 9.886888228861608e-06, |
|
"loss": 0.6364, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.4868849648551346, |
|
"grad_norm": 0.6441681216184715, |
|
"learning_rate": 9.882628973467972e-06, |
|
"loss": 0.6365, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.49031373221326935, |
|
"grad_norm": 0.539258830386699, |
|
"learning_rate": 9.878291953334744e-06, |
|
"loss": 0.6409, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.4937424995714041, |
|
"grad_norm": 0.5498193436814545, |
|
"learning_rate": 9.873877237536854e-06, |
|
"loss": 0.6394, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.49717126692953884, |
|
"grad_norm": 0.574880600021989, |
|
"learning_rate": 9.869384896386669e-06, |
|
"loss": 0.6413, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.5006000342876736, |
|
"grad_norm": 0.618195186077868, |
|
"learning_rate": 9.86481500143289e-06, |
|
"loss": 0.6416, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.5040288016458083, |
|
"grad_norm": 0.5418237252054477, |
|
"learning_rate": 9.860167625459398e-06, |
|
"loss": 0.6372, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.5074575690039431, |
|
"grad_norm": 0.5863545275273223, |
|
"learning_rate": 9.855442842484101e-06, |
|
"loss": 0.6345, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.5108863363620778, |
|
"grad_norm": 0.553229939288754, |
|
"learning_rate": 9.850640727757753e-06, |
|
"loss": 0.6347, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 0.5143151037202126, |
|
"grad_norm": 0.6152186401867638, |
|
"learning_rate": 9.84576135776276e-06, |
|
"loss": 0.6355, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.5177438710783473, |
|
"grad_norm": 0.6357690608918045, |
|
"learning_rate": 9.840804810211954e-06, |
|
"loss": 0.6347, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 0.5211726384364821, |
|
"grad_norm": 0.5531025379600504, |
|
"learning_rate": 9.835771164047365e-06, |
|
"loss": 0.6367, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.5246014057946168, |
|
"grad_norm": 0.5598603097452307, |
|
"learning_rate": 9.830660499438955e-06, |
|
"loss": 0.6322, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 0.5280301731527516, |
|
"grad_norm": 0.5973164400797644, |
|
"learning_rate": 9.825472897783344e-06, |
|
"loss": 0.6363, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.5314589405108864, |
|
"grad_norm": 0.6319771585799853, |
|
"learning_rate": 9.820208441702516e-06, |
|
"loss": 0.632, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.5348877078690211, |
|
"grad_norm": 0.6648742107102181, |
|
"learning_rate": 9.814867215042503e-06, |
|
"loss": 0.6379, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.5383164752271559, |
|
"grad_norm": 0.7342225799559353, |
|
"learning_rate": 9.809449302872045e-06, |
|
"loss": 0.6405, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 0.5417452425852906, |
|
"grad_norm": 0.7688881238342142, |
|
"learning_rate": 9.803954791481239e-06, |
|
"loss": 0.634, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.5451740099434254, |
|
"grad_norm": 0.639917210365233, |
|
"learning_rate": 9.798383768380164e-06, |
|
"loss": 0.6348, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 0.54860277730156, |
|
"grad_norm": 0.5045187940762229, |
|
"learning_rate": 9.792736322297489e-06, |
|
"loss": 0.6362, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.5520315446596948, |
|
"grad_norm": 0.5381061183966045, |
|
"learning_rate": 9.787012543179053e-06, |
|
"loss": 0.6391, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 0.5554603120178296, |
|
"grad_norm": 0.5429280857380482, |
|
"learning_rate": 9.781212522186442e-06, |
|
"loss": 0.6307, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.5588890793759643, |
|
"grad_norm": 0.594664535184356, |
|
"learning_rate": 9.77533635169553e-06, |
|
"loss": 0.6298, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 0.5623178467340991, |
|
"grad_norm": 0.6702565938406244, |
|
"learning_rate": 9.769384125295012e-06, |
|
"loss": 0.6342, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.5657466140922338, |
|
"grad_norm": 0.7024332099849335, |
|
"learning_rate": 9.763355937784908e-06, |
|
"loss": 0.6261, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.5691753814503686, |
|
"grad_norm": 0.6596631748942144, |
|
"learning_rate": 9.757251885175063e-06, |
|
"loss": 0.6377, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.5726041488085033, |
|
"grad_norm": 0.580750320713107, |
|
"learning_rate": 9.751072064683604e-06, |
|
"loss": 0.638, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 0.5760329161666381, |
|
"grad_norm": 0.5865851818669229, |
|
"learning_rate": 9.744816574735405e-06, |
|
"loss": 0.6257, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.5794616835247729, |
|
"grad_norm": 0.6490256085654721, |
|
"learning_rate": 9.73848551496051e-06, |
|
"loss": 0.6268, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 0.5828904508829076, |
|
"grad_norm": 0.5699242035532047, |
|
"learning_rate": 9.732078986192552e-06, |
|
"loss": 0.6343, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.5863192182410424, |
|
"grad_norm": 0.5773888152890198, |
|
"learning_rate": 9.725597090467145e-06, |
|
"loss": 0.6335, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 0.5897479855991771, |
|
"grad_norm": 0.5030181039807039, |
|
"learning_rate": 9.719039931020258e-06, |
|
"loss": 0.6297, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.5931767529573119, |
|
"grad_norm": 0.6927945011578892, |
|
"learning_rate": 9.712407612286568e-06, |
|
"loss": 0.6317, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 0.5966055203154466, |
|
"grad_norm": 0.6863261538586617, |
|
"learning_rate": 9.705700239897809e-06, |
|
"loss": 0.6349, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.6000342876735814, |
|
"grad_norm": 0.5863644745576692, |
|
"learning_rate": 9.698917920681072e-06, |
|
"loss": 0.6336, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.603463055031716, |
|
"grad_norm": 0.569507321040072, |
|
"learning_rate": 9.692060762657118e-06, |
|
"loss": 0.6311, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.6068918223898508, |
|
"grad_norm": 0.5320598410866568, |
|
"learning_rate": 9.685128875038648e-06, |
|
"loss": 0.628, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 0.6103205897479856, |
|
"grad_norm": 0.6614795760343849, |
|
"learning_rate": 9.678122368228571e-06, |
|
"loss": 0.6273, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.6137493571061203, |
|
"grad_norm": 0.5899705788099368, |
|
"learning_rate": 9.67104135381824e-06, |
|
"loss": 0.626, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 0.6171781244642551, |
|
"grad_norm": 0.6244814955846902, |
|
"learning_rate": 9.66388594458568e-06, |
|
"loss": 0.6202, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.6206068918223898, |
|
"grad_norm": 0.5468113327023256, |
|
"learning_rate": 9.656656254493783e-06, |
|
"loss": 0.6247, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 0.6240356591805246, |
|
"grad_norm": 0.5197583062967639, |
|
"learning_rate": 9.649352398688506e-06, |
|
"loss": 0.6341, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.6274644265386593, |
|
"grad_norm": 0.578711261845795, |
|
"learning_rate": 9.641974493497024e-06, |
|
"loss": 0.6274, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 0.6308931938967941, |
|
"grad_norm": 0.536259271020302, |
|
"learning_rate": 9.634522656425885e-06, |
|
"loss": 0.638, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.6343219612549289, |
|
"grad_norm": 0.5108361690063496, |
|
"learning_rate": 9.626997006159135e-06, |
|
"loss": 0.6268, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.6377507286130636, |
|
"grad_norm": 0.5258629436603048, |
|
"learning_rate": 9.619397662556434e-06, |
|
"loss": 0.6306, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.6411794959711984, |
|
"grad_norm": 0.5457258768955178, |
|
"learning_rate": 9.61172474665114e-06, |
|
"loss": 0.6286, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 0.6446082633293331, |
|
"grad_norm": 0.6608031766605356, |
|
"learning_rate": 9.603978380648375e-06, |
|
"loss": 0.6295, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.6480370306874679, |
|
"grad_norm": 0.5276661545510922, |
|
"learning_rate": 9.596158687923105e-06, |
|
"loss": 0.6263, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 0.6514657980456026, |
|
"grad_norm": 0.530790337719698, |
|
"learning_rate": 9.588265793018141e-06, |
|
"loss": 0.625, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.6548945654037374, |
|
"grad_norm": 0.562840339494226, |
|
"learning_rate": 9.58029982164218e-06, |
|
"loss": 0.6286, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 0.6583233327618722, |
|
"grad_norm": 0.4947380099055408, |
|
"learning_rate": 9.572260900667794e-06, |
|
"loss": 0.6236, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.6617521001200068, |
|
"grad_norm": 0.47265335918997425, |
|
"learning_rate": 9.564149158129406e-06, |
|
"loss": 0.6296, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 0.6651808674781416, |
|
"grad_norm": 0.5775895832290389, |
|
"learning_rate": 9.555964723221258e-06, |
|
"loss": 0.6235, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.6686096348362763, |
|
"grad_norm": 0.5561593222147068, |
|
"learning_rate": 9.54770772629535e-06, |
|
"loss": 0.6203, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.6720384021944111, |
|
"grad_norm": 0.5564531018664398, |
|
"learning_rate": 9.539378298859365e-06, |
|
"loss": 0.6131, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.6754671695525458, |
|
"grad_norm": 0.6036545672788025, |
|
"learning_rate": 9.53097657357457e-06, |
|
"loss": 0.6166, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 0.6788959369106806, |
|
"grad_norm": 0.5409723710972858, |
|
"learning_rate": 9.522502684253709e-06, |
|
"loss": 0.6229, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.6823247042688153, |
|
"grad_norm": 0.509669382435153, |
|
"learning_rate": 9.51395676585887e-06, |
|
"loss": 0.6222, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 0.6857534716269501, |
|
"grad_norm": 0.6082473746180043, |
|
"learning_rate": 9.505338954499332e-06, |
|
"loss": 0.6148, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.6891822389850849, |
|
"grad_norm": 0.5381865337988982, |
|
"learning_rate": 9.496649387429405e-06, |
|
"loss": 0.6253, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 0.6926110063432196, |
|
"grad_norm": 0.5519342026599645, |
|
"learning_rate": 9.487888203046232e-06, |
|
"loss": 0.6272, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.6960397737013544, |
|
"grad_norm": 0.5016332652349211, |
|
"learning_rate": 9.479055540887599e-06, |
|
"loss": 0.6228, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 0.6994685410594891, |
|
"grad_norm": 0.6079273353999184, |
|
"learning_rate": 9.4701515416297e-06, |
|
"loss": 0.6208, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.7028973084176239, |
|
"grad_norm": 0.700304976409857, |
|
"learning_rate": 9.461176347084909e-06, |
|
"loss": 0.6214, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.7063260757757586, |
|
"grad_norm": 0.5534413781804028, |
|
"learning_rate": 9.452130100199504e-06, |
|
"loss": 0.6191, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.7097548431338934, |
|
"grad_norm": 0.5985964197962274, |
|
"learning_rate": 9.44301294505141e-06, |
|
"loss": 0.6232, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 0.7131836104920282, |
|
"grad_norm": 0.5866063492223905, |
|
"learning_rate": 9.433825026847891e-06, |
|
"loss": 0.6233, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.7166123778501629, |
|
"grad_norm": 0.6647059350986494, |
|
"learning_rate": 9.42456649192324e-06, |
|
"loss": 0.6197, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 0.7200411452082977, |
|
"grad_norm": 0.6902179520870335, |
|
"learning_rate": 9.415237487736452e-06, |
|
"loss": 0.6162, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.7234699125664323, |
|
"grad_norm": 0.5946749998378368, |
|
"learning_rate": 9.405838162868874e-06, |
|
"loss": 0.6169, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 0.7268986799245671, |
|
"grad_norm": 0.5785836386179494, |
|
"learning_rate": 9.396368667021835e-06, |
|
"loss": 0.6204, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.7303274472827018, |
|
"grad_norm": 0.5715009908016999, |
|
"learning_rate": 9.386829151014262e-06, |
|
"loss": 0.6161, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 0.7337562146408366, |
|
"grad_norm": 0.5493040557418012, |
|
"learning_rate": 9.377219766780288e-06, |
|
"loss": 0.6177, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.7371849819989714, |
|
"grad_norm": 0.5614424899020751, |
|
"learning_rate": 9.367540667366816e-06, |
|
"loss": 0.6213, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.7406137493571061, |
|
"grad_norm": 0.5955493656424166, |
|
"learning_rate": 9.3577920069311e-06, |
|
"loss": 0.6115, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.7440425167152409, |
|
"grad_norm": 0.6005939054835772, |
|
"learning_rate": 9.347973940738266e-06, |
|
"loss": 0.6146, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 0.7474712840733756, |
|
"grad_norm": 0.5179120177804368, |
|
"learning_rate": 9.338086625158867e-06, |
|
"loss": 0.6095, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.7509000514315104, |
|
"grad_norm": 0.5736625803401397, |
|
"learning_rate": 9.328130217666366e-06, |
|
"loss": 0.6184, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 0.7543288187896451, |
|
"grad_norm": 0.48238347864003633, |
|
"learning_rate": 9.318104876834652e-06, |
|
"loss": 0.6147, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.7577575861477799, |
|
"grad_norm": 0.5972927862243746, |
|
"learning_rate": 9.308010762335492e-06, |
|
"loss": 0.6239, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 0.7611863535059146, |
|
"grad_norm": 0.7711586637339868, |
|
"learning_rate": 9.297848034936007e-06, |
|
"loss": 0.622, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.7646151208640494, |
|
"grad_norm": 0.617930863519868, |
|
"learning_rate": 9.287616856496097e-06, |
|
"loss": 0.6049, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 0.7680438882221842, |
|
"grad_norm": 0.562401219116577, |
|
"learning_rate": 9.277317389965871e-06, |
|
"loss": 0.6174, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.7714726555803189, |
|
"grad_norm": 0.6288444408114011, |
|
"learning_rate": 9.266949799383053e-06, |
|
"loss": 0.6132, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.7749014229384537, |
|
"grad_norm": 0.5138761921522274, |
|
"learning_rate": 9.256514249870366e-06, |
|
"loss": 0.6144, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.7783301902965883, |
|
"grad_norm": 0.5557394107732245, |
|
"learning_rate": 9.246010907632894e-06, |
|
"loss": 0.625, |
|
"step": 1135 |
|
}, |
|
{ |
|
"epoch": 0.7817589576547231, |
|
"grad_norm": 0.4785123138439094, |
|
"learning_rate": 9.235439939955458e-06, |
|
"loss": 0.6129, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.7851877250128578, |
|
"grad_norm": 0.4845330276832426, |
|
"learning_rate": 9.22480151519992e-06, |
|
"loss": 0.6249, |
|
"step": 1145 |
|
}, |
|
{ |
|
"epoch": 0.7886164923709926, |
|
"grad_norm": 0.5198630830752909, |
|
"learning_rate": 9.214095802802533e-06, |
|
"loss": 0.6181, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.7920452597291274, |
|
"grad_norm": 0.5032353282124736, |
|
"learning_rate": 9.203322973271223e-06, |
|
"loss": 0.6208, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 0.7954740270872621, |
|
"grad_norm": 0.710468651962232, |
|
"learning_rate": 9.192483198182876e-06, |
|
"loss": 0.6148, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.7989027944453969, |
|
"grad_norm": 0.5960153692293567, |
|
"learning_rate": 9.181576650180606e-06, |
|
"loss": 0.6216, |
|
"step": 1165 |
|
}, |
|
{ |
|
"epoch": 0.8023315618035316, |
|
"grad_norm": 0.5541889269998591, |
|
"learning_rate": 9.170603502971017e-06, |
|
"loss": 0.6181, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.8057603291616664, |
|
"grad_norm": 0.4842737593004163, |
|
"learning_rate": 9.159563931321416e-06, |
|
"loss": 0.6118, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 0.8091890965198011, |
|
"grad_norm": 0.5527421600120501, |
|
"learning_rate": 9.148458111057043e-06, |
|
"loss": 0.617, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.8126178638779359, |
|
"grad_norm": 0.517599668993277, |
|
"learning_rate": 9.13728621905827e-06, |
|
"loss": 0.6081, |
|
"step": 1185 |
|
}, |
|
{ |
|
"epoch": 0.8160466312360707, |
|
"grad_norm": 0.5442628844464218, |
|
"learning_rate": 9.12604843325778e-06, |
|
"loss": 0.6211, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.8194753985942054, |
|
"grad_norm": 0.5027283276915621, |
|
"learning_rate": 9.114744932637734e-06, |
|
"loss": 0.6127, |
|
"step": 1195 |
|
}, |
|
{ |
|
"epoch": 0.8229041659523402, |
|
"grad_norm": 0.5296307123252137, |
|
"learning_rate": 9.103375897226919e-06, |
|
"loss": 0.613, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.8263329333104749, |
|
"grad_norm": 0.5490731527916699, |
|
"learning_rate": 9.091941508097886e-06, |
|
"loss": 0.6141, |
|
"step": 1205 |
|
}, |
|
{ |
|
"epoch": 0.8297617006686097, |
|
"grad_norm": 0.48494095011709015, |
|
"learning_rate": 9.080441947364065e-06, |
|
"loss": 0.6137, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.8331904680267443, |
|
"grad_norm": 0.5200558540203623, |
|
"learning_rate": 9.068877398176854e-06, |
|
"loss": 0.6086, |
|
"step": 1215 |
|
}, |
|
{ |
|
"epoch": 0.8366192353848791, |
|
"grad_norm": 0.5307240302552572, |
|
"learning_rate": 9.057248044722718e-06, |
|
"loss": 0.6186, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.8400480027430138, |
|
"grad_norm": 0.6928206617626188, |
|
"learning_rate": 9.045554072220241e-06, |
|
"loss": 0.616, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 0.8434767701011486, |
|
"grad_norm": 0.6694845253440784, |
|
"learning_rate": 9.033795666917191e-06, |
|
"loss": 0.6105, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.8469055374592834, |
|
"grad_norm": 0.48405743005066365, |
|
"learning_rate": 9.02197301608754e-06, |
|
"loss": 0.606, |
|
"step": 1235 |
|
}, |
|
{ |
|
"epoch": 0.8503343048174181, |
|
"grad_norm": 0.5821371319328923, |
|
"learning_rate": 9.010086308028487e-06, |
|
"loss": 0.6129, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.8537630721755529, |
|
"grad_norm": 0.6308021249120006, |
|
"learning_rate": 8.998135732057458e-06, |
|
"loss": 0.6101, |
|
"step": 1245 |
|
}, |
|
{ |
|
"epoch": 0.8571918395336876, |
|
"grad_norm": 0.6393999209710391, |
|
"learning_rate": 8.986121478509096e-06, |
|
"loss": 0.6099, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.8606206068918224, |
|
"grad_norm": 0.5210274848669988, |
|
"learning_rate": 8.97404373873222e-06, |
|
"loss": 0.6136, |
|
"step": 1255 |
|
}, |
|
{ |
|
"epoch": 0.8640493742499571, |
|
"grad_norm": 0.5493385621402502, |
|
"learning_rate": 8.961902705086785e-06, |
|
"loss": 0.6114, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.8674781416080919, |
|
"grad_norm": 0.565383947144019, |
|
"learning_rate": 8.949698570940816e-06, |
|
"loss": 0.6117, |
|
"step": 1265 |
|
}, |
|
{ |
|
"epoch": 0.8709069089662267, |
|
"grad_norm": 0.4809292251179887, |
|
"learning_rate": 8.937431530667329e-06, |
|
"loss": 0.6163, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.8743356763243614, |
|
"grad_norm": 0.6569955954243558, |
|
"learning_rate": 8.925101779641232e-06, |
|
"loss": 0.6136, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 0.8777644436824962, |
|
"grad_norm": 0.7586122995884393, |
|
"learning_rate": 8.912709514236218e-06, |
|
"loss": 0.6152, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.8811932110406309, |
|
"grad_norm": 0.5863591455341749, |
|
"learning_rate": 8.900254931821632e-06, |
|
"loss": 0.6105, |
|
"step": 1285 |
|
}, |
|
{ |
|
"epoch": 0.8846219783987657, |
|
"grad_norm": 0.5808595100846887, |
|
"learning_rate": 8.887738230759334e-06, |
|
"loss": 0.6103, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.8880507457569004, |
|
"grad_norm": 0.6637416525469315, |
|
"learning_rate": 8.875159610400535e-06, |
|
"loss": 0.6151, |
|
"step": 1295 |
|
}, |
|
{ |
|
"epoch": 0.8914795131150351, |
|
"grad_norm": 0.635843590965775, |
|
"learning_rate": 8.862519271082624e-06, |
|
"loss": 0.6122, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.89490828047317, |
|
"grad_norm": 0.48644669408598906, |
|
"learning_rate": 8.849817414125973e-06, |
|
"loss": 0.6107, |
|
"step": 1305 |
|
}, |
|
{ |
|
"epoch": 0.8983370478313046, |
|
"grad_norm": 0.7387834738899194, |
|
"learning_rate": 8.83705424183074e-06, |
|
"loss": 0.6144, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.9017658151894394, |
|
"grad_norm": 0.47318607768496285, |
|
"learning_rate": 8.824229957473638e-06, |
|
"loss": 0.615, |
|
"step": 1315 |
|
}, |
|
{ |
|
"epoch": 0.9051945825475741, |
|
"grad_norm": 0.6099273713462994, |
|
"learning_rate": 8.811344765304698e-06, |
|
"loss": 0.6126, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.9086233499057089, |
|
"grad_norm": 0.5213938292230176, |
|
"learning_rate": 8.798398870544027e-06, |
|
"loss": 0.6034, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 0.9120521172638436, |
|
"grad_norm": 0.5394625940524758, |
|
"learning_rate": 8.785392479378522e-06, |
|
"loss": 0.6039, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.9154808846219784, |
|
"grad_norm": 0.48958440285150634, |
|
"learning_rate": 8.772325798958597e-06, |
|
"loss": 0.6029, |
|
"step": 1335 |
|
}, |
|
{ |
|
"epoch": 0.9189096519801131, |
|
"grad_norm": 0.49051315574848703, |
|
"learning_rate": 8.759199037394888e-06, |
|
"loss": 0.6007, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.9223384193382479, |
|
"grad_norm": 0.5004329030521917, |
|
"learning_rate": 8.746012403754924e-06, |
|
"loss": 0.6079, |
|
"step": 1345 |
|
}, |
|
{ |
|
"epoch": 0.9257671866963827, |
|
"grad_norm": 0.5018875978088632, |
|
"learning_rate": 8.732766108059814e-06, |
|
"loss": 0.614, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.9291959540545174, |
|
"grad_norm": 0.5162939197438497, |
|
"learning_rate": 8.719460361280888e-06, |
|
"loss": 0.6161, |
|
"step": 1355 |
|
}, |
|
{ |
|
"epoch": 0.9326247214126522, |
|
"grad_norm": 0.4971369395606836, |
|
"learning_rate": 8.70609537533634e-06, |
|
"loss": 0.605, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.9360534887707869, |
|
"grad_norm": 0.5016579726115805, |
|
"learning_rate": 8.692671363087864e-06, |
|
"loss": 0.6173, |
|
"step": 1365 |
|
}, |
|
{ |
|
"epoch": 0.9394822561289217, |
|
"grad_norm": 0.5580053234884178, |
|
"learning_rate": 8.679188538337248e-06, |
|
"loss": 0.6087, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.9429110234870564, |
|
"grad_norm": 0.4577425988677661, |
|
"learning_rate": 8.665647115822981e-06, |
|
"loss": 0.5988, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 0.9463397908451912, |
|
"grad_norm": 0.45683981074908053, |
|
"learning_rate": 8.652047311216823e-06, |
|
"loss": 0.6064, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.949768558203326, |
|
"grad_norm": 0.5288094567173458, |
|
"learning_rate": 8.638389341120378e-06, |
|
"loss": 0.6089, |
|
"step": 1385 |
|
}, |
|
{ |
|
"epoch": 0.9531973255614606, |
|
"grad_norm": 0.5037863596617661, |
|
"learning_rate": 8.62467342306164e-06, |
|
"loss": 0.6085, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.9566260929195954, |
|
"grad_norm": 0.4706120011800052, |
|
"learning_rate": 8.610899775491531e-06, |
|
"loss": 0.6023, |
|
"step": 1395 |
|
}, |
|
{ |
|
"epoch": 0.9600548602777301, |
|
"grad_norm": 0.5245905876570266, |
|
"learning_rate": 8.597068617780419e-06, |
|
"loss": 0.6152, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.9634836276358649, |
|
"grad_norm": 0.5294292049294829, |
|
"learning_rate": 8.583180170214625e-06, |
|
"loss": 0.6087, |
|
"step": 1405 |
|
}, |
|
{ |
|
"epoch": 0.9669123949939996, |
|
"grad_norm": 0.5012747145599434, |
|
"learning_rate": 8.569234653992916e-06, |
|
"loss": 0.6074, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.9703411623521344, |
|
"grad_norm": 0.5519787441985884, |
|
"learning_rate": 8.555232291222982e-06, |
|
"loss": 0.608, |
|
"step": 1415 |
|
}, |
|
{ |
|
"epoch": 0.9737699297102692, |
|
"grad_norm": 0.566179243123642, |
|
"learning_rate": 8.541173304917895e-06, |
|
"loss": 0.6057, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.9771986970684039, |
|
"grad_norm": 0.4833724946956298, |
|
"learning_rate": 8.527057918992564e-06, |
|
"loss": 0.6048, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 0.9806274644265387, |
|
"grad_norm": 0.5444527229286149, |
|
"learning_rate": 8.512886358260162e-06, |
|
"loss": 0.6074, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.9840562317846734, |
|
"grad_norm": 0.6265580889574098, |
|
"learning_rate": 8.498658848428541e-06, |
|
"loss": 0.6104, |
|
"step": 1435 |
|
}, |
|
{ |
|
"epoch": 0.9874849991428082, |
|
"grad_norm": 0.511037692990781, |
|
"learning_rate": 8.484375616096658e-06, |
|
"loss": 0.6111, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.9909137665009429, |
|
"grad_norm": 0.618617261146988, |
|
"learning_rate": 8.470036888750941e-06, |
|
"loss": 0.6059, |
|
"step": 1445 |
|
}, |
|
{ |
|
"epoch": 0.9943425338590777, |
|
"grad_norm": 0.6912314169042373, |
|
"learning_rate": 8.455642894761684e-06, |
|
"loss": 0.6109, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.9977713012172124, |
|
"grad_norm": 0.6536131654185897, |
|
"learning_rate": 8.441193863379396e-06, |
|
"loss": 0.6117, |
|
"step": 1455 |
|
}, |
|
{ |
|
"epoch": 0.9998285616320932, |
|
"eval_loss": 0.6076797842979431, |
|
"eval_runtime": 204.4178, |
|
"eval_samples_per_second": 48.073, |
|
"eval_steps_per_second": 1.507, |
|
"step": 1458 |
|
}, |
|
{ |
|
"epoch": 1.0012000685753473, |
|
"grad_norm": 0.5744253109468758, |
|
"learning_rate": 8.426690024731161e-06, |
|
"loss": 0.5965, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 1.0046288359334818, |
|
"grad_norm": 0.6126234725401744, |
|
"learning_rate": 8.412131609816968e-06, |
|
"loss": 0.5426, |
|
"step": 1465 |
|
}, |
|
{ |
|
"epoch": 1.0080576032916166, |
|
"grad_norm": 0.5502946067152649, |
|
"learning_rate": 8.39751885050603e-06, |
|
"loss": 0.5483, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 1.0114863706497514, |
|
"grad_norm": 0.5203632756557842, |
|
"learning_rate": 8.382851979533095e-06, |
|
"loss": 0.5429, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 1.0149151380078862, |
|
"grad_norm": 0.5256969786092444, |
|
"learning_rate": 8.36813123049474e-06, |
|
"loss": 0.5416, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 1.0183439053660208, |
|
"grad_norm": 0.5199622468399526, |
|
"learning_rate": 8.353356837845642e-06, |
|
"loss": 0.5497, |
|
"step": 1485 |
|
}, |
|
{ |
|
"epoch": 1.0217726727241556, |
|
"grad_norm": 0.5409282245995303, |
|
"learning_rate": 8.338529036894855e-06, |
|
"loss": 0.549, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 1.0252014400822904, |
|
"grad_norm": 0.4884152712675349, |
|
"learning_rate": 8.323648063802056e-06, |
|
"loss": 0.5444, |
|
"step": 1495 |
|
}, |
|
{ |
|
"epoch": 1.0286302074404252, |
|
"grad_norm": 0.5014268314366117, |
|
"learning_rate": 8.308714155573785e-06, |
|
"loss": 0.5495, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.03205897479856, |
|
"grad_norm": 0.4846536766697211, |
|
"learning_rate": 8.293727550059668e-06, |
|
"loss": 0.5457, |
|
"step": 1505 |
|
}, |
|
{ |
|
"epoch": 1.0354877421566946, |
|
"grad_norm": 0.5390111306520632, |
|
"learning_rate": 8.278688485948634e-06, |
|
"loss": 0.5479, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 1.0389165095148294, |
|
"grad_norm": 0.5668492664534543, |
|
"learning_rate": 8.263597202765109e-06, |
|
"loss": 0.5412, |
|
"step": 1515 |
|
}, |
|
{ |
|
"epoch": 1.0423452768729642, |
|
"grad_norm": 0.49509927353990013, |
|
"learning_rate": 8.248453940865204e-06, |
|
"loss": 0.5509, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 1.045774044231099, |
|
"grad_norm": 0.5658638682509576, |
|
"learning_rate": 8.233258941432883e-06, |
|
"loss": 0.5466, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 1.0492028115892336, |
|
"grad_norm": 0.599624817317566, |
|
"learning_rate": 8.218012446476128e-06, |
|
"loss": 0.5456, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 1.0526315789473684, |
|
"grad_norm": 0.5811724248156606, |
|
"learning_rate": 8.202714698823078e-06, |
|
"loss": 0.5478, |
|
"step": 1535 |
|
}, |
|
{ |
|
"epoch": 1.0560603463055032, |
|
"grad_norm": 0.5829817578564964, |
|
"learning_rate": 8.187365942118162e-06, |
|
"loss": 0.5447, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 1.059489113663638, |
|
"grad_norm": 0.6195689312853232, |
|
"learning_rate": 8.171966420818227e-06, |
|
"loss": 0.5462, |
|
"step": 1545 |
|
}, |
|
{ |
|
"epoch": 1.0629178810217728, |
|
"grad_norm": 0.5753478735487544, |
|
"learning_rate": 8.156516380188635e-06, |
|
"loss": 0.5461, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 1.0663466483799073, |
|
"grad_norm": 0.7022154988882346, |
|
"learning_rate": 8.14101606629936e-06, |
|
"loss": 0.5499, |
|
"step": 1555 |
|
}, |
|
{ |
|
"epoch": 1.0697754157380421, |
|
"grad_norm": 0.6777327419801819, |
|
"learning_rate": 8.125465726021068e-06, |
|
"loss": 0.5499, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 1.073204183096177, |
|
"grad_norm": 0.5453482327699332, |
|
"learning_rate": 8.10986560702119e-06, |
|
"loss": 0.5503, |
|
"step": 1565 |
|
}, |
|
{ |
|
"epoch": 1.0766329504543117, |
|
"grad_norm": 0.5825073699218344, |
|
"learning_rate": 8.09421595775997e-06, |
|
"loss": 0.5451, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 1.0800617178124465, |
|
"grad_norm": 0.5964267696290744, |
|
"learning_rate": 8.07851702748651e-06, |
|
"loss": 0.5497, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 1.083490485170581, |
|
"grad_norm": 0.5945145884647497, |
|
"learning_rate": 8.062769066234807e-06, |
|
"loss": 0.5467, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 1.086919252528716, |
|
"grad_norm": 0.5596939286945352, |
|
"learning_rate": 8.046972324819762e-06, |
|
"loss": 0.5469, |
|
"step": 1585 |
|
}, |
|
{ |
|
"epoch": 1.0903480198868507, |
|
"grad_norm": 0.5511374508562613, |
|
"learning_rate": 8.031127054833192e-06, |
|
"loss": 0.5473, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 1.0937767872449855, |
|
"grad_norm": 0.5908893744280362, |
|
"learning_rate": 8.015233508639811e-06, |
|
"loss": 0.5504, |
|
"step": 1595 |
|
}, |
|
{ |
|
"epoch": 1.09720555460312, |
|
"grad_norm": 0.6257657254754231, |
|
"learning_rate": 7.999291939373232e-06, |
|
"loss": 0.5473, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.1006343219612549, |
|
"grad_norm": 0.6187428942884239, |
|
"learning_rate": 7.983302600931911e-06, |
|
"loss": 0.5509, |
|
"step": 1605 |
|
}, |
|
{ |
|
"epoch": 1.1040630893193897, |
|
"grad_norm": 0.5783570762769669, |
|
"learning_rate": 7.967265747975124e-06, |
|
"loss": 0.5507, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 1.1074918566775245, |
|
"grad_norm": 0.6002185456345444, |
|
"learning_rate": 7.951181635918895e-06, |
|
"loss": 0.5462, |
|
"step": 1615 |
|
}, |
|
{ |
|
"epoch": 1.1109206240356593, |
|
"grad_norm": 0.5515256307910629, |
|
"learning_rate": 7.93505052093194e-06, |
|
"loss": 0.5533, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 1.1143493913937939, |
|
"grad_norm": 0.6078936624704993, |
|
"learning_rate": 7.91887265993158e-06, |
|
"loss": 0.5472, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 1.1177781587519287, |
|
"grad_norm": 0.5442571913199759, |
|
"learning_rate": 7.90264831057965e-06, |
|
"loss": 0.5486, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 1.1212069261100635, |
|
"grad_norm": 0.5524504533522675, |
|
"learning_rate": 7.8863777312784e-06, |
|
"loss": 0.5456, |
|
"step": 1635 |
|
}, |
|
{ |
|
"epoch": 1.1246356934681982, |
|
"grad_norm": 0.543132690088679, |
|
"learning_rate": 7.870061181166372e-06, |
|
"loss": 0.5541, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 1.1280644608263328, |
|
"grad_norm": 0.5329328262846712, |
|
"learning_rate": 7.853698920114281e-06, |
|
"loss": 0.5528, |
|
"step": 1645 |
|
}, |
|
{ |
|
"epoch": 1.1314932281844676, |
|
"grad_norm": 0.5139681859606589, |
|
"learning_rate": 7.837291208720867e-06, |
|
"loss": 0.5539, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 1.1349219955426024, |
|
"grad_norm": 0.5328884447403797, |
|
"learning_rate": 7.820838308308751e-06, |
|
"loss": 0.5489, |
|
"step": 1655 |
|
}, |
|
{ |
|
"epoch": 1.1383507629007372, |
|
"grad_norm": 0.6114939943677603, |
|
"learning_rate": 7.804340480920274e-06, |
|
"loss": 0.5487, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 1.141779530258872, |
|
"grad_norm": 0.6051189586236244, |
|
"learning_rate": 7.787797989313317e-06, |
|
"loss": 0.5488, |
|
"step": 1665 |
|
}, |
|
{ |
|
"epoch": 1.1452082976170066, |
|
"grad_norm": 0.5408630245450696, |
|
"learning_rate": 7.771211096957125e-06, |
|
"loss": 0.552, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 1.1486370649751414, |
|
"grad_norm": 0.5456867423021694, |
|
"learning_rate": 7.754580068028096e-06, |
|
"loss": 0.5469, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 1.1520658323332762, |
|
"grad_norm": 0.5474469138093225, |
|
"learning_rate": 7.737905167405596e-06, |
|
"loss": 0.5527, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 1.155494599691411, |
|
"grad_norm": 0.521490633503822, |
|
"learning_rate": 7.721186660667715e-06, |
|
"loss": 0.5476, |
|
"step": 1685 |
|
}, |
|
{ |
|
"epoch": 1.1589233670495456, |
|
"grad_norm": 0.6191340910966903, |
|
"learning_rate": 7.704424814087056e-06, |
|
"loss": 0.5466, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 1.1623521344076804, |
|
"grad_norm": 0.5052183730102225, |
|
"learning_rate": 7.687619894626493e-06, |
|
"loss": 0.555, |
|
"step": 1695 |
|
}, |
|
{ |
|
"epoch": 1.1657809017658152, |
|
"grad_norm": 0.5722854768006315, |
|
"learning_rate": 7.670772169934902e-06, |
|
"loss": 0.5513, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.16920966912395, |
|
"grad_norm": 0.5067922499590597, |
|
"learning_rate": 7.653881908342916e-06, |
|
"loss": 0.5489, |
|
"step": 1705 |
|
}, |
|
{ |
|
"epoch": 1.1726384364820848, |
|
"grad_norm": 0.5604900851332252, |
|
"learning_rate": 7.636949378858647e-06, |
|
"loss": 0.5389, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 1.1760672038402193, |
|
"grad_norm": 0.5018446215200031, |
|
"learning_rate": 7.6199748511634005e-06, |
|
"loss": 0.5463, |
|
"step": 1715 |
|
}, |
|
{ |
|
"epoch": 1.1794959711983541, |
|
"grad_norm": 0.48196349070673195, |
|
"learning_rate": 7.602958595607375e-06, |
|
"loss": 0.549, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 1.182924738556489, |
|
"grad_norm": 0.5951894113068615, |
|
"learning_rate": 7.5859008832053636e-06, |
|
"loss": 0.5484, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 1.1863535059146237, |
|
"grad_norm": 0.6230046151022545, |
|
"learning_rate": 7.568801985632439e-06, |
|
"loss": 0.5495, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 1.1897822732727585, |
|
"grad_norm": 0.5119551401124481, |
|
"learning_rate": 7.551662175219614e-06, |
|
"loss": 0.5466, |
|
"step": 1735 |
|
}, |
|
{ |
|
"epoch": 1.1932110406308931, |
|
"grad_norm": 0.5302998077477141, |
|
"learning_rate": 7.5344817249495195e-06, |
|
"loss": 0.539, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 1.196639807989028, |
|
"grad_norm": 0.5804990070353486, |
|
"learning_rate": 7.51726090845205e-06, |
|
"loss": 0.5503, |
|
"step": 1745 |
|
}, |
|
{ |
|
"epoch": 1.2000685753471627, |
|
"grad_norm": 0.5861147333730155, |
|
"learning_rate": 7.500000000000001e-06, |
|
"loss": 0.5481, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 1.2034973427052975, |
|
"grad_norm": 0.5966836470331061, |
|
"learning_rate": 7.482699274504708e-06, |
|
"loss": 0.5438, |
|
"step": 1755 |
|
}, |
|
{ |
|
"epoch": 1.2069261100634323, |
|
"grad_norm": 0.5203444560983739, |
|
"learning_rate": 7.465359007511667e-06, |
|
"loss": 0.5476, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 1.2103548774215669, |
|
"grad_norm": 0.4994889019945669, |
|
"learning_rate": 7.44797947519614e-06, |
|
"loss": 0.5526, |
|
"step": 1765 |
|
}, |
|
{ |
|
"epoch": 1.2137836447797017, |
|
"grad_norm": 0.5743096754445195, |
|
"learning_rate": 7.430560954358764e-06, |
|
"loss": 0.5484, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 1.2172124121378365, |
|
"grad_norm": 0.5032391615298711, |
|
"learning_rate": 7.413103722421139e-06, |
|
"loss": 0.5431, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 1.2206411794959713, |
|
"grad_norm": 0.5072263050309479, |
|
"learning_rate": 7.395608057421406e-06, |
|
"loss": 0.5415, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 1.2240699468541059, |
|
"grad_norm": 0.5302212227201574, |
|
"learning_rate": 7.378074238009827e-06, |
|
"loss": 0.5375, |
|
"step": 1785 |
|
}, |
|
{ |
|
"epoch": 1.2274987142122407, |
|
"grad_norm": 0.5042827018544307, |
|
"learning_rate": 7.360502543444339e-06, |
|
"loss": 0.5493, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 1.2309274815703755, |
|
"grad_norm": 0.5067327603110284, |
|
"learning_rate": 7.342893253586112e-06, |
|
"loss": 0.5547, |
|
"step": 1795 |
|
}, |
|
{ |
|
"epoch": 1.2343562489285103, |
|
"grad_norm": 0.5008933072194933, |
|
"learning_rate": 7.325246648895089e-06, |
|
"loss": 0.541, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.237785016286645, |
|
"grad_norm": 0.5065899468061212, |
|
"learning_rate": 7.307563010425517e-06, |
|
"loss": 0.5497, |
|
"step": 1805 |
|
}, |
|
{ |
|
"epoch": 1.2412137836447796, |
|
"grad_norm": 0.5456582166819376, |
|
"learning_rate": 7.289842619821475e-06, |
|
"loss": 0.5501, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 1.2446425510029144, |
|
"grad_norm": 0.625178112160367, |
|
"learning_rate": 7.272085759312389e-06, |
|
"loss": 0.5478, |
|
"step": 1815 |
|
}, |
|
{ |
|
"epoch": 1.2480713183610492, |
|
"grad_norm": 0.5527992246013412, |
|
"learning_rate": 7.254292711708529e-06, |
|
"loss": 0.5527, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 1.251500085719184, |
|
"grad_norm": 0.6681908362750082, |
|
"learning_rate": 7.236463760396516e-06, |
|
"loss": 0.5627, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 1.2549288530773186, |
|
"grad_norm": 0.5414761906585449, |
|
"learning_rate": 7.218599189334799e-06, |
|
"loss": 0.5437, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 1.2583576204354534, |
|
"grad_norm": 0.545519378308768, |
|
"learning_rate": 7.200699283049138e-06, |
|
"loss": 0.5458, |
|
"step": 1835 |
|
}, |
|
{ |
|
"epoch": 1.2617863877935882, |
|
"grad_norm": 0.5175128490097378, |
|
"learning_rate": 7.182764326628068e-06, |
|
"loss": 0.5471, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 1.265215155151723, |
|
"grad_norm": 0.5436775659977866, |
|
"learning_rate": 7.164794605718366e-06, |
|
"loss": 0.5515, |
|
"step": 1845 |
|
}, |
|
{ |
|
"epoch": 1.2686439225098578, |
|
"grad_norm": 0.6110723983754518, |
|
"learning_rate": 7.146790406520491e-06, |
|
"loss": 0.5456, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 1.2720726898679924, |
|
"grad_norm": 0.5770751655663271, |
|
"learning_rate": 7.128752015784036e-06, |
|
"loss": 0.5443, |
|
"step": 1855 |
|
}, |
|
{ |
|
"epoch": 1.2755014572261272, |
|
"grad_norm": 0.4962264740154398, |
|
"learning_rate": 7.1106797208031554e-06, |
|
"loss": 0.5455, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 1.278930224584262, |
|
"grad_norm": 0.5486471659395239, |
|
"learning_rate": 7.0925738094119865e-06, |
|
"loss": 0.5519, |
|
"step": 1865 |
|
}, |
|
{ |
|
"epoch": 1.2823589919423968, |
|
"grad_norm": 0.5503873404673431, |
|
"learning_rate": 7.0744345699800755e-06, |
|
"loss": 0.5478, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 1.2857877593005314, |
|
"grad_norm": 0.5042441453375152, |
|
"learning_rate": 7.056262291407773e-06, |
|
"loss": 0.5495, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 1.2892165266586662, |
|
"grad_norm": 0.5245616326038465, |
|
"learning_rate": 7.038057263121639e-06, |
|
"loss": 0.5397, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 1.292645294016801, |
|
"grad_norm": 0.5294940877618698, |
|
"learning_rate": 7.019819775069834e-06, |
|
"loss": 0.5403, |
|
"step": 1885 |
|
}, |
|
{ |
|
"epoch": 1.2960740613749357, |
|
"grad_norm": 0.6147918213943395, |
|
"learning_rate": 7.001550117717499e-06, |
|
"loss": 0.5405, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 1.2995028287330705, |
|
"grad_norm": 0.5184070224956804, |
|
"learning_rate": 6.983248582042126e-06, |
|
"loss": 0.5481, |
|
"step": 1895 |
|
}, |
|
{ |
|
"epoch": 1.3029315960912053, |
|
"grad_norm": 0.5295411569445538, |
|
"learning_rate": 6.9649154595289326e-06, |
|
"loss": 0.5466, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.30636036344934, |
|
"grad_norm": 0.49956769450406724, |
|
"learning_rate": 6.94655104216621e-06, |
|
"loss": 0.5461, |
|
"step": 1905 |
|
}, |
|
{ |
|
"epoch": 1.3097891308074747, |
|
"grad_norm": 0.5529813856201232, |
|
"learning_rate": 6.92815562244068e-06, |
|
"loss": 0.5456, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 1.3132178981656095, |
|
"grad_norm": 0.48747897180543187, |
|
"learning_rate": 6.90972949333283e-06, |
|
"loss": 0.5506, |
|
"step": 1915 |
|
}, |
|
{ |
|
"epoch": 1.316646665523744, |
|
"grad_norm": 0.5423084499887999, |
|
"learning_rate": 6.891272948312251e-06, |
|
"loss": 0.5497, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 1.320075432881879, |
|
"grad_norm": 0.4982712510800406, |
|
"learning_rate": 6.872786281332965e-06, |
|
"loss": 0.5493, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 1.3235042002400137, |
|
"grad_norm": 0.47720857931691807, |
|
"learning_rate": 6.854269786828741e-06, |
|
"loss": 0.5471, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 1.3269329675981485, |
|
"grad_norm": 0.5568806463136045, |
|
"learning_rate": 6.8357237597084015e-06, |
|
"loss": 0.5527, |
|
"step": 1935 |
|
}, |
|
{ |
|
"epoch": 1.3303617349562833, |
|
"grad_norm": 0.510634543124876, |
|
"learning_rate": 6.817148495351131e-06, |
|
"loss": 0.5495, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 1.333790502314418, |
|
"grad_norm": 0.5164556362855032, |
|
"learning_rate": 6.7985442896017765e-06, |
|
"loss": 0.5463, |
|
"step": 1945 |
|
}, |
|
{ |
|
"epoch": 1.3372192696725527, |
|
"grad_norm": 0.5254656454551451, |
|
"learning_rate": 6.779911438766117e-06, |
|
"loss": 0.5426, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 1.3406480370306875, |
|
"grad_norm": 0.49029587603019725, |
|
"learning_rate": 6.7612502396061685e-06, |
|
"loss": 0.5429, |
|
"step": 1955 |
|
}, |
|
{ |
|
"epoch": 1.3440768043888223, |
|
"grad_norm": 0.4789585618635431, |
|
"learning_rate": 6.742560989335438e-06, |
|
"loss": 0.543, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 1.3475055717469568, |
|
"grad_norm": 0.5351724574021284, |
|
"learning_rate": 6.723843985614201e-06, |
|
"loss": 0.549, |
|
"step": 1965 |
|
}, |
|
{ |
|
"epoch": 1.3509343391050916, |
|
"grad_norm": 0.5285309854529648, |
|
"learning_rate": 6.705099526544757e-06, |
|
"loss": 0.5491, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 1.3543631064632264, |
|
"grad_norm": 0.5744541149885962, |
|
"learning_rate": 6.686327910666679e-06, |
|
"loss": 0.5469, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 1.3577918738213612, |
|
"grad_norm": 0.5276476840483824, |
|
"learning_rate": 6.667529436952064e-06, |
|
"loss": 0.5501, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 1.361220641179496, |
|
"grad_norm": 0.5136271937577194, |
|
"learning_rate": 6.6487044048007696e-06, |
|
"loss": 0.5457, |
|
"step": 1985 |
|
}, |
|
{ |
|
"epoch": 1.3646494085376308, |
|
"grad_norm": 0.46138577500457834, |
|
"learning_rate": 6.629853114035643e-06, |
|
"loss": 0.5461, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 1.3680781758957654, |
|
"grad_norm": 0.4895982866432622, |
|
"learning_rate": 6.610975864897746e-06, |
|
"loss": 0.5392, |
|
"step": 1995 |
|
}, |
|
{ |
|
"epoch": 1.3715069432539002, |
|
"grad_norm": 0.5042414486514584, |
|
"learning_rate": 6.5920729580415795e-06, |
|
"loss": 0.5463, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.374935710612035, |
|
"grad_norm": 0.507137387593949, |
|
"learning_rate": 6.573144694530287e-06, |
|
"loss": 0.5486, |
|
"step": 2005 |
|
}, |
|
{ |
|
"epoch": 1.3783644779701696, |
|
"grad_norm": 0.5035112896994087, |
|
"learning_rate": 6.554191375830861e-06, |
|
"loss": 0.5399, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 1.3817932453283044, |
|
"grad_norm": 0.5191434631626274, |
|
"learning_rate": 6.535213303809349e-06, |
|
"loss": 0.5462, |
|
"step": 2015 |
|
}, |
|
{ |
|
"epoch": 1.3852220126864392, |
|
"grad_norm": 0.5215645909213554, |
|
"learning_rate": 6.516210780726032e-06, |
|
"loss": 0.5492, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 1.388650780044574, |
|
"grad_norm": 0.4955063426711122, |
|
"learning_rate": 6.497184109230628e-06, |
|
"loss": 0.547, |
|
"step": 2025 |
|
}, |
|
{ |
|
"epoch": 1.3920795474027088, |
|
"grad_norm": 0.5083904799973671, |
|
"learning_rate": 6.478133592357455e-06, |
|
"loss": 0.5495, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 1.3955083147608436, |
|
"grad_norm": 0.5413459772025052, |
|
"learning_rate": 6.4590595335206154e-06, |
|
"loss": 0.5471, |
|
"step": 2035 |
|
}, |
|
{ |
|
"epoch": 1.3989370821189782, |
|
"grad_norm": 0.5159096563706932, |
|
"learning_rate": 6.43996223650916e-06, |
|
"loss": 0.5475, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 1.402365849477113, |
|
"grad_norm": 0.5232964962054851, |
|
"learning_rate": 6.420842005482248e-06, |
|
"loss": 0.5526, |
|
"step": 2045 |
|
}, |
|
{ |
|
"epoch": 1.4057946168352478, |
|
"grad_norm": 0.4879763438255916, |
|
"learning_rate": 6.401699144964306e-06, |
|
"loss": 0.5563, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 1.4092233841933826, |
|
"grad_norm": 0.49746751110206966, |
|
"learning_rate": 6.382533959840178e-06, |
|
"loss": 0.5401, |
|
"step": 2055 |
|
}, |
|
{ |
|
"epoch": 1.4126521515515171, |
|
"grad_norm": 0.551660200659416, |
|
"learning_rate": 6.3633467553502625e-06, |
|
"loss": 0.5464, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 1.416080918909652, |
|
"grad_norm": 0.5127978902578759, |
|
"learning_rate": 6.344137837085662e-06, |
|
"loss": 0.5436, |
|
"step": 2065 |
|
}, |
|
{ |
|
"epoch": 1.4195096862677867, |
|
"grad_norm": 0.528021714081405, |
|
"learning_rate": 6.32490751098331e-06, |
|
"loss": 0.5475, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 1.4229384536259215, |
|
"grad_norm": 0.5235426406662949, |
|
"learning_rate": 6.305656083321092e-06, |
|
"loss": 0.5495, |
|
"step": 2075 |
|
}, |
|
{ |
|
"epoch": 1.4263672209840563, |
|
"grad_norm": 0.5932716644747056, |
|
"learning_rate": 6.286383860712982e-06, |
|
"loss": 0.5437, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 1.429795988342191, |
|
"grad_norm": 0.5329514416163247, |
|
"learning_rate": 6.2670911501041495e-06, |
|
"loss": 0.5537, |
|
"step": 2085 |
|
}, |
|
{ |
|
"epoch": 1.4332247557003257, |
|
"grad_norm": 0.5203312437593606, |
|
"learning_rate": 6.247778258766069e-06, |
|
"loss": 0.5453, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 1.4366535230584605, |
|
"grad_norm": 0.5530831064402718, |
|
"learning_rate": 6.228445494291635e-06, |
|
"loss": 0.5441, |
|
"step": 2095 |
|
}, |
|
{ |
|
"epoch": 1.4400822904165953, |
|
"grad_norm": 0.5356248838373525, |
|
"learning_rate": 6.209093164590253e-06, |
|
"loss": 0.5467, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.4435110577747299, |
|
"grad_norm": 0.507854588281778, |
|
"learning_rate": 6.189721577882942e-06, |
|
"loss": 0.5473, |
|
"step": 2105 |
|
}, |
|
{ |
|
"epoch": 1.4469398251328647, |
|
"grad_norm": 0.5020349063977403, |
|
"learning_rate": 6.170331042697425e-06, |
|
"loss": 0.5439, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 1.4503685924909995, |
|
"grad_norm": 0.5299904655120313, |
|
"learning_rate": 6.150921867863215e-06, |
|
"loss": 0.5429, |
|
"step": 2115 |
|
}, |
|
{ |
|
"epoch": 1.4537973598491343, |
|
"grad_norm": 0.5599366493504166, |
|
"learning_rate": 6.131494362506693e-06, |
|
"loss": 0.5479, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 1.457226127207269, |
|
"grad_norm": 0.5419693437182285, |
|
"learning_rate": 6.112048836046185e-06, |
|
"loss": 0.5515, |
|
"step": 2125 |
|
}, |
|
{ |
|
"epoch": 1.4606548945654039, |
|
"grad_norm": 0.5619194156719064, |
|
"learning_rate": 6.09258559818704e-06, |
|
"loss": 0.5493, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 1.4640836619235384, |
|
"grad_norm": 0.5180975989810822, |
|
"learning_rate": 6.0731049589166895e-06, |
|
"loss": 0.5447, |
|
"step": 2135 |
|
}, |
|
{ |
|
"epoch": 1.4675124292816732, |
|
"grad_norm": 0.47862105476236594, |
|
"learning_rate": 6.053607228499719e-06, |
|
"loss": 0.5411, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 1.470941196639808, |
|
"grad_norm": 0.5019570636347335, |
|
"learning_rate": 6.034092717472917e-06, |
|
"loss": 0.5479, |
|
"step": 2145 |
|
}, |
|
{ |
|
"epoch": 1.4743699639979426, |
|
"grad_norm": 0.5005596379353178, |
|
"learning_rate": 6.014561736640334e-06, |
|
"loss": 0.5435, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 1.4777987313560774, |
|
"grad_norm": 0.4759889724446824, |
|
"learning_rate": 5.9950145970683375e-06, |
|
"loss": 0.5434, |
|
"step": 2155 |
|
}, |
|
{ |
|
"epoch": 1.4812274987142122, |
|
"grad_norm": 0.47708542568255435, |
|
"learning_rate": 5.975451610080643e-06, |
|
"loss": 0.5382, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 1.484656266072347, |
|
"grad_norm": 0.507797233049438, |
|
"learning_rate": 5.955873087253371e-06, |
|
"loss": 0.5488, |
|
"step": 2165 |
|
}, |
|
{ |
|
"epoch": 1.4880850334304818, |
|
"grad_norm": 0.48901155035523547, |
|
"learning_rate": 5.936279340410082e-06, |
|
"loss": 0.5379, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 1.4915138007886166, |
|
"grad_norm": 0.5482350970529439, |
|
"learning_rate": 5.916670681616797e-06, |
|
"loss": 0.5444, |
|
"step": 2175 |
|
}, |
|
{ |
|
"epoch": 1.4949425681467512, |
|
"grad_norm": 0.48909311036019854, |
|
"learning_rate": 5.8970474231770445e-06, |
|
"loss": 0.5438, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 1.498371335504886, |
|
"grad_norm": 0.5029129982744719, |
|
"learning_rate": 5.877409877626876e-06, |
|
"loss": 0.543, |
|
"step": 2185 |
|
}, |
|
{ |
|
"epoch": 1.5018001028630208, |
|
"grad_norm": 0.49508908810255237, |
|
"learning_rate": 5.857758357729892e-06, |
|
"loss": 0.5401, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 1.5052288702211554, |
|
"grad_norm": 0.5186990415007219, |
|
"learning_rate": 5.838093176472259e-06, |
|
"loss": 0.544, |
|
"step": 2195 |
|
}, |
|
{ |
|
"epoch": 1.5086576375792902, |
|
"grad_norm": 0.5236949094459556, |
|
"learning_rate": 5.8184146470577265e-06, |
|
"loss": 0.5334, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.512086404937425, |
|
"grad_norm": 0.4893285394654287, |
|
"learning_rate": 5.798723082902636e-06, |
|
"loss": 0.5433, |
|
"step": 2205 |
|
}, |
|
{ |
|
"epoch": 1.5155151722955598, |
|
"grad_norm": 0.4907763180175273, |
|
"learning_rate": 5.779018797630934e-06, |
|
"loss": 0.543, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 1.5189439396536946, |
|
"grad_norm": 0.5637821736714953, |
|
"learning_rate": 5.75930210506917e-06, |
|
"loss": 0.5406, |
|
"step": 2215 |
|
}, |
|
{ |
|
"epoch": 1.5223727070118294, |
|
"grad_norm": 0.5100910825032349, |
|
"learning_rate": 5.739573319241505e-06, |
|
"loss": 0.5397, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 1.5258014743699642, |
|
"grad_norm": 0.45580474474329813, |
|
"learning_rate": 5.719832754364707e-06, |
|
"loss": 0.5468, |
|
"step": 2225 |
|
}, |
|
{ |
|
"epoch": 1.5292302417280987, |
|
"grad_norm": 0.5045515176992598, |
|
"learning_rate": 5.7000807248431466e-06, |
|
"loss": 0.5367, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 1.5326590090862335, |
|
"grad_norm": 0.4933966502093522, |
|
"learning_rate": 5.680317545263786e-06, |
|
"loss": 0.5461, |
|
"step": 2235 |
|
}, |
|
{ |
|
"epoch": 1.5360877764443681, |
|
"grad_norm": 0.5029727396999837, |
|
"learning_rate": 5.66054353039118e-06, |
|
"loss": 0.5419, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 1.539516543802503, |
|
"grad_norm": 0.5034621640442088, |
|
"learning_rate": 5.640758995162446e-06, |
|
"loss": 0.5425, |
|
"step": 2245 |
|
}, |
|
{ |
|
"epoch": 1.5429453111606377, |
|
"grad_norm": 0.5084425747704971, |
|
"learning_rate": 5.620964254682267e-06, |
|
"loss": 0.5414, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 1.5463740785187725, |
|
"grad_norm": 0.505196202702042, |
|
"learning_rate": 5.601159624217854e-06, |
|
"loss": 0.5474, |
|
"step": 2255 |
|
}, |
|
{ |
|
"epoch": 1.5498028458769073, |
|
"grad_norm": 0.5308371720246889, |
|
"learning_rate": 5.58134541919394e-06, |
|
"loss": 0.5433, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 1.553231613235042, |
|
"grad_norm": 0.4934713800815739, |
|
"learning_rate": 5.5615219551877476e-06, |
|
"loss": 0.5504, |
|
"step": 2265 |
|
}, |
|
{ |
|
"epoch": 1.556660380593177, |
|
"grad_norm": 0.46471141316877956, |
|
"learning_rate": 5.5416895479239665e-06, |
|
"loss": 0.5413, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 1.5600891479513115, |
|
"grad_norm": 0.47931849486997197, |
|
"learning_rate": 5.521848513269723e-06, |
|
"loss": 0.5373, |
|
"step": 2275 |
|
}, |
|
{ |
|
"epoch": 1.5635179153094463, |
|
"grad_norm": 0.4684090998446449, |
|
"learning_rate": 5.501999167229554e-06, |
|
"loss": 0.5419, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 1.5669466826675809, |
|
"grad_norm": 0.5051008793716584, |
|
"learning_rate": 5.482141825940363e-06, |
|
"loss": 0.539, |
|
"step": 2285 |
|
}, |
|
{ |
|
"epoch": 1.5703754500257157, |
|
"grad_norm": 0.5270612668963798, |
|
"learning_rate": 5.4622768056664e-06, |
|
"loss": 0.552, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 1.5738042173838505, |
|
"grad_norm": 0.5102700734031906, |
|
"learning_rate": 5.442404422794212e-06, |
|
"loss": 0.5413, |
|
"step": 2295 |
|
}, |
|
{ |
|
"epoch": 1.5772329847419853, |
|
"grad_norm": 0.4651045699311542, |
|
"learning_rate": 5.42252499382761e-06, |
|
"loss": 0.5457, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.58066175210012, |
|
"grad_norm": 0.48408561746393897, |
|
"learning_rate": 5.402638835382628e-06, |
|
"loss": 0.5454, |
|
"step": 2305 |
|
}, |
|
{ |
|
"epoch": 1.5840905194582549, |
|
"grad_norm": 0.4886707605239818, |
|
"learning_rate": 5.38274626418248e-06, |
|
"loss": 0.5404, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 1.5875192868163897, |
|
"grad_norm": 0.5358058957587754, |
|
"learning_rate": 5.362847597052509e-06, |
|
"loss": 0.5407, |
|
"step": 2315 |
|
}, |
|
{ |
|
"epoch": 1.5909480541745242, |
|
"grad_norm": 0.4969840985996426, |
|
"learning_rate": 5.3429431509151515e-06, |
|
"loss": 0.5408, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 1.594376821532659, |
|
"grad_norm": 0.4903079059609584, |
|
"learning_rate": 5.323033242784889e-06, |
|
"loss": 0.5368, |
|
"step": 2325 |
|
}, |
|
{ |
|
"epoch": 1.5978055888907936, |
|
"grad_norm": 0.4738390208380356, |
|
"learning_rate": 5.303118189763187e-06, |
|
"loss": 0.5482, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 1.6012343562489284, |
|
"grad_norm": 0.5096387167156792, |
|
"learning_rate": 5.283198309033457e-06, |
|
"loss": 0.5476, |
|
"step": 2335 |
|
}, |
|
{ |
|
"epoch": 1.6046631236070632, |
|
"grad_norm": 0.5265182211890799, |
|
"learning_rate": 5.263273917856e-06, |
|
"loss": 0.5363, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 1.608091890965198, |
|
"grad_norm": 0.5159782309776882, |
|
"learning_rate": 5.243345333562954e-06, |
|
"loss": 0.5453, |
|
"step": 2345 |
|
}, |
|
{ |
|
"epoch": 1.6115206583233328, |
|
"grad_norm": 0.4866586786251063, |
|
"learning_rate": 5.22341287355324e-06, |
|
"loss": 0.5366, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 1.6149494256814676, |
|
"grad_norm": 0.5069139373526034, |
|
"learning_rate": 5.203476855287507e-06, |
|
"loss": 0.5424, |
|
"step": 2355 |
|
}, |
|
{ |
|
"epoch": 1.6183781930396024, |
|
"grad_norm": 0.47559808993135405, |
|
"learning_rate": 5.183537596283075e-06, |
|
"loss": 0.5437, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 1.621806960397737, |
|
"grad_norm": 0.4777303119256066, |
|
"learning_rate": 5.1635954141088815e-06, |
|
"loss": 0.5432, |
|
"step": 2365 |
|
}, |
|
{ |
|
"epoch": 1.6252357277558718, |
|
"grad_norm": 0.46998224709647424, |
|
"learning_rate": 5.143650626380417e-06, |
|
"loss": 0.5487, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 1.6286644951140063, |
|
"grad_norm": 0.5020515855916554, |
|
"learning_rate": 5.123703550754669e-06, |
|
"loss": 0.5437, |
|
"step": 2375 |
|
}, |
|
{ |
|
"epoch": 1.6320932624721411, |
|
"grad_norm": 0.4986048548791825, |
|
"learning_rate": 5.103754504925071e-06, |
|
"loss": 0.548, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 1.635522029830276, |
|
"grad_norm": 0.49818017928688363, |
|
"learning_rate": 5.083803806616428e-06, |
|
"loss": 0.5407, |
|
"step": 2385 |
|
}, |
|
{ |
|
"epoch": 1.6389507971884107, |
|
"grad_norm": 0.4687036183917943, |
|
"learning_rate": 5.06385177357987e-06, |
|
"loss": 0.5418, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 1.6423795645465455, |
|
"grad_norm": 0.5165271708659772, |
|
"learning_rate": 5.043898723587779e-06, |
|
"loss": 0.5473, |
|
"step": 2395 |
|
}, |
|
{ |
|
"epoch": 1.6458083319046803, |
|
"grad_norm": 0.583421869027607, |
|
"learning_rate": 5.023944974428739e-06, |
|
"loss": 0.5383, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.6492370992628151, |
|
"grad_norm": 0.508317228928292, |
|
"learning_rate": 5.003990843902463e-06, |
|
"loss": 0.5386, |
|
"step": 2405 |
|
}, |
|
{ |
|
"epoch": 1.6526658666209497, |
|
"grad_norm": 0.5359492047497594, |
|
"learning_rate": 4.9840366498147495e-06, |
|
"loss": 0.5425, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 1.6560946339790845, |
|
"grad_norm": 0.5183122191069008, |
|
"learning_rate": 4.964082709972394e-06, |
|
"loss": 0.5502, |
|
"step": 2415 |
|
}, |
|
{ |
|
"epoch": 1.6595234013372193, |
|
"grad_norm": 0.5124930538710774, |
|
"learning_rate": 4.944129342178156e-06, |
|
"loss": 0.54, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 1.662952168695354, |
|
"grad_norm": 0.5037854005505844, |
|
"learning_rate": 4.924176864225678e-06, |
|
"loss": 0.538, |
|
"step": 2425 |
|
}, |
|
{ |
|
"epoch": 1.6663809360534887, |
|
"grad_norm": 0.5034524367425028, |
|
"learning_rate": 4.90422559389443e-06, |
|
"loss": 0.5342, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 1.6698097034116235, |
|
"grad_norm": 0.48847928157114906, |
|
"learning_rate": 4.88427584894465e-06, |
|
"loss": 0.5414, |
|
"step": 2435 |
|
}, |
|
{ |
|
"epoch": 1.6732384707697583, |
|
"grad_norm": 0.5197993207547689, |
|
"learning_rate": 4.864327947112281e-06, |
|
"loss": 0.5437, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 1.676667238127893, |
|
"grad_norm": 0.4979107929661232, |
|
"learning_rate": 4.84438220610391e-06, |
|
"loss": 0.541, |
|
"step": 2445 |
|
}, |
|
{ |
|
"epoch": 1.6800960054860279, |
|
"grad_norm": 0.47357100832452387, |
|
"learning_rate": 4.82443894359171e-06, |
|
"loss": 0.5375, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 1.6835247728441627, |
|
"grad_norm": 0.47499915823273264, |
|
"learning_rate": 4.804498477208382e-06, |
|
"loss": 0.5435, |
|
"step": 2455 |
|
}, |
|
{ |
|
"epoch": 1.6869535402022973, |
|
"grad_norm": 0.5246936291111205, |
|
"learning_rate": 4.784561124542088e-06, |
|
"loss": 0.5399, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 1.690382307560432, |
|
"grad_norm": 0.5396195065092629, |
|
"learning_rate": 4.764627203131401e-06, |
|
"loss": 0.5323, |
|
"step": 2465 |
|
}, |
|
{ |
|
"epoch": 1.6938110749185666, |
|
"grad_norm": 0.49550562745484766, |
|
"learning_rate": 4.744697030460248e-06, |
|
"loss": 0.54, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 1.6972398422767014, |
|
"grad_norm": 0.4834798755139004, |
|
"learning_rate": 4.724770923952844e-06, |
|
"loss": 0.5441, |
|
"step": 2475 |
|
}, |
|
{ |
|
"epoch": 1.7006686096348362, |
|
"grad_norm": 0.5285955350607044, |
|
"learning_rate": 4.7048492009686525e-06, |
|
"loss": 0.5391, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 1.704097376992971, |
|
"grad_norm": 0.5121886893362284, |
|
"learning_rate": 4.6849321787973116e-06, |
|
"loss": 0.5406, |
|
"step": 2485 |
|
}, |
|
{ |
|
"epoch": 1.7075261443511058, |
|
"grad_norm": 0.5563183347246416, |
|
"learning_rate": 4.6650201746535926e-06, |
|
"loss": 0.5381, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 1.7109549117092406, |
|
"grad_norm": 0.49534162399833953, |
|
"learning_rate": 4.645113505672353e-06, |
|
"loss": 0.5359, |
|
"step": 2495 |
|
}, |
|
{ |
|
"epoch": 1.7143836790673754, |
|
"grad_norm": 0.4754638729290019, |
|
"learning_rate": 4.625212488903467e-06, |
|
"loss": 0.5411, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.71781244642551, |
|
"grad_norm": 0.4701836831162324, |
|
"learning_rate": 4.605317441306798e-06, |
|
"loss": 0.5362, |
|
"step": 2505 |
|
}, |
|
{ |
|
"epoch": 1.7212412137836448, |
|
"grad_norm": 0.48575768441353867, |
|
"learning_rate": 4.585428679747133e-06, |
|
"loss": 0.5419, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 1.7246699811417794, |
|
"grad_norm": 0.561176281859862, |
|
"learning_rate": 4.565546520989139e-06, |
|
"loss": 0.5361, |
|
"step": 2515 |
|
}, |
|
{ |
|
"epoch": 1.7280987484999142, |
|
"grad_norm": 0.4991619958671278, |
|
"learning_rate": 4.545671281692331e-06, |
|
"loss": 0.5379, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 1.731527515858049, |
|
"grad_norm": 0.4894062203682473, |
|
"learning_rate": 4.525803278406011e-06, |
|
"loss": 0.5359, |
|
"step": 2525 |
|
}, |
|
{ |
|
"epoch": 1.7349562832161838, |
|
"grad_norm": 0.4811044764793809, |
|
"learning_rate": 4.505942827564242e-06, |
|
"loss": 0.5426, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 1.7383850505743186, |
|
"grad_norm": 0.48626341909810544, |
|
"learning_rate": 4.4860902454807905e-06, |
|
"loss": 0.5336, |
|
"step": 2535 |
|
}, |
|
{ |
|
"epoch": 1.7418138179324534, |
|
"grad_norm": 0.4677680916526279, |
|
"learning_rate": 4.466245848344106e-06, |
|
"loss": 0.5408, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 1.7452425852905882, |
|
"grad_norm": 0.49984747393620377, |
|
"learning_rate": 4.446409952212278e-06, |
|
"loss": 0.5347, |
|
"step": 2545 |
|
}, |
|
{ |
|
"epoch": 1.7486713526487228, |
|
"grad_norm": 0.4932054681232252, |
|
"learning_rate": 4.426582873007999e-06, |
|
"loss": 0.5451, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 1.7521001200068576, |
|
"grad_norm": 0.4794644104456506, |
|
"learning_rate": 4.406764926513536e-06, |
|
"loss": 0.5403, |
|
"step": 2555 |
|
}, |
|
{ |
|
"epoch": 1.7555288873649921, |
|
"grad_norm": 0.45717917978063133, |
|
"learning_rate": 4.386956428365701e-06, |
|
"loss": 0.5379, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 1.758957654723127, |
|
"grad_norm": 0.46935105917323766, |
|
"learning_rate": 4.36715769405083e-06, |
|
"loss": 0.5366, |
|
"step": 2565 |
|
}, |
|
{ |
|
"epoch": 1.7623864220812617, |
|
"grad_norm": 0.4839602278611378, |
|
"learning_rate": 4.347369038899744e-06, |
|
"loss": 0.5403, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 1.7658151894393965, |
|
"grad_norm": 0.4996673443974444, |
|
"learning_rate": 4.327590778082734e-06, |
|
"loss": 0.5413, |
|
"step": 2575 |
|
}, |
|
{ |
|
"epoch": 1.7692439567975313, |
|
"grad_norm": 0.47519860849274637, |
|
"learning_rate": 4.307823226604555e-06, |
|
"loss": 0.5402, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 1.7726727241556661, |
|
"grad_norm": 0.47545446063997776, |
|
"learning_rate": 4.28806669929938e-06, |
|
"loss": 0.5374, |
|
"step": 2585 |
|
}, |
|
{ |
|
"epoch": 1.776101491513801, |
|
"grad_norm": 0.5148035850870979, |
|
"learning_rate": 4.2683215108258145e-06, |
|
"loss": 0.5392, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 1.7795302588719355, |
|
"grad_norm": 0.49138867529600283, |
|
"learning_rate": 4.2485879756618685e-06, |
|
"loss": 0.5377, |
|
"step": 2595 |
|
}, |
|
{ |
|
"epoch": 1.7829590262300703, |
|
"grad_norm": 0.4888120553433302, |
|
"learning_rate": 4.228866408099945e-06, |
|
"loss": 0.5415, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.7863877935882049, |
|
"grad_norm": 0.46500050166415424, |
|
"learning_rate": 4.2091571222418546e-06, |
|
"loss": 0.536, |
|
"step": 2605 |
|
}, |
|
{ |
|
"epoch": 1.7898165609463397, |
|
"grad_norm": 0.4829554221776432, |
|
"learning_rate": 4.189460431993788e-06, |
|
"loss": 0.5451, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 1.7932453283044745, |
|
"grad_norm": 0.4917384629382616, |
|
"learning_rate": 4.169776651061334e-06, |
|
"loss": 0.5409, |
|
"step": 2615 |
|
}, |
|
{ |
|
"epoch": 1.7966740956626093, |
|
"grad_norm": 0.49620621093693373, |
|
"learning_rate": 4.150106092944475e-06, |
|
"loss": 0.541, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 1.800102863020744, |
|
"grad_norm": 0.467340353977474, |
|
"learning_rate": 4.130449070932594e-06, |
|
"loss": 0.5407, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 1.8035316303788789, |
|
"grad_norm": 0.49470765077176454, |
|
"learning_rate": 4.110805898099492e-06, |
|
"loss": 0.5416, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 1.8069603977370137, |
|
"grad_norm": 0.4666535437410526, |
|
"learning_rate": 4.091176887298392e-06, |
|
"loss": 0.5406, |
|
"step": 2635 |
|
}, |
|
{ |
|
"epoch": 1.8103891650951482, |
|
"grad_norm": 0.46143547716397726, |
|
"learning_rate": 4.071562351156966e-06, |
|
"loss": 0.5344, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 1.813817932453283, |
|
"grad_norm": 0.46910481572596163, |
|
"learning_rate": 4.051962602072343e-06, |
|
"loss": 0.5364, |
|
"step": 2645 |
|
}, |
|
{ |
|
"epoch": 1.8172466998114178, |
|
"grad_norm": 0.4694928382743332, |
|
"learning_rate": 4.032377952206148e-06, |
|
"loss": 0.5413, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 1.8206754671695524, |
|
"grad_norm": 0.47422238985591536, |
|
"learning_rate": 4.012808713479523e-06, |
|
"loss": 0.5435, |
|
"step": 2655 |
|
}, |
|
{ |
|
"epoch": 1.8241042345276872, |
|
"grad_norm": 0.48396135634665843, |
|
"learning_rate": 3.993255197568154e-06, |
|
"loss": 0.5336, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 1.827533001885822, |
|
"grad_norm": 0.4873226150393351, |
|
"learning_rate": 3.9737177158973185e-06, |
|
"loss": 0.5309, |
|
"step": 2665 |
|
}, |
|
{ |
|
"epoch": 1.8309617692439568, |
|
"grad_norm": 0.4576384143762062, |
|
"learning_rate": 3.954196579636918e-06, |
|
"loss": 0.5444, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 1.8343905366020916, |
|
"grad_norm": 0.49454868144127, |
|
"learning_rate": 3.93469209969652e-06, |
|
"loss": 0.5353, |
|
"step": 2675 |
|
}, |
|
{ |
|
"epoch": 1.8378193039602264, |
|
"grad_norm": 0.4490405182223933, |
|
"learning_rate": 3.91520458672042e-06, |
|
"loss": 0.5352, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 1.8412480713183612, |
|
"grad_norm": 0.4892412353541936, |
|
"learning_rate": 3.895734351082668e-06, |
|
"loss": 0.5364, |
|
"step": 2685 |
|
}, |
|
{ |
|
"epoch": 1.8446768386764958, |
|
"grad_norm": 0.44948826305297584, |
|
"learning_rate": 3.876281702882156e-06, |
|
"loss": 0.5382, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 1.8481056060346306, |
|
"grad_norm": 0.46974192317523716, |
|
"learning_rate": 3.8568469519376585e-06, |
|
"loss": 0.534, |
|
"step": 2695 |
|
}, |
|
{ |
|
"epoch": 1.8515343733927652, |
|
"grad_norm": 0.4541143574340376, |
|
"learning_rate": 3.837430407782896e-06, |
|
"loss": 0.5426, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.8549631407509, |
|
"grad_norm": 0.4749792696125447, |
|
"learning_rate": 3.818032379661626e-06, |
|
"loss": 0.5396, |
|
"step": 2705 |
|
}, |
|
{ |
|
"epoch": 1.8583919081090348, |
|
"grad_norm": 0.46058829711339094, |
|
"learning_rate": 3.7986531765226965e-06, |
|
"loss": 0.5385, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 1.8618206754671696, |
|
"grad_norm": 0.45724757617193085, |
|
"learning_rate": 3.779293107015136e-06, |
|
"loss": 0.5325, |
|
"step": 2715 |
|
}, |
|
{ |
|
"epoch": 1.8652494428253044, |
|
"grad_norm": 0.4613637136202585, |
|
"learning_rate": 3.759952479483232e-06, |
|
"loss": 0.5332, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 1.8686782101834392, |
|
"grad_norm": 0.45902944748623037, |
|
"learning_rate": 3.7406316019616297e-06, |
|
"loss": 0.5402, |
|
"step": 2725 |
|
}, |
|
{ |
|
"epoch": 1.872106977541574, |
|
"grad_norm": 0.4810427911391462, |
|
"learning_rate": 3.7213307821704115e-06, |
|
"loss": 0.5397, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 1.8755357448997085, |
|
"grad_norm": 0.5021212230417156, |
|
"learning_rate": 3.7020503275102095e-06, |
|
"loss": 0.5478, |
|
"step": 2735 |
|
}, |
|
{ |
|
"epoch": 1.8789645122578433, |
|
"grad_norm": 0.4836776357761823, |
|
"learning_rate": 3.6827905450573022e-06, |
|
"loss": 0.5358, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 1.882393279615978, |
|
"grad_norm": 0.5101566373577191, |
|
"learning_rate": 3.6635517415587264e-06, |
|
"loss": 0.5299, |
|
"step": 2745 |
|
}, |
|
{ |
|
"epoch": 1.8858220469741127, |
|
"grad_norm": 0.4734869935919425, |
|
"learning_rate": 3.6443342234273905e-06, |
|
"loss": 0.5364, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 1.8892508143322475, |
|
"grad_norm": 0.4977350264147008, |
|
"learning_rate": 3.6251382967371938e-06, |
|
"loss": 0.5344, |
|
"step": 2755 |
|
}, |
|
{ |
|
"epoch": 1.8926795816903823, |
|
"grad_norm": 0.45745712112103115, |
|
"learning_rate": 3.6059642672181537e-06, |
|
"loss": 0.5338, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 1.896108349048517, |
|
"grad_norm": 0.5104324084550051, |
|
"learning_rate": 3.586812440251537e-06, |
|
"loss": 0.5347, |
|
"step": 2765 |
|
}, |
|
{ |
|
"epoch": 1.899537116406652, |
|
"grad_norm": 0.5335392069435976, |
|
"learning_rate": 3.5676831208649887e-06, |
|
"loss": 0.5346, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 1.9029658837647867, |
|
"grad_norm": 0.4895513127909529, |
|
"learning_rate": 3.5485766137276894e-06, |
|
"loss": 0.5418, |
|
"step": 2775 |
|
}, |
|
{ |
|
"epoch": 1.9063946511229213, |
|
"grad_norm": 0.4542167715320498, |
|
"learning_rate": 3.5294932231454838e-06, |
|
"loss": 0.5385, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 1.909823418481056, |
|
"grad_norm": 0.4744313937102546, |
|
"learning_rate": 3.510433253056045e-06, |
|
"loss": 0.5391, |
|
"step": 2785 |
|
}, |
|
{ |
|
"epoch": 1.9132521858391907, |
|
"grad_norm": 0.4427663775130247, |
|
"learning_rate": 3.4913970070240388e-06, |
|
"loss": 0.5444, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 1.9166809531973255, |
|
"grad_norm": 0.47529978421135, |
|
"learning_rate": 3.4723847882362755e-06, |
|
"loss": 0.5373, |
|
"step": 2795 |
|
}, |
|
{ |
|
"epoch": 1.9201097205554603, |
|
"grad_norm": 0.4561099449022098, |
|
"learning_rate": 3.4533968994968913e-06, |
|
"loss": 0.5332, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.923538487913595, |
|
"grad_norm": 0.47185420315317955, |
|
"learning_rate": 3.4344336432225205e-06, |
|
"loss": 0.5277, |
|
"step": 2805 |
|
}, |
|
{ |
|
"epoch": 1.9269672552717299, |
|
"grad_norm": 0.4730035039099479, |
|
"learning_rate": 3.41549532143748e-06, |
|
"loss": 0.5377, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 1.9303960226298646, |
|
"grad_norm": 0.48079447901439726, |
|
"learning_rate": 3.3965822357689626e-06, |
|
"loss": 0.5349, |
|
"step": 2815 |
|
}, |
|
{ |
|
"epoch": 1.9338247899879994, |
|
"grad_norm": 0.458616349152276, |
|
"learning_rate": 3.3776946874422268e-06, |
|
"loss": 0.5312, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 1.937253557346134, |
|
"grad_norm": 0.4555844676968641, |
|
"learning_rate": 3.3588329772758054e-06, |
|
"loss": 0.5359, |
|
"step": 2825 |
|
}, |
|
{ |
|
"epoch": 1.9406823247042688, |
|
"grad_norm": 0.4916927479459658, |
|
"learning_rate": 3.3399974056767095e-06, |
|
"loss": 0.5343, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 1.9441110920624034, |
|
"grad_norm": 0.47636277234450725, |
|
"learning_rate": 3.3211882726356447e-06, |
|
"loss": 0.5363, |
|
"step": 2835 |
|
}, |
|
{ |
|
"epoch": 1.9475398594205382, |
|
"grad_norm": 0.46891572696488587, |
|
"learning_rate": 3.30240587772224e-06, |
|
"loss": 0.537, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 1.950968626778673, |
|
"grad_norm": 0.4845143780415882, |
|
"learning_rate": 3.283650520080265e-06, |
|
"loss": 0.5367, |
|
"step": 2845 |
|
}, |
|
{ |
|
"epoch": 1.9543973941368078, |
|
"grad_norm": 0.4714569513171776, |
|
"learning_rate": 3.2649224984228756e-06, |
|
"loss": 0.5394, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 1.9578261614949426, |
|
"grad_norm": 0.46470157896584324, |
|
"learning_rate": 3.246222111027849e-06, |
|
"loss": 0.5404, |
|
"step": 2855 |
|
}, |
|
{ |
|
"epoch": 1.9612549288530774, |
|
"grad_norm": 0.46189223769008775, |
|
"learning_rate": 3.227549655732843e-06, |
|
"loss": 0.5394, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 1.9646836962112122, |
|
"grad_norm": 0.459933595776418, |
|
"learning_rate": 3.2089054299306376e-06, |
|
"loss": 0.5351, |
|
"step": 2865 |
|
}, |
|
{ |
|
"epoch": 1.9681124635693468, |
|
"grad_norm": 0.4881070886064201, |
|
"learning_rate": 3.19028973056441e-06, |
|
"loss": 0.5326, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 1.9715412309274816, |
|
"grad_norm": 0.4518011695478976, |
|
"learning_rate": 3.171702854123008e-06, |
|
"loss": 0.5423, |
|
"step": 2875 |
|
}, |
|
{ |
|
"epoch": 1.9749699982856164, |
|
"grad_norm": 0.45756039660819475, |
|
"learning_rate": 3.153145096636211e-06, |
|
"loss": 0.5339, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 1.978398765643751, |
|
"grad_norm": 0.4544722411338075, |
|
"learning_rate": 3.134616753670036e-06, |
|
"loss": 0.5308, |
|
"step": 2885 |
|
}, |
|
{ |
|
"epoch": 1.9818275330018857, |
|
"grad_norm": 0.4592115025179121, |
|
"learning_rate": 3.1161181203220146e-06, |
|
"loss": 0.5353, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 1.9852563003600205, |
|
"grad_norm": 0.46992937248193806, |
|
"learning_rate": 3.0976494912165e-06, |
|
"loss": 0.5369, |
|
"step": 2895 |
|
}, |
|
{ |
|
"epoch": 1.9886850677181553, |
|
"grad_norm": 0.4675123294101481, |
|
"learning_rate": 3.079211160499975e-06, |
|
"loss": 0.5326, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 1.9921138350762901, |
|
"grad_norm": 0.480213421331211, |
|
"learning_rate": 3.060803421836363e-06, |
|
"loss": 0.5392, |
|
"step": 2905 |
|
}, |
|
{ |
|
"epoch": 1.995542602434425, |
|
"grad_norm": 0.4628568848487098, |
|
"learning_rate": 3.0424265684023556e-06, |
|
"loss": 0.5321, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 1.9989713697925597, |
|
"grad_norm": 0.49388511981349187, |
|
"learning_rate": 3.0240808928827397e-06, |
|
"loss": 0.5287, |
|
"step": 2915 |
|
}, |
|
{ |
|
"epoch": 1.9996571232641864, |
|
"eval_loss": 0.5881877541542053, |
|
"eval_runtime": 204.4729, |
|
"eval_samples_per_second": 48.06, |
|
"eval_steps_per_second": 1.506, |
|
"step": 2916 |
|
}, |
|
{ |
|
"epoch": 2.0024001371506945, |
|
"grad_norm": 0.874753190390515, |
|
"learning_rate": 3.0057666874657365e-06, |
|
"loss": 0.4981, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 2.005828904508829, |
|
"grad_norm": 0.5805650180046337, |
|
"learning_rate": 2.9874842438383508e-06, |
|
"loss": 0.4781, |
|
"step": 2925 |
|
}, |
|
{ |
|
"epoch": 2.0092576718669637, |
|
"grad_norm": 0.5290132742038788, |
|
"learning_rate": 2.9692338531817205e-06, |
|
"loss": 0.473, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 2.0126864392250985, |
|
"grad_norm": 0.5751295049236377, |
|
"learning_rate": 2.951015806166485e-06, |
|
"loss": 0.4788, |
|
"step": 2935 |
|
}, |
|
{ |
|
"epoch": 2.0161152065832333, |
|
"grad_norm": 0.5442073882893506, |
|
"learning_rate": 2.9328303929481507e-06, |
|
"loss": 0.4756, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 2.019543973941368, |
|
"grad_norm": 0.5046978835481467, |
|
"learning_rate": 2.9146779031624683e-06, |
|
"loss": 0.4747, |
|
"step": 2945 |
|
}, |
|
{ |
|
"epoch": 2.022972741299503, |
|
"grad_norm": 0.4694017759591277, |
|
"learning_rate": 2.8965586259208295e-06, |
|
"loss": 0.4787, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 2.0264015086576377, |
|
"grad_norm": 0.49182764229033304, |
|
"learning_rate": 2.878472849805651e-06, |
|
"loss": 0.4817, |
|
"step": 2955 |
|
}, |
|
{ |
|
"epoch": 2.0298302760157725, |
|
"grad_norm": 0.5353082540824908, |
|
"learning_rate": 2.860420862865787e-06, |
|
"loss": 0.4814, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 2.0332590433739073, |
|
"grad_norm": 0.4849568396076438, |
|
"learning_rate": 2.842402952611931e-06, |
|
"loss": 0.4774, |
|
"step": 2965 |
|
}, |
|
{ |
|
"epoch": 2.0366878107320416, |
|
"grad_norm": 0.455032309231727, |
|
"learning_rate": 2.82441940601205e-06, |
|
"loss": 0.488, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 2.0401165780901764, |
|
"grad_norm": 0.47238583732747885, |
|
"learning_rate": 2.8064705094868074e-06, |
|
"loss": 0.4847, |
|
"step": 2975 |
|
}, |
|
{ |
|
"epoch": 2.0435453454483112, |
|
"grad_norm": 0.46589829010871375, |
|
"learning_rate": 2.7885565489049948e-06, |
|
"loss": 0.4745, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 2.046974112806446, |
|
"grad_norm": 0.5097253834071658, |
|
"learning_rate": 2.7706778095789905e-06, |
|
"loss": 0.4822, |
|
"step": 2985 |
|
}, |
|
{ |
|
"epoch": 2.050402880164581, |
|
"grad_norm": 0.4744363699843961, |
|
"learning_rate": 2.7528345762602125e-06, |
|
"loss": 0.4837, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 2.0538316475227156, |
|
"grad_norm": 0.48096853101811016, |
|
"learning_rate": 2.7350271331345733e-06, |
|
"loss": 0.4689, |
|
"step": 2995 |
|
}, |
|
{ |
|
"epoch": 2.0572604148808504, |
|
"grad_norm": 0.45888264058390604, |
|
"learning_rate": 2.7172557638179674e-06, |
|
"loss": 0.4814, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 2.0606891822389852, |
|
"grad_norm": 0.5074519719478089, |
|
"learning_rate": 2.6995207513517484e-06, |
|
"loss": 0.4835, |
|
"step": 3005 |
|
}, |
|
{ |
|
"epoch": 2.06411794959712, |
|
"grad_norm": 0.46736698036035224, |
|
"learning_rate": 2.681822378198221e-06, |
|
"loss": 0.4821, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 2.0675467169552544, |
|
"grad_norm": 0.4677318714005963, |
|
"learning_rate": 2.6641609262361343e-06, |
|
"loss": 0.4825, |
|
"step": 3015 |
|
}, |
|
{ |
|
"epoch": 2.070975484313389, |
|
"grad_norm": 0.47774880777961237, |
|
"learning_rate": 2.6465366767562162e-06, |
|
"loss": 0.4748, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 2.074404251671524, |
|
"grad_norm": 0.45924654536691306, |
|
"learning_rate": 2.628949910456663e-06, |
|
"loss": 0.4767, |
|
"step": 3025 |
|
}, |
|
{ |
|
"epoch": 2.077833019029659, |
|
"grad_norm": 0.47391360563192103, |
|
"learning_rate": 2.611400907438685e-06, |
|
"loss": 0.4775, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 2.0812617863877936, |
|
"grad_norm": 0.49156177694024655, |
|
"learning_rate": 2.5938899472020545e-06, |
|
"loss": 0.4789, |
|
"step": 3035 |
|
}, |
|
{ |
|
"epoch": 2.0846905537459284, |
|
"grad_norm": 0.4723037751330721, |
|
"learning_rate": 2.5764173086406306e-06, |
|
"loss": 0.4829, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 2.088119321104063, |
|
"grad_norm": 0.46114598626056696, |
|
"learning_rate": 2.558983270037937e-06, |
|
"loss": 0.4743, |
|
"step": 3045 |
|
}, |
|
{ |
|
"epoch": 2.091548088462198, |
|
"grad_norm": 0.4718425543262081, |
|
"learning_rate": 2.5415881090627227e-06, |
|
"loss": 0.4785, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 2.0949768558203328, |
|
"grad_norm": 0.48700567548085505, |
|
"learning_rate": 2.5242321027645354e-06, |
|
"loss": 0.4731, |
|
"step": 3055 |
|
}, |
|
{ |
|
"epoch": 2.098405623178467, |
|
"grad_norm": 0.47428262784263286, |
|
"learning_rate": 2.506915527569318e-06, |
|
"loss": 0.4805, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 2.101834390536602, |
|
"grad_norm": 0.482699000253055, |
|
"learning_rate": 2.4896386592749994e-06, |
|
"loss": 0.4809, |
|
"step": 3065 |
|
}, |
|
{ |
|
"epoch": 2.1052631578947367, |
|
"grad_norm": 0.48854325554987943, |
|
"learning_rate": 2.472401773047107e-06, |
|
"loss": 0.4753, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 2.1086919252528715, |
|
"grad_norm": 0.5091234833700028, |
|
"learning_rate": 2.455205143414375e-06, |
|
"loss": 0.477, |
|
"step": 3075 |
|
}, |
|
{ |
|
"epoch": 2.1121206926110063, |
|
"grad_norm": 0.474218431216323, |
|
"learning_rate": 2.438049044264382e-06, |
|
"loss": 0.4779, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 2.115549459969141, |
|
"grad_norm": 0.4762105524360361, |
|
"learning_rate": 2.4209337488391886e-06, |
|
"loss": 0.4763, |
|
"step": 3085 |
|
}, |
|
{ |
|
"epoch": 2.118978227327276, |
|
"grad_norm": 0.4708922326438511, |
|
"learning_rate": 2.4038595297309712e-06, |
|
"loss": 0.4803, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 2.1224069946854107, |
|
"grad_norm": 0.48521692596512245, |
|
"learning_rate": 2.386826658877706e-06, |
|
"loss": 0.482, |
|
"step": 3095 |
|
}, |
|
{ |
|
"epoch": 2.1258357620435455, |
|
"grad_norm": 0.494787221830382, |
|
"learning_rate": 2.3698354075588105e-06, |
|
"loss": 0.4803, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 2.12926452940168, |
|
"grad_norm": 0.48222263673783083, |
|
"learning_rate": 2.3528860463908375e-06, |
|
"loss": 0.472, |
|
"step": 3105 |
|
}, |
|
{ |
|
"epoch": 2.1326932967598147, |
|
"grad_norm": 0.4824015117741457, |
|
"learning_rate": 2.3359788453231723e-06, |
|
"loss": 0.4797, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 2.1361220641179495, |
|
"grad_norm": 0.49605929290504214, |
|
"learning_rate": 2.319114073633712e-06, |
|
"loss": 0.4788, |
|
"step": 3115 |
|
}, |
|
{ |
|
"epoch": 2.1395508314760843, |
|
"grad_norm": 0.474543321413953, |
|
"learning_rate": 2.3022919999245964e-06, |
|
"loss": 0.4773, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 2.142979598834219, |
|
"grad_norm": 0.501540938539211, |
|
"learning_rate": 2.285512892117921e-06, |
|
"loss": 0.4822, |
|
"step": 3125 |
|
}, |
|
{ |
|
"epoch": 2.146408366192354, |
|
"grad_norm": 0.5038191337466582, |
|
"learning_rate": 2.2687770174514674e-06, |
|
"loss": 0.4791, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 2.1498371335504887, |
|
"grad_norm": 0.4648773455909595, |
|
"learning_rate": 2.2520846424744545e-06, |
|
"loss": 0.4828, |
|
"step": 3135 |
|
}, |
|
{ |
|
"epoch": 2.1532659009086235, |
|
"grad_norm": 0.4620270257517909, |
|
"learning_rate": 2.23543603304329e-06, |
|
"loss": 0.479, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 2.1566946682667583, |
|
"grad_norm": 0.4950092604387871, |
|
"learning_rate": 2.218831454317336e-06, |
|
"loss": 0.4746, |
|
"step": 3145 |
|
}, |
|
{ |
|
"epoch": 2.160123435624893, |
|
"grad_norm": 0.46350733624535256, |
|
"learning_rate": 2.20227117075468e-06, |
|
"loss": 0.4817, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 2.1635522029830274, |
|
"grad_norm": 0.4842429161440888, |
|
"learning_rate": 2.1857554461079356e-06, |
|
"loss": 0.4829, |
|
"step": 3155 |
|
}, |
|
{ |
|
"epoch": 2.166980970341162, |
|
"grad_norm": 0.48989158037943475, |
|
"learning_rate": 2.1692845434200323e-06, |
|
"loss": 0.477, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 2.170409737699297, |
|
"grad_norm": 0.444636329908828, |
|
"learning_rate": 2.152858725020025e-06, |
|
"loss": 0.4762, |
|
"step": 3165 |
|
}, |
|
{ |
|
"epoch": 2.173838505057432, |
|
"grad_norm": 0.46845184246079136, |
|
"learning_rate": 2.136478252518924e-06, |
|
"loss": 0.4867, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 2.1772672724155666, |
|
"grad_norm": 0.5158451108463605, |
|
"learning_rate": 2.1201433868055223e-06, |
|
"loss": 0.4773, |
|
"step": 3175 |
|
}, |
|
{ |
|
"epoch": 2.1806960397737014, |
|
"grad_norm": 0.484433856048475, |
|
"learning_rate": 2.103854388042243e-06, |
|
"loss": 0.4808, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 2.184124807131836, |
|
"grad_norm": 0.4689424096794033, |
|
"learning_rate": 2.08761151566099e-06, |
|
"loss": 0.4851, |
|
"step": 3185 |
|
}, |
|
{ |
|
"epoch": 2.187553574489971, |
|
"grad_norm": 0.46853301342354137, |
|
"learning_rate": 2.071415028359026e-06, |
|
"loss": 0.4854, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 2.190982341848106, |
|
"grad_norm": 0.46534481693626095, |
|
"learning_rate": 2.0552651840948473e-06, |
|
"loss": 0.4789, |
|
"step": 3195 |
|
}, |
|
{ |
|
"epoch": 2.19441110920624, |
|
"grad_norm": 0.47458944353516924, |
|
"learning_rate": 2.0391622400840665e-06, |
|
"loss": 0.4801, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 2.197839876564375, |
|
"grad_norm": 0.46593527956244135, |
|
"learning_rate": 2.023106452795339e-06, |
|
"loss": 0.4726, |
|
"step": 3205 |
|
}, |
|
{ |
|
"epoch": 2.2012686439225098, |
|
"grad_norm": 0.4773652150154532, |
|
"learning_rate": 2.0070980779462513e-06, |
|
"loss": 0.4806, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 2.2046974112806446, |
|
"grad_norm": 0.4647727192391881, |
|
"learning_rate": 1.9911373704992617e-06, |
|
"loss": 0.4806, |
|
"step": 3215 |
|
}, |
|
{ |
|
"epoch": 2.2081261786387794, |
|
"grad_norm": 0.4794479506202543, |
|
"learning_rate": 1.975224584657648e-06, |
|
"loss": 0.4791, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 2.211554945996914, |
|
"grad_norm": 0.49928694289640674, |
|
"learning_rate": 1.9593599738614382e-06, |
|
"loss": 0.4812, |
|
"step": 3225 |
|
}, |
|
{ |
|
"epoch": 2.214983713355049, |
|
"grad_norm": 0.489572736256811, |
|
"learning_rate": 1.943543790783392e-06, |
|
"loss": 0.4822, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 2.2184124807131838, |
|
"grad_norm": 0.460165152565946, |
|
"learning_rate": 1.927776287324969e-06, |
|
"loss": 0.4845, |
|
"step": 3235 |
|
}, |
|
{ |
|
"epoch": 2.2218412480713186, |
|
"grad_norm": 0.4741945403355791, |
|
"learning_rate": 1.9120577146123125e-06, |
|
"loss": 0.4753, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 2.225270015429453, |
|
"grad_norm": 0.4816051218900053, |
|
"learning_rate": 1.8963883229922603e-06, |
|
"loss": 0.4759, |
|
"step": 3245 |
|
}, |
|
{ |
|
"epoch": 2.2286987827875877, |
|
"grad_norm": 0.4865842140542505, |
|
"learning_rate": 1.8807683620283496e-06, |
|
"loss": 0.4887, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 2.2321275501457225, |
|
"grad_norm": 0.49327525598298544, |
|
"learning_rate": 1.8651980804968466e-06, |
|
"loss": 0.4828, |
|
"step": 3255 |
|
}, |
|
{ |
|
"epoch": 2.2355563175038573, |
|
"grad_norm": 0.4710718086720219, |
|
"learning_rate": 1.8496777263827775e-06, |
|
"loss": 0.4805, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 2.238985084861992, |
|
"grad_norm": 0.4790195567473799, |
|
"learning_rate": 1.834207546875988e-06, |
|
"loss": 0.4761, |
|
"step": 3265 |
|
}, |
|
{ |
|
"epoch": 2.242413852220127, |
|
"grad_norm": 0.47138983012088903, |
|
"learning_rate": 1.8187877883672024e-06, |
|
"loss": 0.4809, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 2.2458426195782617, |
|
"grad_norm": 0.48900189445082154, |
|
"learning_rate": 1.8034186964440937e-06, |
|
"loss": 0.482, |
|
"step": 3275 |
|
}, |
|
{ |
|
"epoch": 2.2492713869363965, |
|
"grad_norm": 0.4723460008561715, |
|
"learning_rate": 1.7881005158873826e-06, |
|
"loss": 0.478, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 2.2527001542945313, |
|
"grad_norm": 0.45431679994556473, |
|
"learning_rate": 1.7728334906669342e-06, |
|
"loss": 0.4828, |
|
"step": 3285 |
|
}, |
|
{ |
|
"epoch": 2.2561289216526657, |
|
"grad_norm": 0.45761440455845676, |
|
"learning_rate": 1.757617863937865e-06, |
|
"loss": 0.4707, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 2.2595576890108005, |
|
"grad_norm": 0.48498068790945004, |
|
"learning_rate": 1.7424538780366884e-06, |
|
"loss": 0.4724, |
|
"step": 3295 |
|
}, |
|
{ |
|
"epoch": 2.2629864563689353, |
|
"grad_norm": 0.45783948313254996, |
|
"learning_rate": 1.7273417744774323e-06, |
|
"loss": 0.4774, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 2.26641522372707, |
|
"grad_norm": 0.46625983570633794, |
|
"learning_rate": 1.7122817939478104e-06, |
|
"loss": 0.4781, |
|
"step": 3305 |
|
}, |
|
{ |
|
"epoch": 2.269843991085205, |
|
"grad_norm": 0.46981244219286544, |
|
"learning_rate": 1.6972741763053835e-06, |
|
"loss": 0.4771, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 2.2732727584433396, |
|
"grad_norm": 0.47749785750136037, |
|
"learning_rate": 1.6823191605737316e-06, |
|
"loss": 0.4812, |
|
"step": 3315 |
|
}, |
|
{ |
|
"epoch": 2.2767015258014744, |
|
"grad_norm": 0.4781030507544295, |
|
"learning_rate": 1.6674169849386606e-06, |
|
"loss": 0.4745, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 2.2801302931596092, |
|
"grad_norm": 0.47642004107759217, |
|
"learning_rate": 1.6525678867443989e-06, |
|
"loss": 0.4782, |
|
"step": 3325 |
|
}, |
|
{ |
|
"epoch": 2.283559060517744, |
|
"grad_norm": 0.4595236798861582, |
|
"learning_rate": 1.6377721024898214e-06, |
|
"loss": 0.4846, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 2.286987827875879, |
|
"grad_norm": 0.4607279352731881, |
|
"learning_rate": 1.623029867824678e-06, |
|
"loss": 0.4783, |
|
"step": 3335 |
|
}, |
|
{ |
|
"epoch": 2.290416595234013, |
|
"grad_norm": 0.4857822760894794, |
|
"learning_rate": 1.608341417545849e-06, |
|
"loss": 0.4774, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 2.293845362592148, |
|
"grad_norm": 0.4577566161740047, |
|
"learning_rate": 1.593706985593599e-06, |
|
"loss": 0.4727, |
|
"step": 3345 |
|
}, |
|
{ |
|
"epoch": 2.297274129950283, |
|
"grad_norm": 0.4573512037078888, |
|
"learning_rate": 1.5791268050478487e-06, |
|
"loss": 0.4746, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 2.3007028973084176, |
|
"grad_norm": 0.47439501092754294, |
|
"learning_rate": 1.5646011081244717e-06, |
|
"loss": 0.4865, |
|
"step": 3355 |
|
}, |
|
{ |
|
"epoch": 2.3041316646665524, |
|
"grad_norm": 0.49446637460132087, |
|
"learning_rate": 1.5501301261715896e-06, |
|
"loss": 0.4751, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 2.307560432024687, |
|
"grad_norm": 0.4952360092448791, |
|
"learning_rate": 1.535714089665889e-06, |
|
"loss": 0.4815, |
|
"step": 3365 |
|
}, |
|
{ |
|
"epoch": 2.310989199382822, |
|
"grad_norm": 0.4731157255344, |
|
"learning_rate": 1.5213532282089466e-06, |
|
"loss": 0.4792, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 2.314417966740957, |
|
"grad_norm": 0.4538368226200657, |
|
"learning_rate": 1.5070477705235803e-06, |
|
"loss": 0.4818, |
|
"step": 3375 |
|
}, |
|
{ |
|
"epoch": 2.317846734099091, |
|
"grad_norm": 0.45299648856103863, |
|
"learning_rate": 1.4927979444502028e-06, |
|
"loss": 0.4761, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 2.321275501457226, |
|
"grad_norm": 0.48133972008005105, |
|
"learning_rate": 1.478603976943186e-06, |
|
"loss": 0.4796, |
|
"step": 3385 |
|
}, |
|
{ |
|
"epoch": 2.3247042688153607, |
|
"grad_norm": 0.46559397796529, |
|
"learning_rate": 1.4644660940672628e-06, |
|
"loss": 0.4833, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 2.3281330361734955, |
|
"grad_norm": 0.46261472772428197, |
|
"learning_rate": 1.4503845209939083e-06, |
|
"loss": 0.4866, |
|
"step": 3395 |
|
}, |
|
{ |
|
"epoch": 2.3315618035316303, |
|
"grad_norm": 0.48901577194657203, |
|
"learning_rate": 1.4363594819977606e-06, |
|
"loss": 0.4802, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 2.334990570889765, |
|
"grad_norm": 0.47026562258797505, |
|
"learning_rate": 1.4223912004530593e-06, |
|
"loss": 0.4842, |
|
"step": 3405 |
|
}, |
|
{ |
|
"epoch": 2.3384193382479, |
|
"grad_norm": 0.46476370601090344, |
|
"learning_rate": 1.4084798988300684e-06, |
|
"loss": 0.4746, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 2.3418481056060347, |
|
"grad_norm": 0.4736014629302977, |
|
"learning_rate": 1.3946257986915486e-06, |
|
"loss": 0.4811, |
|
"step": 3415 |
|
}, |
|
{ |
|
"epoch": 2.3452768729641695, |
|
"grad_norm": 0.4734489622060502, |
|
"learning_rate": 1.3808291206892232e-06, |
|
"loss": 0.4773, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 2.3487056403223043, |
|
"grad_norm": 0.4629066750832987, |
|
"learning_rate": 1.3670900845602585e-06, |
|
"loss": 0.4829, |
|
"step": 3425 |
|
}, |
|
{ |
|
"epoch": 2.3521344076804387, |
|
"grad_norm": 0.4681227467302218, |
|
"learning_rate": 1.3534089091237757e-06, |
|
"loss": 0.476, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 2.3555631750385735, |
|
"grad_norm": 0.454191418129244, |
|
"learning_rate": 1.3397858122773544e-06, |
|
"loss": 0.4747, |
|
"step": 3435 |
|
}, |
|
{ |
|
"epoch": 2.3589919423967083, |
|
"grad_norm": 0.4579452618693956, |
|
"learning_rate": 1.3262210109935719e-06, |
|
"loss": 0.4832, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 2.362420709754843, |
|
"grad_norm": 0.45074466888367914, |
|
"learning_rate": 1.312714721316537e-06, |
|
"loss": 0.4755, |
|
"step": 3445 |
|
}, |
|
{ |
|
"epoch": 2.365849477112978, |
|
"grad_norm": 0.4723349908020595, |
|
"learning_rate": 1.2992671583584587e-06, |
|
"loss": 0.4862, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 2.3692782444711127, |
|
"grad_norm": 0.4708208262941508, |
|
"learning_rate": 1.2858785362962166e-06, |
|
"loss": 0.4777, |
|
"step": 3455 |
|
}, |
|
{ |
|
"epoch": 2.3727070118292475, |
|
"grad_norm": 0.47375162486732203, |
|
"learning_rate": 1.2725490683679458e-06, |
|
"loss": 0.4729, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 2.3761357791873823, |
|
"grad_norm": 0.4678384719988464, |
|
"learning_rate": 1.2592789668696482e-06, |
|
"loss": 0.4787, |
|
"step": 3465 |
|
}, |
|
{ |
|
"epoch": 2.379564546545517, |
|
"grad_norm": 0.4702212675672681, |
|
"learning_rate": 1.2460684431518055e-06, |
|
"loss": 0.4813, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 2.3829933139036514, |
|
"grad_norm": 0.4532398056344406, |
|
"learning_rate": 1.232917707616017e-06, |
|
"loss": 0.4735, |
|
"step": 3475 |
|
}, |
|
{ |
|
"epoch": 2.3864220812617862, |
|
"grad_norm": 0.45797439884914765, |
|
"learning_rate": 1.2198269697116416e-06, |
|
"loss": 0.4778, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 2.389850848619921, |
|
"grad_norm": 0.4723176003130105, |
|
"learning_rate": 1.206796437932472e-06, |
|
"loss": 0.4774, |
|
"step": 3485 |
|
}, |
|
{ |
|
"epoch": 2.393279615978056, |
|
"grad_norm": 0.46906434827216664, |
|
"learning_rate": 1.1938263198134087e-06, |
|
"loss": 0.4808, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 2.3967083833361906, |
|
"grad_norm": 0.4678613569139536, |
|
"learning_rate": 1.1809168219271488e-06, |
|
"loss": 0.4795, |
|
"step": 3495 |
|
}, |
|
{ |
|
"epoch": 2.4001371506943254, |
|
"grad_norm": 0.47248188702171634, |
|
"learning_rate": 1.168068149880912e-06, |
|
"loss": 0.4781, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 2.4035659180524602, |
|
"grad_norm": 0.46791595503949973, |
|
"learning_rate": 1.1552805083131468e-06, |
|
"loss": 0.4786, |
|
"step": 3505 |
|
}, |
|
{ |
|
"epoch": 2.406994685410595, |
|
"grad_norm": 0.45897414399547337, |
|
"learning_rate": 1.1425541008902852e-06, |
|
"loss": 0.4775, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 2.41042345276873, |
|
"grad_norm": 0.4610154781111884, |
|
"learning_rate": 1.1298891303034942e-06, |
|
"loss": 0.4823, |
|
"step": 3515 |
|
}, |
|
{ |
|
"epoch": 2.4138522201268646, |
|
"grad_norm": 0.46406483424598494, |
|
"learning_rate": 1.1172857982654445e-06, |
|
"loss": 0.4761, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 2.417280987484999, |
|
"grad_norm": 0.4475457001447567, |
|
"learning_rate": 1.1047443055071033e-06, |
|
"loss": 0.4784, |
|
"step": 3525 |
|
}, |
|
{ |
|
"epoch": 2.4207097548431338, |
|
"grad_norm": 0.46031864575357934, |
|
"learning_rate": 1.092264851774536e-06, |
|
"loss": 0.4741, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 2.4241385222012686, |
|
"grad_norm": 0.4642032024999473, |
|
"learning_rate": 1.0798476358257198e-06, |
|
"loss": 0.4763, |
|
"step": 3535 |
|
}, |
|
{ |
|
"epoch": 2.4275672895594034, |
|
"grad_norm": 0.46462241022456946, |
|
"learning_rate": 1.067492855427385e-06, |
|
"loss": 0.4787, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 2.430996056917538, |
|
"grad_norm": 0.4769725916465071, |
|
"learning_rate": 1.0552007073518628e-06, |
|
"loss": 0.4826, |
|
"step": 3545 |
|
}, |
|
{ |
|
"epoch": 2.434424824275673, |
|
"grad_norm": 0.46174302133938927, |
|
"learning_rate": 1.0429713873739505e-06, |
|
"loss": 0.4847, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 2.4378535916338078, |
|
"grad_norm": 0.45270102445912286, |
|
"learning_rate": 1.0308050902677907e-06, |
|
"loss": 0.4793, |
|
"step": 3555 |
|
}, |
|
{ |
|
"epoch": 2.4412823589919426, |
|
"grad_norm": 0.46466350347691254, |
|
"learning_rate": 1.0187020098037759e-06, |
|
"loss": 0.4807, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 2.444711126350077, |
|
"grad_norm": 0.4615768268062226, |
|
"learning_rate": 1.0066623387454582e-06, |
|
"loss": 0.4811, |
|
"step": 3565 |
|
}, |
|
{ |
|
"epoch": 2.4481398937082117, |
|
"grad_norm": 0.4544451248675266, |
|
"learning_rate": 9.946862688464753e-07, |
|
"loss": 0.4767, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 2.4515686610663465, |
|
"grad_norm": 0.4742062204892906, |
|
"learning_rate": 9.827739908475099e-07, |
|
"loss": 0.4776, |
|
"step": 3575 |
|
}, |
|
{ |
|
"epoch": 2.4549974284244813, |
|
"grad_norm": 0.4672145082984867, |
|
"learning_rate": 9.709256944732343e-07, |
|
"loss": 0.4804, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 2.458426195782616, |
|
"grad_norm": 0.44899579410081497, |
|
"learning_rate": 9.591415684292975e-07, |
|
"loss": 0.4778, |
|
"step": 3585 |
|
}, |
|
{ |
|
"epoch": 2.461854963140751, |
|
"grad_norm": 0.46844243005424735, |
|
"learning_rate": 9.474218003993275e-07, |
|
"loss": 0.4753, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 2.4652837304988857, |
|
"grad_norm": 0.4654191148385016, |
|
"learning_rate": 9.357665770419244e-07, |
|
"loss": 0.482, |
|
"step": 3595 |
|
}, |
|
{ |
|
"epoch": 2.4687124978570205, |
|
"grad_norm": 0.45942731935750447, |
|
"learning_rate": 9.241760839877023e-07, |
|
"loss": 0.478, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 2.4721412652151553, |
|
"grad_norm": 0.4666227072724768, |
|
"learning_rate": 9.126505058363283e-07, |
|
"loss": 0.4753, |
|
"step": 3605 |
|
}, |
|
{ |
|
"epoch": 2.47557003257329, |
|
"grad_norm": 0.4519254146818986, |
|
"learning_rate": 9.011900261535767e-07, |
|
"loss": 0.4783, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 2.4789987999314245, |
|
"grad_norm": 0.463358478501908, |
|
"learning_rate": 8.897948274684154e-07, |
|
"loss": 0.4739, |
|
"step": 3615 |
|
}, |
|
{ |
|
"epoch": 2.4824275672895593, |
|
"grad_norm": 0.45511211330917706, |
|
"learning_rate": 8.784650912700909e-07, |
|
"loss": 0.4729, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 2.485856334647694, |
|
"grad_norm": 0.47432321264965754, |
|
"learning_rate": 8.672009980052442e-07, |
|
"loss": 0.4811, |
|
"step": 3625 |
|
}, |
|
{ |
|
"epoch": 2.489285102005829, |
|
"grad_norm": 0.4673877749252423, |
|
"learning_rate": 8.560027270750276e-07, |
|
"loss": 0.4814, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 2.4927138693639637, |
|
"grad_norm": 0.4579464435065071, |
|
"learning_rate": 8.44870456832258e-07, |
|
"loss": 0.4806, |
|
"step": 3635 |
|
}, |
|
{ |
|
"epoch": 2.4961426367220985, |
|
"grad_norm": 0.476376159360163, |
|
"learning_rate": 8.338043645785698e-07, |
|
"loss": 0.4823, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 2.4995714040802333, |
|
"grad_norm": 0.46673661354408297, |
|
"learning_rate": 8.228046265615908e-07, |
|
"loss": 0.4759, |
|
"step": 3645 |
|
}, |
|
{ |
|
"epoch": 2.503000171438368, |
|
"grad_norm": 0.4689234252839364, |
|
"learning_rate": 8.118714179721404e-07, |
|
"loss": 0.4781, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 2.5064289387965024, |
|
"grad_norm": 0.457577939504501, |
|
"learning_rate": 8.010049129414333e-07, |
|
"loss": 0.4754, |
|
"step": 3655 |
|
}, |
|
{ |
|
"epoch": 2.509857706154637, |
|
"grad_norm": 0.46767489107398075, |
|
"learning_rate": 7.902052845383112e-07, |
|
"loss": 0.4832, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 2.513286473512772, |
|
"grad_norm": 0.47806526391479376, |
|
"learning_rate": 7.794727047664807e-07, |
|
"loss": 0.4777, |
|
"step": 3665 |
|
}, |
|
{ |
|
"epoch": 2.516715240870907, |
|
"grad_norm": 0.44998017534315693, |
|
"learning_rate": 7.6880734456178e-07, |
|
"loss": 0.4728, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 2.5201440082290416, |
|
"grad_norm": 0.4477060965035502, |
|
"learning_rate": 7.58209373789453e-07, |
|
"loss": 0.4734, |
|
"step": 3675 |
|
}, |
|
{ |
|
"epoch": 2.5235727755871764, |
|
"grad_norm": 0.46217386088270807, |
|
"learning_rate": 7.476789612414414e-07, |
|
"loss": 0.4846, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 2.527001542945311, |
|
"grad_norm": 0.4563904666440247, |
|
"learning_rate": 7.372162746337059e-07, |
|
"loss": 0.4816, |
|
"step": 3685 |
|
}, |
|
{ |
|
"epoch": 2.530430310303446, |
|
"grad_norm": 0.44520759689704725, |
|
"learning_rate": 7.268214806035423e-07, |
|
"loss": 0.4768, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 2.533859077661581, |
|
"grad_norm": 0.4542091051330333, |
|
"learning_rate": 7.164947447069343e-07, |
|
"loss": 0.4798, |
|
"step": 3695 |
|
}, |
|
{ |
|
"epoch": 2.5372878450197156, |
|
"grad_norm": 0.45146271333603777, |
|
"learning_rate": 7.062362314159211e-07, |
|
"loss": 0.474, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 2.5407166123778504, |
|
"grad_norm": 0.4843676817399376, |
|
"learning_rate": 6.960461041159677e-07, |
|
"loss": 0.4826, |
|
"step": 3705 |
|
}, |
|
{ |
|
"epoch": 2.5441453797359848, |
|
"grad_norm": 0.450384975906084, |
|
"learning_rate": 6.859245251033697e-07, |
|
"loss": 0.4768, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 2.5475741470941196, |
|
"grad_norm": 0.4460709873735881, |
|
"learning_rate": 6.758716555826683e-07, |
|
"loss": 0.4733, |
|
"step": 3715 |
|
}, |
|
{ |
|
"epoch": 2.5510029144522544, |
|
"grad_norm": 0.4521937918515753, |
|
"learning_rate": 6.658876556640781e-07, |
|
"loss": 0.4769, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 2.554431681810389, |
|
"grad_norm": 0.4490013681026186, |
|
"learning_rate": 6.559726843609421e-07, |
|
"loss": 0.4805, |
|
"step": 3725 |
|
}, |
|
{ |
|
"epoch": 2.557860449168524, |
|
"grad_norm": 0.45604478361558604, |
|
"learning_rate": 6.461268995871967e-07, |
|
"loss": 0.472, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 2.5612892165266588, |
|
"grad_norm": 0.4684730083561354, |
|
"learning_rate": 6.36350458154859e-07, |
|
"loss": 0.4823, |
|
"step": 3735 |
|
}, |
|
{ |
|
"epoch": 2.5647179838847936, |
|
"grad_norm": 0.44901484120067714, |
|
"learning_rate": 6.266435157715222e-07, |
|
"loss": 0.4799, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 2.568146751242928, |
|
"grad_norm": 0.45631182390582853, |
|
"learning_rate": 6.170062270378862e-07, |
|
"loss": 0.4762, |
|
"step": 3745 |
|
}, |
|
{ |
|
"epoch": 2.5715755186010627, |
|
"grad_norm": 0.4602387391697606, |
|
"learning_rate": 6.074387454452891e-07, |
|
"loss": 0.4713, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 2.5750042859591975, |
|
"grad_norm": 0.4491465188344547, |
|
"learning_rate": 5.979412233732578e-07, |
|
"loss": 0.4751, |
|
"step": 3755 |
|
}, |
|
{ |
|
"epoch": 2.5784330533173323, |
|
"grad_norm": 0.46805364957632967, |
|
"learning_rate": 5.885138120870965e-07, |
|
"loss": 0.4789, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 2.581861820675467, |
|
"grad_norm": 0.4531331451215189, |
|
"learning_rate": 5.791566617354599e-07, |
|
"loss": 0.4754, |
|
"step": 3765 |
|
}, |
|
{ |
|
"epoch": 2.585290588033602, |
|
"grad_norm": 0.4652091913696696, |
|
"learning_rate": 5.698699213479697e-07, |
|
"loss": 0.4763, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 2.5887193553917367, |
|
"grad_norm": 0.4542601341667562, |
|
"learning_rate": 5.606537388328481e-07, |
|
"loss": 0.4789, |
|
"step": 3775 |
|
}, |
|
{ |
|
"epoch": 2.5921481227498715, |
|
"grad_norm": 0.4447156024624712, |
|
"learning_rate": 5.515082609745465e-07, |
|
"loss": 0.476, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 2.5955768901080063, |
|
"grad_norm": 0.45794390952267283, |
|
"learning_rate": 5.424336334314212e-07, |
|
"loss": 0.4781, |
|
"step": 3785 |
|
}, |
|
{ |
|
"epoch": 2.599005657466141, |
|
"grad_norm": 0.45160699771932494, |
|
"learning_rate": 5.334300007334065e-07, |
|
"loss": 0.4771, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 2.602434424824276, |
|
"grad_norm": 0.4618426839968327, |
|
"learning_rate": 5.244975062797176e-07, |
|
"loss": 0.4728, |
|
"step": 3795 |
|
}, |
|
{ |
|
"epoch": 2.6058631921824107, |
|
"grad_norm": 0.4486150363583273, |
|
"learning_rate": 5.156362923365587e-07, |
|
"loss": 0.4748, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 2.609291959540545, |
|
"grad_norm": 0.4545115387932617, |
|
"learning_rate": 5.068465000348666e-07, |
|
"loss": 0.4739, |
|
"step": 3805 |
|
}, |
|
{ |
|
"epoch": 2.61272072689868, |
|
"grad_norm": 0.4623008940487107, |
|
"learning_rate": 4.981282693680584e-07, |
|
"loss": 0.4783, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 2.6161494942568146, |
|
"grad_norm": 0.4653422446290839, |
|
"learning_rate": 4.894817391897988e-07, |
|
"loss": 0.4854, |
|
"step": 3815 |
|
}, |
|
{ |
|
"epoch": 2.6195782616149494, |
|
"grad_norm": 0.4515048388429379, |
|
"learning_rate": 4.80907047211796e-07, |
|
"loss": 0.4831, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 2.6230070289730842, |
|
"grad_norm": 0.4639284889413238, |
|
"learning_rate": 4.7240433000160233e-07, |
|
"loss": 0.4836, |
|
"step": 3825 |
|
}, |
|
{ |
|
"epoch": 2.626435796331219, |
|
"grad_norm": 0.4465998513923373, |
|
"learning_rate": 4.639737229804403e-07, |
|
"loss": 0.4789, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 2.629864563689354, |
|
"grad_norm": 0.46955783511456173, |
|
"learning_rate": 4.556153604210484e-07, |
|
"loss": 0.4823, |
|
"step": 3835 |
|
}, |
|
{ |
|
"epoch": 2.633293331047488, |
|
"grad_norm": 0.4509791198423544, |
|
"learning_rate": 4.473293754455399e-07, |
|
"loss": 0.477, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 2.636722098405623, |
|
"grad_norm": 0.46642581073567424, |
|
"learning_rate": 4.3911590002328476e-07, |
|
"loss": 0.4787, |
|
"step": 3845 |
|
}, |
|
{ |
|
"epoch": 2.640150865763758, |
|
"grad_norm": 0.45567923757743767, |
|
"learning_rate": 4.3097506496880325e-07, |
|
"loss": 0.478, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 2.6435796331218926, |
|
"grad_norm": 0.46511398506896673, |
|
"learning_rate": 4.2290699993968897e-07, |
|
"loss": 0.4796, |
|
"step": 3855 |
|
}, |
|
{ |
|
"epoch": 2.6470084004800274, |
|
"grad_norm": 0.4739348227964508, |
|
"learning_rate": 4.149118334345403e-07, |
|
"loss": 0.474, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 2.650437167838162, |
|
"grad_norm": 0.46126850845642486, |
|
"learning_rate": 4.0698969279091115e-07, |
|
"loss": 0.4786, |
|
"step": 3865 |
|
}, |
|
{ |
|
"epoch": 2.653865935196297, |
|
"grad_norm": 0.4536599491788961, |
|
"learning_rate": 3.9914070418329123e-07, |
|
"loss": 0.477, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 2.657294702554432, |
|
"grad_norm": 0.4620488468718019, |
|
"learning_rate": 3.9136499262108716e-07, |
|
"loss": 0.4808, |
|
"step": 3875 |
|
}, |
|
{ |
|
"epoch": 2.6607234699125666, |
|
"grad_norm": 0.4655535577302228, |
|
"learning_rate": 3.836626819466338e-07, |
|
"loss": 0.4809, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 2.6641522372707014, |
|
"grad_norm": 0.5048993299270467, |
|
"learning_rate": 3.760338948332293e-07, |
|
"loss": 0.4731, |
|
"step": 3885 |
|
}, |
|
{ |
|
"epoch": 2.667581004628836, |
|
"grad_norm": 0.47154869975353264, |
|
"learning_rate": 3.684787527831707e-07, |
|
"loss": 0.477, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 2.6710097719869705, |
|
"grad_norm": 0.4642713924482236, |
|
"learning_rate": 3.6099737612582455e-07, |
|
"loss": 0.475, |
|
"step": 3895 |
|
}, |
|
{ |
|
"epoch": 2.6744385393451053, |
|
"grad_norm": 0.4483908444332171, |
|
"learning_rate": 3.53589884015712e-07, |
|
"loss": 0.4753, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 2.67786730670324, |
|
"grad_norm": 0.4652782765678345, |
|
"learning_rate": 3.462563944306047e-07, |
|
"loss": 0.4816, |
|
"step": 3905 |
|
}, |
|
{ |
|
"epoch": 2.681296074061375, |
|
"grad_norm": 0.45331932775420813, |
|
"learning_rate": 3.3899702416965166e-07, |
|
"loss": 0.4777, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 2.6847248414195097, |
|
"grad_norm": 0.4412624964963365, |
|
"learning_rate": 3.3181188885151706e-07, |
|
"loss": 0.4763, |
|
"step": 3915 |
|
}, |
|
{ |
|
"epoch": 2.6881536087776445, |
|
"grad_norm": 0.45014302824757474, |
|
"learning_rate": 3.247011029125391e-07, |
|
"loss": 0.4786, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 2.6915823761357793, |
|
"grad_norm": 0.4631051719702464, |
|
"learning_rate": 3.1766477960490485e-07, |
|
"loss": 0.4846, |
|
"step": 3925 |
|
}, |
|
{ |
|
"epoch": 2.6950111434939137, |
|
"grad_norm": 0.4581555189827555, |
|
"learning_rate": 3.1070303099485055e-07, |
|
"loss": 0.484, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 2.6984399108520485, |
|
"grad_norm": 0.45350853465967644, |
|
"learning_rate": 3.038159679608754e-07, |
|
"loss": 0.4789, |
|
"step": 3935 |
|
}, |
|
{ |
|
"epoch": 2.7018686782101833, |
|
"grad_norm": 0.4546945454352565, |
|
"learning_rate": 2.9700370019197287e-07, |
|
"loss": 0.4763, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 2.705297445568318, |
|
"grad_norm": 0.449234094604194, |
|
"learning_rate": 2.9026633618588706e-07, |
|
"loss": 0.4759, |
|
"step": 3945 |
|
}, |
|
{ |
|
"epoch": 2.708726212926453, |
|
"grad_norm": 0.45160809662086393, |
|
"learning_rate": 2.8360398324738415e-07, |
|
"loss": 0.4716, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 2.7121549802845877, |
|
"grad_norm": 0.4482757694655283, |
|
"learning_rate": 2.7701674748654206e-07, |
|
"loss": 0.4774, |
|
"step": 3955 |
|
}, |
|
{ |
|
"epoch": 2.7155837476427225, |
|
"grad_norm": 0.4605988211239266, |
|
"learning_rate": 2.7050473381706186e-07, |
|
"loss": 0.4796, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 2.7190125150008573, |
|
"grad_norm": 0.4542079439617735, |
|
"learning_rate": 2.640680459545941e-07, |
|
"loss": 0.4814, |
|
"step": 3965 |
|
}, |
|
{ |
|
"epoch": 2.722441282358992, |
|
"grad_norm": 0.47170501988492425, |
|
"learning_rate": 2.577067864150906e-07, |
|
"loss": 0.477, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 2.725870049717127, |
|
"grad_norm": 0.45766219103133904, |
|
"learning_rate": 2.514210565131708e-07, |
|
"loss": 0.479, |
|
"step": 3975 |
|
}, |
|
{ |
|
"epoch": 2.7292988170752617, |
|
"grad_norm": 0.46091999743971984, |
|
"learning_rate": 2.452109563605065e-07, |
|
"loss": 0.4795, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 2.732727584433396, |
|
"grad_norm": 0.46053964874575465, |
|
"learning_rate": 2.3907658486422734e-07, |
|
"loss": 0.4816, |
|
"step": 3985 |
|
}, |
|
{ |
|
"epoch": 2.736156351791531, |
|
"grad_norm": 0.44585333829135504, |
|
"learning_rate": 2.330180397253473e-07, |
|
"loss": 0.4712, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 2.7395851191496656, |
|
"grad_norm": 0.45616604492168555, |
|
"learning_rate": 2.2703541743720913e-07, |
|
"loss": 0.4765, |
|
"step": 3995 |
|
}, |
|
{ |
|
"epoch": 2.7430138865078004, |
|
"grad_norm": 0.458862963479115, |
|
"learning_rate": 2.2112881328394287e-07, |
|
"loss": 0.4734, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.7464426538659352, |
|
"grad_norm": 0.4564004005454542, |
|
"learning_rate": 2.152983213389559e-07, |
|
"loss": 0.4817, |
|
"step": 4005 |
|
}, |
|
{ |
|
"epoch": 2.74987142122407, |
|
"grad_norm": 0.4527978346632276, |
|
"learning_rate": 2.0954403446342753e-07, |
|
"loss": 0.4778, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 2.753300188582205, |
|
"grad_norm": 0.44674470885527423, |
|
"learning_rate": 2.0386604430483237e-07, |
|
"loss": 0.4753, |
|
"step": 4015 |
|
}, |
|
{ |
|
"epoch": 2.756728955940339, |
|
"grad_norm": 0.4573693361590737, |
|
"learning_rate": 1.9826444129548317e-07, |
|
"loss": 0.4829, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 2.760157723298474, |
|
"grad_norm": 0.46202280759285275, |
|
"learning_rate": 1.927393146510881e-07, |
|
"loss": 0.4794, |
|
"step": 4025 |
|
}, |
|
{ |
|
"epoch": 2.7635864906566088, |
|
"grad_norm": 0.44872435639531505, |
|
"learning_rate": 1.8729075236932903e-07, |
|
"loss": 0.4712, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 2.7670152580147436, |
|
"grad_norm": 0.4562789796076479, |
|
"learning_rate": 1.8191884122846226e-07, |
|
"loss": 0.4775, |
|
"step": 4035 |
|
}, |
|
{ |
|
"epoch": 2.7704440253728784, |
|
"grad_norm": 0.4585752798754004, |
|
"learning_rate": 1.7662366678593502e-07, |
|
"loss": 0.4803, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 2.773872792731013, |
|
"grad_norm": 0.44460297881292643, |
|
"learning_rate": 1.7140531337702383e-07, |
|
"loss": 0.4747, |
|
"step": 4045 |
|
}, |
|
{ |
|
"epoch": 2.777301560089148, |
|
"grad_norm": 0.4425263511378165, |
|
"learning_rate": 1.6626386411348783e-07, |
|
"loss": 0.4715, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 2.7807303274472828, |
|
"grad_norm": 0.448778429615437, |
|
"learning_rate": 1.6119940088225317e-07, |
|
"loss": 0.4821, |
|
"step": 4055 |
|
}, |
|
{ |
|
"epoch": 2.7841590948054176, |
|
"grad_norm": 0.4454280991695363, |
|
"learning_rate": 1.56212004344099e-07, |
|
"loss": 0.4725, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 2.7875878621635524, |
|
"grad_norm": 0.45389256280014806, |
|
"learning_rate": 1.5130175393237744e-07, |
|
"loss": 0.482, |
|
"step": 4065 |
|
}, |
|
{ |
|
"epoch": 2.791016629521687, |
|
"grad_norm": 0.44797422922275487, |
|
"learning_rate": 1.4646872785175182e-07, |
|
"loss": 0.4736, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 2.794445396879822, |
|
"grad_norm": 0.4500167947023535, |
|
"learning_rate": 1.417130030769448e-07, |
|
"loss": 0.4748, |
|
"step": 4075 |
|
}, |
|
{ |
|
"epoch": 2.7978741642379563, |
|
"grad_norm": 0.4384121757610316, |
|
"learning_rate": 1.3703465535151505e-07, |
|
"loss": 0.4787, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 2.801302931596091, |
|
"grad_norm": 0.4542996969309403, |
|
"learning_rate": 1.3243375918665314e-07, |
|
"loss": 0.4757, |
|
"step": 4085 |
|
}, |
|
{ |
|
"epoch": 2.804731698954226, |
|
"grad_norm": 0.44313787892697587, |
|
"learning_rate": 1.2791038785999243e-07, |
|
"loss": 0.4723, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 2.8081604663123607, |
|
"grad_norm": 0.45221326925330546, |
|
"learning_rate": 1.2346461341443962e-07, |
|
"loss": 0.4739, |
|
"step": 4095 |
|
}, |
|
{ |
|
"epoch": 2.8115892336704955, |
|
"grad_norm": 0.4493204806606999, |
|
"learning_rate": 1.1909650665703265e-07, |
|
"loss": 0.476, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 2.8150180010286303, |
|
"grad_norm": 0.4511251142367096, |
|
"learning_rate": 1.1480613715781074e-07, |
|
"loss": 0.4712, |
|
"step": 4105 |
|
}, |
|
{ |
|
"epoch": 2.818446768386765, |
|
"grad_norm": 0.44576223137927595, |
|
"learning_rate": 1.1059357324870456e-07, |
|
"loss": 0.4804, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 2.8218755357448995, |
|
"grad_norm": 0.45162764316225673, |
|
"learning_rate": 1.064588820224488e-07, |
|
"loss": 0.4756, |
|
"step": 4115 |
|
}, |
|
{ |
|
"epoch": 2.8253043031030343, |
|
"grad_norm": 0.45919178354925205, |
|
"learning_rate": 1.024021293315175e-07, |
|
"loss": 0.4789, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 2.828733070461169, |
|
"grad_norm": 0.45178753105354696, |
|
"learning_rate": 9.842337978706707e-08, |
|
"loss": 0.4712, |
|
"step": 4125 |
|
}, |
|
{ |
|
"epoch": 2.832161837819304, |
|
"grad_norm": 0.44369641899607265, |
|
"learning_rate": 9.452269675791603e-08, |
|
"loss": 0.4693, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 2.8355906051774387, |
|
"grad_norm": 0.4397395162890691, |
|
"learning_rate": 9.070014236952973e-08, |
|
"loss": 0.4725, |
|
"step": 4135 |
|
}, |
|
{ |
|
"epoch": 2.8390193725355735, |
|
"grad_norm": 0.4515355683540246, |
|
"learning_rate": 8.69557775030344e-08, |
|
"loss": 0.4773, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 2.8424481398937083, |
|
"grad_norm": 0.4403255731977004, |
|
"learning_rate": 8.328966179424413e-08, |
|
"loss": 0.4733, |
|
"step": 4145 |
|
}, |
|
{ |
|
"epoch": 2.845876907251843, |
|
"grad_norm": 0.4543200394332973, |
|
"learning_rate": 7.970185363271432e-08, |
|
"loss": 0.481, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 2.849305674609978, |
|
"grad_norm": 0.4564283136704236, |
|
"learning_rate": 7.619241016081081e-08, |
|
"loss": 0.4761, |
|
"step": 4155 |
|
}, |
|
{ |
|
"epoch": 2.8527344419681127, |
|
"grad_norm": 0.44588624487284356, |
|
"learning_rate": 7.276138727279669e-08, |
|
"loss": 0.4797, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 2.8561632093262475, |
|
"grad_norm": 0.45198814901899365, |
|
"learning_rate": 6.940883961394917e-08, |
|
"loss": 0.4798, |
|
"step": 4165 |
|
}, |
|
{ |
|
"epoch": 2.859591976684382, |
|
"grad_norm": 0.4501783006120383, |
|
"learning_rate": 6.613482057968023e-08, |
|
"loss": 0.4732, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 2.8630207440425166, |
|
"grad_norm": 0.4373982769658886, |
|
"learning_rate": 6.293938231469398e-08, |
|
"loss": 0.4736, |
|
"step": 4175 |
|
}, |
|
{ |
|
"epoch": 2.8664495114006514, |
|
"grad_norm": 0.4468552193790084, |
|
"learning_rate": 5.982257571215178e-08, |
|
"loss": 0.4775, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 2.869878278758786, |
|
"grad_norm": 0.44830321173439996, |
|
"learning_rate": 5.6784450412862293e-08, |
|
"loss": 0.4721, |
|
"step": 4185 |
|
}, |
|
{ |
|
"epoch": 2.873307046116921, |
|
"grad_norm": 0.44597562076766367, |
|
"learning_rate": 5.382505480449274e-08, |
|
"loss": 0.4757, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 2.876735813475056, |
|
"grad_norm": 0.46198888054755305, |
|
"learning_rate": 5.094443602079613e-08, |
|
"loss": 0.4769, |
|
"step": 4195 |
|
}, |
|
{ |
|
"epoch": 2.8801645808331906, |
|
"grad_norm": 0.4481590814268269, |
|
"learning_rate": 4.814263994086077e-08, |
|
"loss": 0.4745, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 2.883593348191325, |
|
"grad_norm": 0.44268948043488404, |
|
"learning_rate": 4.541971118838196e-08, |
|
"loss": 0.4802, |
|
"step": 4205 |
|
}, |
|
{ |
|
"epoch": 2.8870221155494598, |
|
"grad_norm": 0.4450782983770214, |
|
"learning_rate": 4.2775693130948094e-08, |
|
"loss": 0.4786, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 2.8904508829075946, |
|
"grad_norm": 0.4605099335234408, |
|
"learning_rate": 4.021062787935181e-08, |
|
"loss": 0.4784, |
|
"step": 4215 |
|
}, |
|
{ |
|
"epoch": 2.8938796502657294, |
|
"grad_norm": 0.4481715420899884, |
|
"learning_rate": 3.772455628691829e-08, |
|
"loss": 0.4704, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 2.897308417623864, |
|
"grad_norm": 0.4646866287413105, |
|
"learning_rate": 3.531751794885574e-08, |
|
"loss": 0.4799, |
|
"step": 4225 |
|
}, |
|
{ |
|
"epoch": 2.900737184981999, |
|
"grad_norm": 0.4565890986941479, |
|
"learning_rate": 3.2989551201624836e-08, |
|
"loss": 0.4825, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 2.9041659523401337, |
|
"grad_norm": 0.44645402505000176, |
|
"learning_rate": 3.0740693122325304e-08, |
|
"loss": 0.4735, |
|
"step": 4235 |
|
}, |
|
{ |
|
"epoch": 2.9075947196982685, |
|
"grad_norm": 0.4507869739723372, |
|
"learning_rate": 2.857097952810972e-08, |
|
"loss": 0.4862, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 2.9110234870564033, |
|
"grad_norm": 0.4501912072102517, |
|
"learning_rate": 2.6480444975610086e-08, |
|
"loss": 0.4789, |
|
"step": 4245 |
|
}, |
|
{ |
|
"epoch": 2.914452254414538, |
|
"grad_norm": 0.447987603121036, |
|
"learning_rate": 2.4469122760388264e-08, |
|
"loss": 0.4778, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 2.917881021772673, |
|
"grad_norm": 0.44971834009936357, |
|
"learning_rate": 2.2537044916405292e-08, |
|
"loss": 0.4772, |
|
"step": 4255 |
|
}, |
|
{ |
|
"epoch": 2.9213097891308077, |
|
"grad_norm": 0.4495077806550815, |
|
"learning_rate": 2.0684242215511797e-08, |
|
"loss": 0.4749, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 2.924738556488942, |
|
"grad_norm": 0.44694368287876113, |
|
"learning_rate": 1.8910744166958926e-08, |
|
"loss": 0.4801, |
|
"step": 4265 |
|
}, |
|
{ |
|
"epoch": 2.928167323847077, |
|
"grad_norm": 0.4497877428185916, |
|
"learning_rate": 1.7216579016925415e-08, |
|
"loss": 0.484, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 2.9315960912052117, |
|
"grad_norm": 0.44920658823268794, |
|
"learning_rate": 1.5601773748070147e-08, |
|
"loss": 0.4814, |
|
"step": 4275 |
|
}, |
|
{ |
|
"epoch": 2.9350248585633465, |
|
"grad_norm": 0.4438689776804719, |
|
"learning_rate": 1.4066354079101396e-08, |
|
"loss": 0.4792, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 2.9384536259214813, |
|
"grad_norm": 0.446097901399973, |
|
"learning_rate": 1.2610344464367707e-08, |
|
"loss": 0.478, |
|
"step": 4285 |
|
}, |
|
{ |
|
"epoch": 2.941882393279616, |
|
"grad_norm": 0.447115729483812, |
|
"learning_rate": 1.1233768093468766e-08, |
|
"loss": 0.4757, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 2.945311160637751, |
|
"grad_norm": 0.4481899023438092, |
|
"learning_rate": 9.936646890884582e-09, |
|
"loss": 0.4739, |
|
"step": 4295 |
|
}, |
|
{ |
|
"epoch": 2.9487399279958852, |
|
"grad_norm": 0.4506621904879036, |
|
"learning_rate": 8.719001515627434e-09, |
|
"loss": 0.479, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 2.95216869535402, |
|
"grad_norm": 0.4533955216869492, |
|
"learning_rate": 7.58085136091269e-09, |
|
"loss": 0.4817, |
|
"step": 4305 |
|
}, |
|
{ |
|
"epoch": 2.955597462712155, |
|
"grad_norm": 0.43437042453552366, |
|
"learning_rate": 6.5222145538501595e-09, |
|
"loss": 0.4821, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 2.9590262300702896, |
|
"grad_norm": 0.45340023997429035, |
|
"learning_rate": 5.543107955154892e-09, |
|
"loss": 0.4722, |
|
"step": 4315 |
|
}, |
|
{ |
|
"epoch": 2.9624549974284244, |
|
"grad_norm": 0.4547732433450872, |
|
"learning_rate": 4.643547158878492e-09, |
|
"loss": 0.4783, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 2.9658837647865592, |
|
"grad_norm": 0.4459015086328452, |
|
"learning_rate": 3.8235464921621e-09, |
|
"loss": 0.4759, |
|
"step": 4325 |
|
}, |
|
{ |
|
"epoch": 2.969312532144694, |
|
"grad_norm": 0.43573098371648133, |
|
"learning_rate": 3.0831190150054646e-09, |
|
"loss": 0.4793, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 2.972741299502829, |
|
"grad_norm": 0.4558549928949452, |
|
"learning_rate": 2.422276520062661e-09, |
|
"loss": 0.4754, |
|
"step": 4335 |
|
}, |
|
{ |
|
"epoch": 2.9761700668609636, |
|
"grad_norm": 0.45059494788354154, |
|
"learning_rate": 1.8410295324505778e-09, |
|
"loss": 0.4808, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 2.9795988342190984, |
|
"grad_norm": 0.4527487921892312, |
|
"learning_rate": 1.3393873095846055e-09, |
|
"loss": 0.4771, |
|
"step": 4345 |
|
}, |
|
{ |
|
"epoch": 2.9830276015772332, |
|
"grad_norm": 0.44727140352439004, |
|
"learning_rate": 9.173578410281992e-10, |
|
"loss": 0.4793, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 2.9864563689353676, |
|
"grad_norm": 0.44819706968866224, |
|
"learning_rate": 5.749478483679794e-10, |
|
"loss": 0.4785, |
|
"step": 4355 |
|
}, |
|
{ |
|
"epoch": 2.9898851362935024, |
|
"grad_norm": 0.4540122997268082, |
|
"learning_rate": 3.1216278510493027e-10, |
|
"loss": 0.4768, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 2.993313903651637, |
|
"grad_norm": 0.44168345357587324, |
|
"learning_rate": 1.2900683656891232e-10, |
|
"loss": 0.4688, |
|
"step": 4365 |
|
}, |
|
{ |
|
"epoch": 2.996742671009772, |
|
"grad_norm": 0.45853813294171786, |
|
"learning_rate": 2.548291985149387e-11, |
|
"loss": 0.4775, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 2.9994856848962796, |
|
"eval_loss": 0.5953422784805298, |
|
"eval_runtime": 204.761, |
|
"eval_samples_per_second": 47.993, |
|
"eval_steps_per_second": 1.504, |
|
"step": 4374 |
|
}, |
|
{ |
|
"epoch": 2.9994856848962796, |
|
"step": 4374, |
|
"total_flos": 3663097076121600.0, |
|
"train_loss": 0.5645236694000604, |
|
"train_runtime": 46689.0597, |
|
"train_samples_per_second": 11.993, |
|
"train_steps_per_second": 0.094 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 4374, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": false, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3663097076121600.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|