|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.44483985765124556, |
|
"eval_steps": 500, |
|
"global_step": 1000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.00044483985765124553, |
|
"grad_norm": 0.5906195640563965, |
|
"learning_rate": 4e-05, |
|
"loss": 1.504, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0008896797153024911, |
|
"grad_norm": 0.8538110852241516, |
|
"learning_rate": 8e-05, |
|
"loss": 1.4735, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.0013345195729537367, |
|
"grad_norm": 0.4623548686504364, |
|
"learning_rate": 0.00012, |
|
"loss": 1.3887, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.0017793594306049821, |
|
"grad_norm": 0.40841907262802124, |
|
"learning_rate": 0.00016, |
|
"loss": 1.2993, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.002224199288256228, |
|
"grad_norm": 0.4153430759906769, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3105, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.0026690391459074734, |
|
"grad_norm": 0.4014444947242737, |
|
"learning_rate": 0.00019979899497487438, |
|
"loss": 1.1942, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.003113879003558719, |
|
"grad_norm": 0.39270123839378357, |
|
"learning_rate": 0.00019959798994974876, |
|
"loss": 1.1688, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.0035587188612099642, |
|
"grad_norm": 0.4414786696434021, |
|
"learning_rate": 0.00019939698492462313, |
|
"loss": 1.121, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.00400355871886121, |
|
"grad_norm": 0.3911682069301605, |
|
"learning_rate": 0.0001991959798994975, |
|
"loss": 1.1039, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.004448398576512456, |
|
"grad_norm": 0.3546655476093292, |
|
"learning_rate": 0.00019899497487437187, |
|
"loss": 1.1259, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.004893238434163701, |
|
"grad_norm": 0.37283238768577576, |
|
"learning_rate": 0.00019879396984924622, |
|
"loss": 1.1599, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.005338078291814947, |
|
"grad_norm": 0.35209548473358154, |
|
"learning_rate": 0.00019859296482412062, |
|
"loss": 1.2022, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.005782918149466192, |
|
"grad_norm": 0.3735697865486145, |
|
"learning_rate": 0.000198391959798995, |
|
"loss": 1.2544, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.006227758007117438, |
|
"grad_norm": 0.324276328086853, |
|
"learning_rate": 0.00019819095477386937, |
|
"loss": 1.1451, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.0066725978647686835, |
|
"grad_norm": 0.44405269622802734, |
|
"learning_rate": 0.0001979899497487437, |
|
"loss": 1.2581, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.0071174377224199285, |
|
"grad_norm": 0.40987834334373474, |
|
"learning_rate": 0.0001977889447236181, |
|
"loss": 1.1422, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.007562277580071174, |
|
"grad_norm": 0.31967031955718994, |
|
"learning_rate": 0.00019758793969849249, |
|
"loss": 1.1521, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.00800711743772242, |
|
"grad_norm": 0.3634157180786133, |
|
"learning_rate": 0.00019738693467336683, |
|
"loss": 1.0172, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.008451957295373666, |
|
"grad_norm": 0.37751761078834534, |
|
"learning_rate": 0.0001971859296482412, |
|
"loss": 1.1343, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.008896797153024912, |
|
"grad_norm": 0.3501513600349426, |
|
"learning_rate": 0.0001969849246231156, |
|
"loss": 1.1137, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.009341637010676156, |
|
"grad_norm": 0.41018760204315186, |
|
"learning_rate": 0.00019678391959798995, |
|
"loss": 1.0601, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.009786476868327402, |
|
"grad_norm": 0.5334712266921997, |
|
"learning_rate": 0.00019658291457286432, |
|
"loss": 1.2332, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.010231316725978648, |
|
"grad_norm": 0.5504945516586304, |
|
"learning_rate": 0.0001963819095477387, |
|
"loss": 1.1928, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.010676156583629894, |
|
"grad_norm": 0.39751070737838745, |
|
"learning_rate": 0.0001961809045226131, |
|
"loss": 0.9774, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.01112099644128114, |
|
"grad_norm": 0.31996771693229675, |
|
"learning_rate": 0.00019597989949748744, |
|
"loss": 1.042, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.011565836298932384, |
|
"grad_norm": 0.49899256229400635, |
|
"learning_rate": 0.00019577889447236181, |
|
"loss": 1.0384, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.01201067615658363, |
|
"grad_norm": 0.37340229749679565, |
|
"learning_rate": 0.0001955778894472362, |
|
"loss": 1.0307, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.012455516014234875, |
|
"grad_norm": 0.33216455578804016, |
|
"learning_rate": 0.00019537688442211056, |
|
"loss": 1.0253, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.012900355871886121, |
|
"grad_norm": 0.3582470715045929, |
|
"learning_rate": 0.00019517587939698493, |
|
"loss": 1.0751, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.013345195729537367, |
|
"grad_norm": 0.3682143986225128, |
|
"learning_rate": 0.0001949748743718593, |
|
"loss": 1.0633, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.013790035587188613, |
|
"grad_norm": 0.4105437695980072, |
|
"learning_rate": 0.00019477386934673368, |
|
"loss": 1.1865, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.014234875444839857, |
|
"grad_norm": 0.3709512948989868, |
|
"learning_rate": 0.00019457286432160805, |
|
"loss": 1.0198, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.014679715302491103, |
|
"grad_norm": 0.3693946599960327, |
|
"learning_rate": 0.00019437185929648243, |
|
"loss": 1.0405, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.015124555160142349, |
|
"grad_norm": 0.335426390171051, |
|
"learning_rate": 0.0001941708542713568, |
|
"loss": 0.9301, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.015569395017793594, |
|
"grad_norm": 0.5277281403541565, |
|
"learning_rate": 0.00019396984924623117, |
|
"loss": 0.9615, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.01601423487544484, |
|
"grad_norm": 0.40925148129463196, |
|
"learning_rate": 0.00019376884422110552, |
|
"loss": 1.1093, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.016459074733096084, |
|
"grad_norm": 0.40884852409362793, |
|
"learning_rate": 0.00019356783919597992, |
|
"loss": 1.063, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.016903914590747332, |
|
"grad_norm": 0.3492753207683563, |
|
"learning_rate": 0.0001933668341708543, |
|
"loss": 1.0865, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.017348754448398576, |
|
"grad_norm": 0.3660659193992615, |
|
"learning_rate": 0.00019316582914572864, |
|
"loss": 1.071, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.017793594306049824, |
|
"grad_norm": 0.3863303065299988, |
|
"learning_rate": 0.000192964824120603, |
|
"loss": 1.0096, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.018238434163701068, |
|
"grad_norm": 0.34570327401161194, |
|
"learning_rate": 0.0001927638190954774, |
|
"loss": 1.0753, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.018683274021352312, |
|
"grad_norm": 0.35912126302719116, |
|
"learning_rate": 0.00019256281407035178, |
|
"loss": 1.0997, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.01912811387900356, |
|
"grad_norm": 0.36907467246055603, |
|
"learning_rate": 0.00019236180904522613, |
|
"loss": 1.0419, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.019572953736654804, |
|
"grad_norm": 0.3746166229248047, |
|
"learning_rate": 0.0001921608040201005, |
|
"loss": 1.0424, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.02001779359430605, |
|
"grad_norm": 0.33465832471847534, |
|
"learning_rate": 0.0001919597989949749, |
|
"loss": 1.0335, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.020462633451957295, |
|
"grad_norm": 0.3497348427772522, |
|
"learning_rate": 0.00019175879396984925, |
|
"loss": 1.0282, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.02090747330960854, |
|
"grad_norm": 0.34102576971054077, |
|
"learning_rate": 0.00019155778894472362, |
|
"loss": 1.1197, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.021352313167259787, |
|
"grad_norm": 0.31394055485725403, |
|
"learning_rate": 0.000191356783919598, |
|
"loss": 1.0298, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.02179715302491103, |
|
"grad_norm": 0.505631148815155, |
|
"learning_rate": 0.0001911557788944724, |
|
"loss": 1.0349, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.02224199288256228, |
|
"grad_norm": 0.3988608121871948, |
|
"learning_rate": 0.00019095477386934674, |
|
"loss": 0.9868, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.022686832740213523, |
|
"grad_norm": 0.3646712899208069, |
|
"learning_rate": 0.0001907537688442211, |
|
"loss": 1.0884, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.023131672597864767, |
|
"grad_norm": 0.35094475746154785, |
|
"learning_rate": 0.00019055276381909548, |
|
"loss": 1.1207, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.023576512455516015, |
|
"grad_norm": 0.34856799244880676, |
|
"learning_rate": 0.00019035175879396986, |
|
"loss": 1.0121, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.02402135231316726, |
|
"grad_norm": 0.37425950169563293, |
|
"learning_rate": 0.00019015075376884423, |
|
"loss": 1.1073, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.024466192170818506, |
|
"grad_norm": 0.3625994026660919, |
|
"learning_rate": 0.0001899497487437186, |
|
"loss": 1.0863, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.02491103202846975, |
|
"grad_norm": 0.295337051153183, |
|
"learning_rate": 0.00018974874371859298, |
|
"loss": 1.0561, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.025355871886120998, |
|
"grad_norm": 0.34300458431243896, |
|
"learning_rate": 0.00018954773869346732, |
|
"loss": 0.9955, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.025800711743772242, |
|
"grad_norm": 0.45535075664520264, |
|
"learning_rate": 0.00018934673366834172, |
|
"loss": 1.0623, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.026245551601423486, |
|
"grad_norm": 0.3560766577720642, |
|
"learning_rate": 0.0001891457286432161, |
|
"loss": 1.0259, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.026690391459074734, |
|
"grad_norm": 0.33135348558425903, |
|
"learning_rate": 0.00018894472361809047, |
|
"loss": 1.0823, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.027135231316725978, |
|
"grad_norm": 0.33519864082336426, |
|
"learning_rate": 0.00018874371859296481, |
|
"loss": 0.8893, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.027580071174377226, |
|
"grad_norm": 0.4156452417373657, |
|
"learning_rate": 0.00018854271356783921, |
|
"loss": 0.888, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.02802491103202847, |
|
"grad_norm": 0.5566922426223755, |
|
"learning_rate": 0.0001883417085427136, |
|
"loss": 0.9465, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.028469750889679714, |
|
"grad_norm": 0.3567597270011902, |
|
"learning_rate": 0.00018814070351758793, |
|
"loss": 1.044, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.02891459074733096, |
|
"grad_norm": 0.3937987685203552, |
|
"learning_rate": 0.0001879396984924623, |
|
"loss": 1.1491, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.029359430604982206, |
|
"grad_norm": 0.3297279477119446, |
|
"learning_rate": 0.0001877386934673367, |
|
"loss": 1.1387, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.029804270462633453, |
|
"grad_norm": 0.4475502371788025, |
|
"learning_rate": 0.00018753768844221108, |
|
"loss": 1.1832, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.030249110320284697, |
|
"grad_norm": 0.4012911021709442, |
|
"learning_rate": 0.00018733668341708543, |
|
"loss": 1.1017, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.03069395017793594, |
|
"grad_norm": 0.36798158288002014, |
|
"learning_rate": 0.0001871356783919598, |
|
"loss": 0.9889, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.03113879003558719, |
|
"grad_norm": 0.3330882787704468, |
|
"learning_rate": 0.0001869346733668342, |
|
"loss": 1.0825, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.03158362989323844, |
|
"grad_norm": 0.41344648599624634, |
|
"learning_rate": 0.00018673366834170854, |
|
"loss": 1.0599, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.03202846975088968, |
|
"grad_norm": 0.5002074837684631, |
|
"learning_rate": 0.00018653266331658292, |
|
"loss": 1.0586, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.032473309608540925, |
|
"grad_norm": 0.3306974470615387, |
|
"learning_rate": 0.0001863316582914573, |
|
"loss": 1.0004, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.03291814946619217, |
|
"grad_norm": 0.394704133272171, |
|
"learning_rate": 0.0001861306532663317, |
|
"loss": 1.0083, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.03336298932384341, |
|
"grad_norm": 0.3663751184940338, |
|
"learning_rate": 0.00018592964824120604, |
|
"loss": 0.8553, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.033807829181494664, |
|
"grad_norm": 0.37072721123695374, |
|
"learning_rate": 0.0001857286432160804, |
|
"loss": 1.1199, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.03425266903914591, |
|
"grad_norm": 0.37048038840293884, |
|
"learning_rate": 0.00018552763819095478, |
|
"loss": 1.0716, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.03469750889679715, |
|
"grad_norm": 0.34426698088645935, |
|
"learning_rate": 0.00018532663316582915, |
|
"loss": 1.0378, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.035142348754448396, |
|
"grad_norm": 0.3649255037307739, |
|
"learning_rate": 0.00018512562814070353, |
|
"loss": 1.0843, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.03558718861209965, |
|
"grad_norm": 0.37399303913116455, |
|
"learning_rate": 0.0001849246231155779, |
|
"loss": 0.9741, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.03603202846975089, |
|
"grad_norm": 0.3313739001750946, |
|
"learning_rate": 0.00018472361809045227, |
|
"loss": 0.9883, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.036476868327402136, |
|
"grad_norm": 0.36365818977355957, |
|
"learning_rate": 0.00018452261306532662, |
|
"loss": 1.0434, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.03692170818505338, |
|
"grad_norm": 0.33248934149742126, |
|
"learning_rate": 0.00018432160804020102, |
|
"loss": 1.0635, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.037366548042704624, |
|
"grad_norm": 0.361134797334671, |
|
"learning_rate": 0.0001841206030150754, |
|
"loss": 1.0248, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.037811387900355875, |
|
"grad_norm": 0.3499568700790405, |
|
"learning_rate": 0.00018391959798994977, |
|
"loss": 0.9905, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.03825622775800712, |
|
"grad_norm": 0.3246215879917145, |
|
"learning_rate": 0.0001837185929648241, |
|
"loss": 1.0431, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.03870106761565836, |
|
"grad_norm": 0.39000576734542847, |
|
"learning_rate": 0.0001835175879396985, |
|
"loss": 0.995, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.03914590747330961, |
|
"grad_norm": 0.32663071155548096, |
|
"learning_rate": 0.00018331658291457288, |
|
"loss": 1.0976, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.03959074733096085, |
|
"grad_norm": 0.35704660415649414, |
|
"learning_rate": 0.00018311557788944723, |
|
"loss": 1.088, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.0400355871886121, |
|
"grad_norm": 0.3753162622451782, |
|
"learning_rate": 0.0001829145728643216, |
|
"loss": 1.0509, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.04048042704626335, |
|
"grad_norm": 0.45394080877304077, |
|
"learning_rate": 0.000182713567839196, |
|
"loss": 0.8953, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.04092526690391459, |
|
"grad_norm": 0.35750091075897217, |
|
"learning_rate": 0.00018251256281407038, |
|
"loss": 1.0031, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.041370106761565835, |
|
"grad_norm": 0.2915020287036896, |
|
"learning_rate": 0.00018231155778894472, |
|
"loss": 0.9292, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.04181494661921708, |
|
"grad_norm": 0.29004380106925964, |
|
"learning_rate": 0.0001821105527638191, |
|
"loss": 0.9423, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.04225978647686833, |
|
"grad_norm": 0.3510340750217438, |
|
"learning_rate": 0.0001819095477386935, |
|
"loss": 0.9933, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.042704626334519574, |
|
"grad_norm": 0.3953409194946289, |
|
"learning_rate": 0.00018170854271356784, |
|
"loss": 1.0174, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.04314946619217082, |
|
"grad_norm": 0.3693845272064209, |
|
"learning_rate": 0.00018150753768844221, |
|
"loss": 1.1309, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.04359430604982206, |
|
"grad_norm": 0.33780723810195923, |
|
"learning_rate": 0.0001813065326633166, |
|
"loss": 0.995, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.04403914590747331, |
|
"grad_norm": 0.39444178342819214, |
|
"learning_rate": 0.00018110552763819096, |
|
"loss": 1.0567, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.04448398576512456, |
|
"grad_norm": 0.3430976867675781, |
|
"learning_rate": 0.00018090452261306533, |
|
"loss": 0.956, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.0449288256227758, |
|
"grad_norm": 0.31412309408187866, |
|
"learning_rate": 0.0001807035175879397, |
|
"loss": 1.1141, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.045373665480427046, |
|
"grad_norm": 0.3321763575077057, |
|
"learning_rate": 0.00018050251256281408, |
|
"loss": 1.0245, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.04581850533807829, |
|
"grad_norm": 0.3730502128601074, |
|
"learning_rate": 0.00018030150753768845, |
|
"loss": 1.0961, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.046263345195729534, |
|
"grad_norm": 0.33242374658584595, |
|
"learning_rate": 0.00018010050251256282, |
|
"loss": 0.9899, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.046708185053380785, |
|
"grad_norm": 0.3853437900543213, |
|
"learning_rate": 0.0001798994974874372, |
|
"loss": 0.9559, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.04715302491103203, |
|
"grad_norm": 0.33745744824409485, |
|
"learning_rate": 0.00017969849246231157, |
|
"loss": 1.0458, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.04759786476868327, |
|
"grad_norm": 0.3601361811161041, |
|
"learning_rate": 0.00017949748743718592, |
|
"loss": 1.0801, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.04804270462633452, |
|
"grad_norm": 0.4157007932662964, |
|
"learning_rate": 0.00017929648241206032, |
|
"loss": 0.9852, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.04848754448398576, |
|
"grad_norm": 0.2746104598045349, |
|
"learning_rate": 0.0001790954773869347, |
|
"loss": 1.0247, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.04893238434163701, |
|
"grad_norm": 0.3099765181541443, |
|
"learning_rate": 0.00017889447236180906, |
|
"loss": 0.9642, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.04937722419928826, |
|
"grad_norm": 0.3376833498477936, |
|
"learning_rate": 0.0001786934673366834, |
|
"loss": 1.0203, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.0498220640569395, |
|
"grad_norm": 0.3929193913936615, |
|
"learning_rate": 0.0001784924623115578, |
|
"loss": 1.1084, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.050266903914590745, |
|
"grad_norm": 0.3145126402378082, |
|
"learning_rate": 0.00017829145728643218, |
|
"loss": 1.124, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.050711743772241996, |
|
"grad_norm": 0.3065921664237976, |
|
"learning_rate": 0.00017809045226130653, |
|
"loss": 1.0453, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.05115658362989324, |
|
"grad_norm": 0.3715677857398987, |
|
"learning_rate": 0.0001778894472361809, |
|
"loss": 0.9841, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.051601423487544484, |
|
"grad_norm": 0.36604928970336914, |
|
"learning_rate": 0.0001776884422110553, |
|
"loss": 0.9419, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.05204626334519573, |
|
"grad_norm": 0.37098243832588196, |
|
"learning_rate": 0.00017748743718592967, |
|
"loss": 1.0133, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.05249110320284697, |
|
"grad_norm": 0.3711595833301544, |
|
"learning_rate": 0.00017728643216080402, |
|
"loss": 1.0324, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.052935943060498224, |
|
"grad_norm": 0.3096163868904114, |
|
"learning_rate": 0.0001770854271356784, |
|
"loss": 1.0086, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.05338078291814947, |
|
"grad_norm": 0.4001842737197876, |
|
"learning_rate": 0.0001768844221105528, |
|
"loss": 1.026, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.05382562277580071, |
|
"grad_norm": 0.29020652174949646, |
|
"learning_rate": 0.00017668341708542714, |
|
"loss": 0.9578, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.054270462633451956, |
|
"grad_norm": 0.35916441679000854, |
|
"learning_rate": 0.0001764824120603015, |
|
"loss": 1.0502, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.0547153024911032, |
|
"grad_norm": 0.36931201815605164, |
|
"learning_rate": 0.00017628140703517588, |
|
"loss": 0.9456, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.05516014234875445, |
|
"grad_norm": 0.37013575434684753, |
|
"learning_rate": 0.00017608040201005026, |
|
"loss": 1.0472, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.055604982206405695, |
|
"grad_norm": 0.3738909959793091, |
|
"learning_rate": 0.00017587939698492463, |
|
"loss": 1.0194, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.05604982206405694, |
|
"grad_norm": 0.4676179885864258, |
|
"learning_rate": 0.000175678391959799, |
|
"loss": 1.1556, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.056494661921708184, |
|
"grad_norm": 0.430128812789917, |
|
"learning_rate": 0.00017547738693467338, |
|
"loss": 1.1472, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.05693950177935943, |
|
"grad_norm": 0.3177819848060608, |
|
"learning_rate": 0.00017527638190954775, |
|
"loss": 1.0159, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.05738434163701068, |
|
"grad_norm": 0.35535189509391785, |
|
"learning_rate": 0.00017507537688442212, |
|
"loss": 1.0388, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.05782918149466192, |
|
"grad_norm": 0.41556811332702637, |
|
"learning_rate": 0.0001748743718592965, |
|
"loss": 1.0366, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.05827402135231317, |
|
"grad_norm": 0.28733861446380615, |
|
"learning_rate": 0.00017467336683417087, |
|
"loss": 1.041, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.05871886120996441, |
|
"grad_norm": 0.36875680088996887, |
|
"learning_rate": 0.00017447236180904521, |
|
"loss": 1.0408, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.059163701067615655, |
|
"grad_norm": 0.30114784836769104, |
|
"learning_rate": 0.00017427135678391961, |
|
"loss": 1.0582, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.059608540925266906, |
|
"grad_norm": 0.397324800491333, |
|
"learning_rate": 0.000174070351758794, |
|
"loss": 1.0715, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.06005338078291815, |
|
"grad_norm": 0.30825501680374146, |
|
"learning_rate": 0.00017386934673366836, |
|
"loss": 0.8977, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.060498220640569395, |
|
"grad_norm": 0.34875619411468506, |
|
"learning_rate": 0.0001736683417085427, |
|
"loss": 1.0427, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.06094306049822064, |
|
"grad_norm": 0.44976702332496643, |
|
"learning_rate": 0.0001734673366834171, |
|
"loss": 1.0594, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.06138790035587188, |
|
"grad_norm": 0.4009503722190857, |
|
"learning_rate": 0.00017326633165829148, |
|
"loss": 1.0156, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.061832740213523134, |
|
"grad_norm": 0.32680168747901917, |
|
"learning_rate": 0.00017306532663316582, |
|
"loss": 1.0518, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.06227758007117438, |
|
"grad_norm": 0.3297230005264282, |
|
"learning_rate": 0.0001728643216080402, |
|
"loss": 0.982, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.06272241992882563, |
|
"grad_norm": 0.3223113417625427, |
|
"learning_rate": 0.0001726633165829146, |
|
"loss": 0.9451, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.06316725978647687, |
|
"grad_norm": 0.3400196135044098, |
|
"learning_rate": 0.00017246231155778897, |
|
"loss": 0.9247, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.06361209964412812, |
|
"grad_norm": 0.36805739998817444, |
|
"learning_rate": 0.00017226130653266332, |
|
"loss": 1.0732, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.06405693950177936, |
|
"grad_norm": 0.40036970376968384, |
|
"learning_rate": 0.0001720603015075377, |
|
"loss": 1.0966, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.0645017793594306, |
|
"grad_norm": 0.3755742311477661, |
|
"learning_rate": 0.00017185929648241206, |
|
"loss": 0.9863, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.06494661921708185, |
|
"grad_norm": 0.4415525496006012, |
|
"learning_rate": 0.00017165829145728644, |
|
"loss": 1.0933, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.0653914590747331, |
|
"grad_norm": 0.3682146966457367, |
|
"learning_rate": 0.0001714572864321608, |
|
"loss": 0.9468, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.06583629893238434, |
|
"grad_norm": 0.3670349717140198, |
|
"learning_rate": 0.00017125628140703518, |
|
"loss": 1.1367, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.06628113879003558, |
|
"grad_norm": 0.30022603273391724, |
|
"learning_rate": 0.00017105527638190955, |
|
"loss": 0.9387, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.06672597864768683, |
|
"grad_norm": 0.36275291442871094, |
|
"learning_rate": 0.00017085427135678393, |
|
"loss": 0.8882, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.06717081850533808, |
|
"grad_norm": 0.4646683633327484, |
|
"learning_rate": 0.0001706532663316583, |
|
"loss": 0.8556, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.06761565836298933, |
|
"grad_norm": 0.34833550453186035, |
|
"learning_rate": 0.00017045226130653267, |
|
"loss": 1.0124, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.06806049822064057, |
|
"grad_norm": 0.34811916947364807, |
|
"learning_rate": 0.00017025125628140705, |
|
"loss": 0.8668, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.06850533807829182, |
|
"grad_norm": 0.42011362314224243, |
|
"learning_rate": 0.00017005025125628142, |
|
"loss": 0.9818, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.06895017793594306, |
|
"grad_norm": 0.34665143489837646, |
|
"learning_rate": 0.0001698492462311558, |
|
"loss": 0.9958, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.0693950177935943, |
|
"grad_norm": 0.3390451967716217, |
|
"learning_rate": 0.00016964824120603016, |
|
"loss": 0.9835, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.06983985765124555, |
|
"grad_norm": 0.3538704812526703, |
|
"learning_rate": 0.0001694472361809045, |
|
"loss": 1.102, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.07028469750889679, |
|
"grad_norm": 0.40229830145835876, |
|
"learning_rate": 0.0001692462311557789, |
|
"loss": 1.1512, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.07072953736654804, |
|
"grad_norm": 0.37556755542755127, |
|
"learning_rate": 0.00016904522613065328, |
|
"loss": 1.0165, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.0711743772241993, |
|
"grad_norm": 0.33789652585983276, |
|
"learning_rate": 0.00016884422110552766, |
|
"loss": 0.9962, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.07161921708185054, |
|
"grad_norm": 0.39385226368904114, |
|
"learning_rate": 0.000168643216080402, |
|
"loss": 1.0538, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.07206405693950178, |
|
"grad_norm": 0.39948439598083496, |
|
"learning_rate": 0.0001684422110552764, |
|
"loss": 1.0472, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.07250889679715303, |
|
"grad_norm": 0.3786976635456085, |
|
"learning_rate": 0.00016824120603015078, |
|
"loss": 1.0288, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.07295373665480427, |
|
"grad_norm": 0.3729378879070282, |
|
"learning_rate": 0.00016804020100502512, |
|
"loss": 0.9856, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.07339857651245552, |
|
"grad_norm": 0.36763879656791687, |
|
"learning_rate": 0.0001678391959798995, |
|
"loss": 0.9711, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.07384341637010676, |
|
"grad_norm": 0.38680917024612427, |
|
"learning_rate": 0.0001676381909547739, |
|
"loss": 0.8915, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.074288256227758, |
|
"grad_norm": 0.4449096620082855, |
|
"learning_rate": 0.00016743718592964827, |
|
"loss": 0.9931, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.07473309608540925, |
|
"grad_norm": 0.39082011580467224, |
|
"learning_rate": 0.0001672361809045226, |
|
"loss": 1.0084, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.07517793594306049, |
|
"grad_norm": 0.37978312373161316, |
|
"learning_rate": 0.00016703517587939699, |
|
"loss": 0.9449, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.07562277580071175, |
|
"grad_norm": 0.32479336857795715, |
|
"learning_rate": 0.00016683417085427136, |
|
"loss": 0.8941, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.076067615658363, |
|
"grad_norm": 0.35645660758018494, |
|
"learning_rate": 0.00016663316582914573, |
|
"loss": 0.9952, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.07651245551601424, |
|
"grad_norm": 0.40985000133514404, |
|
"learning_rate": 0.0001664321608040201, |
|
"loss": 1.0187, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.07695729537366548, |
|
"grad_norm": 0.4032020568847656, |
|
"learning_rate": 0.00016623115577889448, |
|
"loss": 0.9356, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.07740213523131673, |
|
"grad_norm": 0.3428996801376343, |
|
"learning_rate": 0.00016603015075376885, |
|
"loss": 0.9326, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.07784697508896797, |
|
"grad_norm": 0.2806510925292969, |
|
"learning_rate": 0.00016582914572864322, |
|
"loss": 0.6118, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.07829181494661921, |
|
"grad_norm": 0.3010331392288208, |
|
"learning_rate": 0.0001656281407035176, |
|
"loss": 1.0065, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.07873665480427046, |
|
"grad_norm": 0.2989744246006012, |
|
"learning_rate": 0.00016542713567839197, |
|
"loss": 0.948, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.0791814946619217, |
|
"grad_norm": 0.3374849259853363, |
|
"learning_rate": 0.00016522613065326634, |
|
"loss": 1.0899, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.07962633451957295, |
|
"grad_norm": 0.38859498500823975, |
|
"learning_rate": 0.00016502512562814072, |
|
"loss": 1.035, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.0800711743772242, |
|
"grad_norm": 0.34474217891693115, |
|
"learning_rate": 0.0001648241206030151, |
|
"loss": 0.9826, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.08051601423487545, |
|
"grad_norm": 0.2957572937011719, |
|
"learning_rate": 0.00016462311557788946, |
|
"loss": 0.8905, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.0809608540925267, |
|
"grad_norm": 0.3576017916202545, |
|
"learning_rate": 0.0001644221105527638, |
|
"loss": 1.0266, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.08140569395017794, |
|
"grad_norm": 0.38848915696144104, |
|
"learning_rate": 0.0001642211055276382, |
|
"loss": 0.9883, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.08185053380782918, |
|
"grad_norm": 0.4087153375148773, |
|
"learning_rate": 0.00016402010050251258, |
|
"loss": 1.1882, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.08229537366548043, |
|
"grad_norm": 0.3468906879425049, |
|
"learning_rate": 0.00016381909547738695, |
|
"loss": 0.9818, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.08274021352313167, |
|
"grad_norm": 0.4617744982242584, |
|
"learning_rate": 0.0001636180904522613, |
|
"loss": 0.9065, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.08318505338078291, |
|
"grad_norm": 0.4207713305950165, |
|
"learning_rate": 0.0001634170854271357, |
|
"loss": 0.9988, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.08362989323843416, |
|
"grad_norm": 0.3632107675075531, |
|
"learning_rate": 0.00016321608040201007, |
|
"loss": 0.9049, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.08407473309608542, |
|
"grad_norm": 0.4303954839706421, |
|
"learning_rate": 0.00016301507537688442, |
|
"loss": 0.9801, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.08451957295373666, |
|
"grad_norm": 0.3233638107776642, |
|
"learning_rate": 0.0001628140703517588, |
|
"loss": 1.1457, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.0849644128113879, |
|
"grad_norm": 0.36270812153816223, |
|
"learning_rate": 0.00016261306532663316, |
|
"loss": 0.9804, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.08540925266903915, |
|
"grad_norm": 0.3483213484287262, |
|
"learning_rate": 0.00016241206030150756, |
|
"loss": 0.9412, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.08585409252669039, |
|
"grad_norm": 0.33415859937667847, |
|
"learning_rate": 0.0001622110552763819, |
|
"loss": 1.0479, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.08629893238434164, |
|
"grad_norm": 0.3390806317329407, |
|
"learning_rate": 0.00016201005025125628, |
|
"loss": 1.0266, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.08674377224199288, |
|
"grad_norm": 0.3439917266368866, |
|
"learning_rate": 0.00016180904522613066, |
|
"loss": 0.9495, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.08718861209964412, |
|
"grad_norm": 0.5404065847396851, |
|
"learning_rate": 0.00016160804020100503, |
|
"loss": 1.0176, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.08763345195729537, |
|
"grad_norm": 0.40834537148475647, |
|
"learning_rate": 0.0001614070351758794, |
|
"loss": 1.0119, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.08807829181494661, |
|
"grad_norm": 0.41839244961738586, |
|
"learning_rate": 0.00016120603015075378, |
|
"loss": 1.0006, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.08852313167259787, |
|
"grad_norm": 0.36179983615875244, |
|
"learning_rate": 0.00016100502512562815, |
|
"loss": 1.0247, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.08896797153024912, |
|
"grad_norm": 0.34288227558135986, |
|
"learning_rate": 0.00016080402010050252, |
|
"loss": 0.9545, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.08941281138790036, |
|
"grad_norm": 0.37949255108833313, |
|
"learning_rate": 0.0001606030150753769, |
|
"loss": 1.0024, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.0898576512455516, |
|
"grad_norm": 0.4100090563297272, |
|
"learning_rate": 0.00016040201005025127, |
|
"loss": 1.0465, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.09030249110320285, |
|
"grad_norm": 0.3741397559642792, |
|
"learning_rate": 0.00016020100502512564, |
|
"loss": 0.8969, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.09074733096085409, |
|
"grad_norm": 0.4092821180820465, |
|
"learning_rate": 0.00016, |
|
"loss": 1.3054, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.09119217081850534, |
|
"grad_norm": 0.29491907358169556, |
|
"learning_rate": 0.00015979899497487439, |
|
"loss": 1.0039, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.09163701067615658, |
|
"grad_norm": 0.38052767515182495, |
|
"learning_rate": 0.00015959798994974876, |
|
"loss": 0.9034, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.09208185053380782, |
|
"grad_norm": 0.34436798095703125, |
|
"learning_rate": 0.0001593969849246231, |
|
"loss": 0.9621, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.09252669039145907, |
|
"grad_norm": 0.41748183965682983, |
|
"learning_rate": 0.0001591959798994975, |
|
"loss": 1.0112, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.09297153024911033, |
|
"grad_norm": 0.35025399923324585, |
|
"learning_rate": 0.00015899497487437188, |
|
"loss": 1.0594, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.09341637010676157, |
|
"grad_norm": 0.3578777611255646, |
|
"learning_rate": 0.00015879396984924625, |
|
"loss": 1.0191, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.09386120996441281, |
|
"grad_norm": 0.3903699517250061, |
|
"learning_rate": 0.0001585929648241206, |
|
"loss": 1.0492, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.09430604982206406, |
|
"grad_norm": 0.34571701288223267, |
|
"learning_rate": 0.000158391959798995, |
|
"loss": 1.07, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.0947508896797153, |
|
"grad_norm": 0.3766401410102844, |
|
"learning_rate": 0.00015819095477386937, |
|
"loss": 0.9508, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.09519572953736655, |
|
"grad_norm": 0.3765754699707031, |
|
"learning_rate": 0.00015798994974874372, |
|
"loss": 0.9604, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.09564056939501779, |
|
"grad_norm": 0.39237186312675476, |
|
"learning_rate": 0.0001577889447236181, |
|
"loss": 0.9678, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.09608540925266904, |
|
"grad_norm": 0.43379107117652893, |
|
"learning_rate": 0.00015758793969849246, |
|
"loss": 0.9569, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.09653024911032028, |
|
"grad_norm": 0.44235309958457947, |
|
"learning_rate": 0.00015738693467336686, |
|
"loss": 1.047, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.09697508896797152, |
|
"grad_norm": 0.37492284178733826, |
|
"learning_rate": 0.0001571859296482412, |
|
"loss": 0.9247, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.09741992882562278, |
|
"grad_norm": 0.44187766313552856, |
|
"learning_rate": 0.00015698492462311558, |
|
"loss": 1.0136, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.09786476868327403, |
|
"grad_norm": 0.41393113136291504, |
|
"learning_rate": 0.00015678391959798995, |
|
"loss": 1.0051, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.09830960854092527, |
|
"grad_norm": 0.3643686771392822, |
|
"learning_rate": 0.00015658291457286433, |
|
"loss": 1.1171, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.09875444839857651, |
|
"grad_norm": 0.351253479719162, |
|
"learning_rate": 0.0001563819095477387, |
|
"loss": 1.0466, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.09919928825622776, |
|
"grad_norm": 0.35708218812942505, |
|
"learning_rate": 0.00015618090452261307, |
|
"loss": 1.0688, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.099644128113879, |
|
"grad_norm": 0.3984560966491699, |
|
"learning_rate": 0.00015597989949748745, |
|
"loss": 0.9606, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.10008896797153025, |
|
"grad_norm": 0.3412669897079468, |
|
"learning_rate": 0.00015577889447236182, |
|
"loss": 0.9781, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.10053380782918149, |
|
"grad_norm": 0.36613014340400696, |
|
"learning_rate": 0.0001555778894472362, |
|
"loss": 0.9449, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.10097864768683273, |
|
"grad_norm": 0.3568494915962219, |
|
"learning_rate": 0.00015537688442211056, |
|
"loss": 0.9679, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.10142348754448399, |
|
"grad_norm": 0.4687019884586334, |
|
"learning_rate": 0.00015517587939698494, |
|
"loss": 1.0386, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.10186832740213524, |
|
"grad_norm": 0.40447989106178284, |
|
"learning_rate": 0.0001549748743718593, |
|
"loss": 0.9812, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.10231316725978648, |
|
"grad_norm": 0.39272576570510864, |
|
"learning_rate": 0.00015477386934673368, |
|
"loss": 1.0165, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.10275800711743772, |
|
"grad_norm": 0.3780848979949951, |
|
"learning_rate": 0.00015457286432160806, |
|
"loss": 0.9606, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.10320284697508897, |
|
"grad_norm": 0.46794167160987854, |
|
"learning_rate": 0.0001543718592964824, |
|
"loss": 0.9305, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.10364768683274021, |
|
"grad_norm": 0.39729002118110657, |
|
"learning_rate": 0.0001541708542713568, |
|
"loss": 1.0113, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.10409252669039146, |
|
"grad_norm": 0.4045659005641937, |
|
"learning_rate": 0.00015396984924623117, |
|
"loss": 0.9961, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.1045373665480427, |
|
"grad_norm": 0.35122302174568176, |
|
"learning_rate": 0.00015376884422110555, |
|
"loss": 0.9891, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.10498220640569395, |
|
"grad_norm": 0.37050357460975647, |
|
"learning_rate": 0.0001535678391959799, |
|
"loss": 1.0024, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.10542704626334519, |
|
"grad_norm": 0.40677326917648315, |
|
"learning_rate": 0.00015336683417085427, |
|
"loss": 1.0948, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.10587188612099645, |
|
"grad_norm": 0.35067057609558105, |
|
"learning_rate": 0.00015316582914572867, |
|
"loss": 0.9897, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.10631672597864769, |
|
"grad_norm": 0.3920201063156128, |
|
"learning_rate": 0.000152964824120603, |
|
"loss": 1.0114, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.10676156583629894, |
|
"grad_norm": 0.3613733649253845, |
|
"learning_rate": 0.00015276381909547739, |
|
"loss": 0.9508, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.10720640569395018, |
|
"grad_norm": 0.35030075907707214, |
|
"learning_rate": 0.00015256281407035176, |
|
"loss": 1.0019, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.10765124555160142, |
|
"grad_norm": 0.4036657512187958, |
|
"learning_rate": 0.00015236180904522613, |
|
"loss": 0.9347, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.10809608540925267, |
|
"grad_norm": 0.3680381178855896, |
|
"learning_rate": 0.0001521608040201005, |
|
"loss": 0.9541, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.10854092526690391, |
|
"grad_norm": 0.7577487826347351, |
|
"learning_rate": 0.00015195979899497488, |
|
"loss": 1.0019, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.10898576512455516, |
|
"grad_norm": 0.32293733954429626, |
|
"learning_rate": 0.00015175879396984925, |
|
"loss": 0.9768, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.1094306049822064, |
|
"grad_norm": 0.32959771156311035, |
|
"learning_rate": 0.00015155778894472362, |
|
"loss": 1.0606, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.10987544483985764, |
|
"grad_norm": 0.3493881821632385, |
|
"learning_rate": 0.000151356783919598, |
|
"loss": 1.0317, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.1103202846975089, |
|
"grad_norm": 0.3482673168182373, |
|
"learning_rate": 0.00015115577889447237, |
|
"loss": 0.9157, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.11076512455516015, |
|
"grad_norm": 0.35461658239364624, |
|
"learning_rate": 0.00015095477386934674, |
|
"loss": 1.0753, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.11120996441281139, |
|
"grad_norm": 0.3124016225337982, |
|
"learning_rate": 0.00015075376884422112, |
|
"loss": 0.9352, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.11165480427046263, |
|
"grad_norm": 0.3451753854751587, |
|
"learning_rate": 0.0001505527638190955, |
|
"loss": 0.948, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.11209964412811388, |
|
"grad_norm": 0.3436938524246216, |
|
"learning_rate": 0.00015035175879396986, |
|
"loss": 0.9277, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.11254448398576512, |
|
"grad_norm": 0.3544940650463104, |
|
"learning_rate": 0.00015015075376884423, |
|
"loss": 0.976, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.11298932384341637, |
|
"grad_norm": 0.34947720170021057, |
|
"learning_rate": 0.0001499497487437186, |
|
"loss": 1.0602, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.11343416370106761, |
|
"grad_norm": 0.38455042243003845, |
|
"learning_rate": 0.00014974874371859298, |
|
"loss": 0.9524, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.11387900355871886, |
|
"grad_norm": 0.4009455442428589, |
|
"learning_rate": 0.00014954773869346735, |
|
"loss": 1.0465, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.11432384341637011, |
|
"grad_norm": 0.5961493849754333, |
|
"learning_rate": 0.0001493467336683417, |
|
"loss": 0.8681, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.11476868327402136, |
|
"grad_norm": 0.47233107686042786, |
|
"learning_rate": 0.0001491457286432161, |
|
"loss": 0.9355, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.1152135231316726, |
|
"grad_norm": 0.41602540016174316, |
|
"learning_rate": 0.00014894472361809047, |
|
"loss": 1.0256, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.11565836298932385, |
|
"grad_norm": 0.3379324972629547, |
|
"learning_rate": 0.00014874371859296482, |
|
"loss": 0.8982, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.11610320284697509, |
|
"grad_norm": 0.3502661883831024, |
|
"learning_rate": 0.0001485427135678392, |
|
"loss": 0.8932, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.11654804270462633, |
|
"grad_norm": 0.35349032282829285, |
|
"learning_rate": 0.00014834170854271356, |
|
"loss": 0.9541, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.11699288256227758, |
|
"grad_norm": 0.37270882725715637, |
|
"learning_rate": 0.00014814070351758796, |
|
"loss": 0.9566, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.11743772241992882, |
|
"grad_norm": 0.3453938364982605, |
|
"learning_rate": 0.0001479396984924623, |
|
"loss": 0.9901, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.11788256227758007, |
|
"grad_norm": 0.4339182376861572, |
|
"learning_rate": 0.00014773869346733668, |
|
"loss": 0.9542, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.11832740213523131, |
|
"grad_norm": 0.3721541166305542, |
|
"learning_rate": 0.00014753768844221106, |
|
"loss": 0.9075, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.11877224199288257, |
|
"grad_norm": 0.3452603220939636, |
|
"learning_rate": 0.00014733668341708543, |
|
"loss": 0.9762, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.11921708185053381, |
|
"grad_norm": 0.33102792501449585, |
|
"learning_rate": 0.0001471356783919598, |
|
"loss": 0.9648, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.11966192170818506, |
|
"grad_norm": 0.38129130005836487, |
|
"learning_rate": 0.00014693467336683417, |
|
"loss": 1.0283, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.1201067615658363, |
|
"grad_norm": 0.3465835452079773, |
|
"learning_rate": 0.00014673366834170855, |
|
"loss": 0.9383, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.12055160142348754, |
|
"grad_norm": 0.31291693449020386, |
|
"learning_rate": 0.00014653266331658292, |
|
"loss": 1.0326, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.12099644128113879, |
|
"grad_norm": 0.40111976861953735, |
|
"learning_rate": 0.0001463316582914573, |
|
"loss": 1.0133, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.12144128113879003, |
|
"grad_norm": 0.3763119578361511, |
|
"learning_rate": 0.00014613065326633167, |
|
"loss": 1.0317, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.12188612099644128, |
|
"grad_norm": 0.3577355742454529, |
|
"learning_rate": 0.00014592964824120604, |
|
"loss": 1.0477, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.12233096085409252, |
|
"grad_norm": 0.3452180027961731, |
|
"learning_rate": 0.0001457286432160804, |
|
"loss": 1.0083, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.12277580071174377, |
|
"grad_norm": 0.40135836601257324, |
|
"learning_rate": 0.00014552763819095479, |
|
"loss": 0.9175, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.12322064056939502, |
|
"grad_norm": 0.45712020993232727, |
|
"learning_rate": 0.00014532663316582916, |
|
"loss": 0.9563, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.12366548042704627, |
|
"grad_norm": 0.3986268937587738, |
|
"learning_rate": 0.00014512562814070353, |
|
"loss": 0.926, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.12411032028469751, |
|
"grad_norm": 0.3132750988006592, |
|
"learning_rate": 0.0001449246231155779, |
|
"loss": 0.9503, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.12455516014234876, |
|
"grad_norm": 0.34486275911331177, |
|
"learning_rate": 0.00014472361809045228, |
|
"loss": 0.9375, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.125, |
|
"grad_norm": 0.3309858739376068, |
|
"learning_rate": 0.00014452261306532665, |
|
"loss": 0.9276, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.12544483985765126, |
|
"grad_norm": 0.3567025363445282, |
|
"learning_rate": 0.000144321608040201, |
|
"loss": 0.9924, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.1258896797153025, |
|
"grad_norm": 0.35258856415748596, |
|
"learning_rate": 0.00014412060301507537, |
|
"loss": 0.974, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.12633451957295375, |
|
"grad_norm": 0.33701589703559875, |
|
"learning_rate": 0.00014391959798994977, |
|
"loss": 1.0021, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.12677935943060498, |
|
"grad_norm": 0.3263789117336273, |
|
"learning_rate": 0.00014371859296482411, |
|
"loss": 1.051, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.12722419928825623, |
|
"grad_norm": 0.39523646235466003, |
|
"learning_rate": 0.0001435175879396985, |
|
"loss": 0.9349, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.12766903914590746, |
|
"grad_norm": 0.3561248183250427, |
|
"learning_rate": 0.00014331658291457286, |
|
"loss": 0.869, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.12811387900355872, |
|
"grad_norm": 0.43630316853523254, |
|
"learning_rate": 0.00014311557788944726, |
|
"loss": 0.9773, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.12855871886120995, |
|
"grad_norm": 0.3555675446987152, |
|
"learning_rate": 0.0001429145728643216, |
|
"loss": 1.0048, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.1290035587188612, |
|
"grad_norm": 0.40323683619499207, |
|
"learning_rate": 0.00014271356783919598, |
|
"loss": 0.999, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.12944839857651247, |
|
"grad_norm": 0.3344346284866333, |
|
"learning_rate": 0.00014251256281407035, |
|
"loss": 0.9882, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.1298932384341637, |
|
"grad_norm": 0.3018104135990143, |
|
"learning_rate": 0.00014231155778894473, |
|
"loss": 0.9665, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.13033807829181496, |
|
"grad_norm": 0.3336434066295624, |
|
"learning_rate": 0.0001421105527638191, |
|
"loss": 0.9786, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.1307829181494662, |
|
"grad_norm": 0.3364970088005066, |
|
"learning_rate": 0.00014190954773869347, |
|
"loss": 0.9436, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.13122775800711745, |
|
"grad_norm": 0.3738161027431488, |
|
"learning_rate": 0.00014170854271356784, |
|
"loss": 0.9884, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.13167259786476868, |
|
"grad_norm": 0.34548112750053406, |
|
"learning_rate": 0.00014150753768844222, |
|
"loss": 0.9578, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.13211743772241993, |
|
"grad_norm": 0.4063248336315155, |
|
"learning_rate": 0.0001413065326633166, |
|
"loss": 1.0649, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.13256227758007116, |
|
"grad_norm": 0.4038452208042145, |
|
"learning_rate": 0.00014110552763819096, |
|
"loss": 0.9824, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.13300711743772242, |
|
"grad_norm": 0.3010196089744568, |
|
"learning_rate": 0.00014090452261306534, |
|
"loss": 0.9492, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.13345195729537365, |
|
"grad_norm": 0.3451170325279236, |
|
"learning_rate": 0.0001407035175879397, |
|
"loss": 1.1038, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.1338967971530249, |
|
"grad_norm": 0.3674055337905884, |
|
"learning_rate": 0.00014050251256281408, |
|
"loss": 0.9764, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.13434163701067617, |
|
"grad_norm": 0.28219959139823914, |
|
"learning_rate": 0.00014030150753768846, |
|
"loss": 0.8152, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.1347864768683274, |
|
"grad_norm": 0.3264709711074829, |
|
"learning_rate": 0.0001401005025125628, |
|
"loss": 1.0336, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 0.13523131672597866, |
|
"grad_norm": 0.3442930579185486, |
|
"learning_rate": 0.0001398994974874372, |
|
"loss": 0.9622, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.1356761565836299, |
|
"grad_norm": 0.4365461468696594, |
|
"learning_rate": 0.00013969849246231157, |
|
"loss": 0.9243, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.13612099644128114, |
|
"grad_norm": 0.3317567706108093, |
|
"learning_rate": 0.00013949748743718595, |
|
"loss": 0.9619, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.13656583629893237, |
|
"grad_norm": 0.3600742816925049, |
|
"learning_rate": 0.0001392964824120603, |
|
"loss": 1.0869, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 0.13701067615658363, |
|
"grad_norm": 0.414174348115921, |
|
"learning_rate": 0.00013909547738693467, |
|
"loss": 0.9696, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.13745551601423486, |
|
"grad_norm": 0.3307357430458069, |
|
"learning_rate": 0.00013889447236180907, |
|
"loss": 0.8673, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 0.13790035587188612, |
|
"grad_norm": 0.3574574291706085, |
|
"learning_rate": 0.0001386934673366834, |
|
"loss": 1.0225, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.13834519572953738, |
|
"grad_norm": 0.3453276455402374, |
|
"learning_rate": 0.00013849246231155778, |
|
"loss": 0.956, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 0.1387900355871886, |
|
"grad_norm": 0.3369787633419037, |
|
"learning_rate": 0.00013829145728643216, |
|
"loss": 0.9878, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.13923487544483987, |
|
"grad_norm": 0.36153116822242737, |
|
"learning_rate": 0.00013809045226130656, |
|
"loss": 0.9906, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.1396797153024911, |
|
"grad_norm": 0.3499455749988556, |
|
"learning_rate": 0.0001378894472361809, |
|
"loss": 1.0581, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.14012455516014236, |
|
"grad_norm": 0.3346167206764221, |
|
"learning_rate": 0.00013768844221105528, |
|
"loss": 1.1294, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.14056939501779359, |
|
"grad_norm": 0.3535563051700592, |
|
"learning_rate": 0.00013748743718592965, |
|
"loss": 1.0081, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.14101423487544484, |
|
"grad_norm": 0.3797767758369446, |
|
"learning_rate": 0.00013728643216080402, |
|
"loss": 0.9821, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 0.14145907473309607, |
|
"grad_norm": 0.35535991191864014, |
|
"learning_rate": 0.0001370854271356784, |
|
"loss": 0.9698, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.14190391459074733, |
|
"grad_norm": 0.3628135323524475, |
|
"learning_rate": 0.00013688442211055277, |
|
"loss": 0.8782, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 0.1423487544483986, |
|
"grad_norm": 0.3347620666027069, |
|
"learning_rate": 0.00013668341708542714, |
|
"loss": 0.9383, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.14279359430604982, |
|
"grad_norm": 0.3387628197669983, |
|
"learning_rate": 0.00013648241206030151, |
|
"loss": 0.9846, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 0.14323843416370108, |
|
"grad_norm": 0.33139505982398987, |
|
"learning_rate": 0.0001362814070351759, |
|
"loss": 0.9541, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.1436832740213523, |
|
"grad_norm": 0.3996013104915619, |
|
"learning_rate": 0.00013608040201005026, |
|
"loss": 0.9682, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 0.14412811387900357, |
|
"grad_norm": 0.3609457314014435, |
|
"learning_rate": 0.00013587939698492463, |
|
"loss": 0.9758, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.1445729537366548, |
|
"grad_norm": 0.31257274746894836, |
|
"learning_rate": 0.000135678391959799, |
|
"loss": 0.9793, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.14501779359430605, |
|
"grad_norm": 0.3800847828388214, |
|
"learning_rate": 0.00013547738693467338, |
|
"loss": 1.0033, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.14546263345195729, |
|
"grad_norm": 0.32256045937538147, |
|
"learning_rate": 0.00013527638190954775, |
|
"loss": 0.9944, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 0.14590747330960854, |
|
"grad_norm": 0.36587563157081604, |
|
"learning_rate": 0.0001350753768844221, |
|
"loss": 1.0223, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.14635231316725977, |
|
"grad_norm": 0.37276628613471985, |
|
"learning_rate": 0.00013487437185929647, |
|
"loss": 0.8559, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 0.14679715302491103, |
|
"grad_norm": 0.3412066698074341, |
|
"learning_rate": 0.00013467336683417087, |
|
"loss": 0.8826, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.1472419928825623, |
|
"grad_norm": 0.33895769715309143, |
|
"learning_rate": 0.00013447236180904524, |
|
"loss": 0.9517, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 0.14768683274021352, |
|
"grad_norm": 0.3830057680606842, |
|
"learning_rate": 0.0001342713567839196, |
|
"loss": 1.0307, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.14813167259786478, |
|
"grad_norm": 0.3927343487739563, |
|
"learning_rate": 0.00013407035175879396, |
|
"loss": 0.9916, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 0.148576512455516, |
|
"grad_norm": 0.3220495283603668, |
|
"learning_rate": 0.00013386934673366836, |
|
"loss": 0.9876, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.14902135231316727, |
|
"grad_norm": 0.35559162497520447, |
|
"learning_rate": 0.0001336683417085427, |
|
"loss": 1.0066, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.1494661921708185, |
|
"grad_norm": 0.36672064661979675, |
|
"learning_rate": 0.00013346733668341708, |
|
"loss": 0.9865, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.14991103202846975, |
|
"grad_norm": 0.3622550964355469, |
|
"learning_rate": 0.00013326633165829146, |
|
"loss": 1.0956, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 0.15035587188612098, |
|
"grad_norm": 0.3213232159614563, |
|
"learning_rate": 0.00013306532663316586, |
|
"loss": 0.9552, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.15080071174377224, |
|
"grad_norm": 0.3889192044734955, |
|
"learning_rate": 0.0001328643216080402, |
|
"loss": 0.9417, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 0.1512455516014235, |
|
"grad_norm": 0.32667845487594604, |
|
"learning_rate": 0.00013266331658291457, |
|
"loss": 0.9319, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.15169039145907473, |
|
"grad_norm": 0.44496315717697144, |
|
"learning_rate": 0.00013246231155778895, |
|
"loss": 0.7937, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 0.152135231316726, |
|
"grad_norm": 0.371467649936676, |
|
"learning_rate": 0.00013226130653266332, |
|
"loss": 0.9998, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.15258007117437722, |
|
"grad_norm": 0.3509642779827118, |
|
"learning_rate": 0.0001320603015075377, |
|
"loss": 0.9615, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 0.15302491103202848, |
|
"grad_norm": 0.32483533024787903, |
|
"learning_rate": 0.00013185929648241207, |
|
"loss": 0.9976, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.1534697508896797, |
|
"grad_norm": 0.38299068808555603, |
|
"learning_rate": 0.00013165829145728644, |
|
"loss": 0.8436, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.15391459074733096, |
|
"grad_norm": 0.36412617564201355, |
|
"learning_rate": 0.0001314572864321608, |
|
"loss": 1.0338, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.1543594306049822, |
|
"grad_norm": 0.3295581638813019, |
|
"learning_rate": 0.00013125628140703518, |
|
"loss": 0.9696, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 0.15480427046263345, |
|
"grad_norm": 0.34034737944602966, |
|
"learning_rate": 0.00013105527638190956, |
|
"loss": 0.9977, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.1552491103202847, |
|
"grad_norm": 0.3623081147670746, |
|
"learning_rate": 0.00013085427135678393, |
|
"loss": 1.0032, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 0.15569395017793594, |
|
"grad_norm": 0.3889707922935486, |
|
"learning_rate": 0.0001306532663316583, |
|
"loss": 0.9245, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.1561387900355872, |
|
"grad_norm": 0.3887019455432892, |
|
"learning_rate": 0.00013045226130653268, |
|
"loss": 1.0836, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 0.15658362989323843, |
|
"grad_norm": 0.32099735736846924, |
|
"learning_rate": 0.00013025125628140705, |
|
"loss": 0.907, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.1570284697508897, |
|
"grad_norm": 0.29804012179374695, |
|
"learning_rate": 0.0001300502512562814, |
|
"loss": 0.9691, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 0.15747330960854092, |
|
"grad_norm": 0.3739759922027588, |
|
"learning_rate": 0.00012984924623115577, |
|
"loss": 1.0826, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 0.15791814946619218, |
|
"grad_norm": 0.3806516230106354, |
|
"learning_rate": 0.00012964824120603017, |
|
"loss": 1.0638, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.1583629893238434, |
|
"grad_norm": 0.3504475951194763, |
|
"learning_rate": 0.00012944723618090454, |
|
"loss": 0.8802, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.15880782918149466, |
|
"grad_norm": 0.4315776228904724, |
|
"learning_rate": 0.0001292462311557789, |
|
"loss": 1.0159, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 0.1592526690391459, |
|
"grad_norm": 0.3647462725639343, |
|
"learning_rate": 0.00012904522613065326, |
|
"loss": 0.985, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 0.15969750889679715, |
|
"grad_norm": 0.30111998319625854, |
|
"learning_rate": 0.00012884422110552766, |
|
"loss": 0.951, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 0.1601423487544484, |
|
"grad_norm": 0.3357127904891968, |
|
"learning_rate": 0.000128643216080402, |
|
"loss": 1.1002, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.16058718861209964, |
|
"grad_norm": 0.4213799834251404, |
|
"learning_rate": 0.00012844221105527638, |
|
"loss": 0.8264, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 0.1610320284697509, |
|
"grad_norm": 0.4168066084384918, |
|
"learning_rate": 0.00012824120603015075, |
|
"loss": 1.0548, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 0.16147686832740213, |
|
"grad_norm": 0.36040782928466797, |
|
"learning_rate": 0.00012804020100502515, |
|
"loss": 1.0988, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 0.1619217081850534, |
|
"grad_norm": 0.315931499004364, |
|
"learning_rate": 0.0001278391959798995, |
|
"loss": 0.8874, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.16236654804270462, |
|
"grad_norm": 0.3526541292667389, |
|
"learning_rate": 0.00012763819095477387, |
|
"loss": 0.8408, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.16281138790035588, |
|
"grad_norm": 0.315824031829834, |
|
"learning_rate": 0.00012743718592964824, |
|
"loss": 0.9537, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 0.1632562277580071, |
|
"grad_norm": 0.36629655957221985, |
|
"learning_rate": 0.00012723618090452262, |
|
"loss": 0.9732, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 0.16370106761565836, |
|
"grad_norm": 0.3385011553764343, |
|
"learning_rate": 0.000127035175879397, |
|
"loss": 1.0207, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.16414590747330962, |
|
"grad_norm": 0.43948590755462646, |
|
"learning_rate": 0.00012683417085427136, |
|
"loss": 0.8719, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.16459074733096085, |
|
"grad_norm": 0.4001463055610657, |
|
"learning_rate": 0.00012663316582914574, |
|
"loss": 1.085, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.1650355871886121, |
|
"grad_norm": 0.37441739439964294, |
|
"learning_rate": 0.0001264321608040201, |
|
"loss": 0.9799, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.16548042704626334, |
|
"grad_norm": 0.29792410135269165, |
|
"learning_rate": 0.00012623115577889448, |
|
"loss": 0.8901, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.1659252669039146, |
|
"grad_norm": 0.42688125371932983, |
|
"learning_rate": 0.00012603015075376885, |
|
"loss": 1.085, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.16637010676156583, |
|
"grad_norm": 0.3854696750640869, |
|
"learning_rate": 0.00012582914572864323, |
|
"loss": 0.9271, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.16681494661921709, |
|
"grad_norm": 0.40170320868492126, |
|
"learning_rate": 0.0001256281407035176, |
|
"loss": 0.9355, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.16725978647686832, |
|
"grad_norm": 0.3294379711151123, |
|
"learning_rate": 0.00012542713567839197, |
|
"loss": 0.9221, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.16770462633451957, |
|
"grad_norm": 0.3526048958301544, |
|
"learning_rate": 0.00012522613065326635, |
|
"loss": 1.0483, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.16814946619217083, |
|
"grad_norm": 0.3107386529445648, |
|
"learning_rate": 0.0001250251256281407, |
|
"loss": 0.9166, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.16859430604982206, |
|
"grad_norm": 0.4228864908218384, |
|
"learning_rate": 0.00012482412060301507, |
|
"loss": 1.0207, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.16903914590747332, |
|
"grad_norm": 0.3822677731513977, |
|
"learning_rate": 0.00012462311557788947, |
|
"loss": 0.9935, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.16948398576512455, |
|
"grad_norm": 0.3852001428604126, |
|
"learning_rate": 0.00012442211055276384, |
|
"loss": 0.9831, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.1699288256227758, |
|
"grad_norm": 0.3484053313732147, |
|
"learning_rate": 0.00012422110552763818, |
|
"loss": 0.9327, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.17037366548042704, |
|
"grad_norm": 0.3499116897583008, |
|
"learning_rate": 0.00012402010050251256, |
|
"loss": 1.0521, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.1708185053380783, |
|
"grad_norm": 0.362118661403656, |
|
"learning_rate": 0.00012381909547738696, |
|
"loss": 1.0482, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.17126334519572953, |
|
"grad_norm": 0.3913699686527252, |
|
"learning_rate": 0.0001236180904522613, |
|
"loss": 0.9908, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.17170818505338079, |
|
"grad_norm": 0.3362598121166229, |
|
"learning_rate": 0.00012341708542713568, |
|
"loss": 0.9814, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.17215302491103202, |
|
"grad_norm": 0.34024205803871155, |
|
"learning_rate": 0.00012321608040201005, |
|
"loss": 0.9747, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.17259786476868327, |
|
"grad_norm": 0.35168662667274475, |
|
"learning_rate": 0.00012301507537688445, |
|
"loss": 1.0067, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.17304270462633453, |
|
"grad_norm": 0.30861225724220276, |
|
"learning_rate": 0.0001228140703517588, |
|
"loss": 1.06, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.17348754448398576, |
|
"grad_norm": 0.3759268522262573, |
|
"learning_rate": 0.00012261306532663317, |
|
"loss": 1.1586, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.17393238434163702, |
|
"grad_norm": 0.3894938826560974, |
|
"learning_rate": 0.00012241206030150754, |
|
"loss": 0.9451, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.17437722419928825, |
|
"grad_norm": 0.3796166777610779, |
|
"learning_rate": 0.00012221105527638191, |
|
"loss": 0.9602, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.1748220640569395, |
|
"grad_norm": 0.32060232758522034, |
|
"learning_rate": 0.00012201005025125629, |
|
"loss": 1.0365, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.17526690391459074, |
|
"grad_norm": 0.33367520570755005, |
|
"learning_rate": 0.00012180904522613066, |
|
"loss": 0.9616, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.175711743772242, |
|
"grad_norm": 0.3084149658679962, |
|
"learning_rate": 0.00012160804020100502, |
|
"loss": 0.8514, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.17615658362989323, |
|
"grad_norm": 0.32909664511680603, |
|
"learning_rate": 0.00012140703517587942, |
|
"loss": 0.9773, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.17660142348754448, |
|
"grad_norm": 0.3140466511249542, |
|
"learning_rate": 0.00012120603015075378, |
|
"loss": 0.9808, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.17704626334519574, |
|
"grad_norm": 0.34496647119522095, |
|
"learning_rate": 0.00012100502512562815, |
|
"loss": 0.8233, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.17749110320284697, |
|
"grad_norm": 0.4260810315608978, |
|
"learning_rate": 0.00012080402010050251, |
|
"loss": 0.9703, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.17793594306049823, |
|
"grad_norm": 0.3465157747268677, |
|
"learning_rate": 0.00012060301507537688, |
|
"loss": 1.0121, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.17838078291814946, |
|
"grad_norm": 0.3023923635482788, |
|
"learning_rate": 0.00012040201005025127, |
|
"loss": 0.9899, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.17882562277580072, |
|
"grad_norm": 0.3486076593399048, |
|
"learning_rate": 0.00012020100502512563, |
|
"loss": 0.9325, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.17927046263345195, |
|
"grad_norm": 0.3666176199913025, |
|
"learning_rate": 0.00012, |
|
"loss": 1.019, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.1797153024911032, |
|
"grad_norm": 0.3321322798728943, |
|
"learning_rate": 0.00011979899497487436, |
|
"loss": 0.9197, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.18016014234875444, |
|
"grad_norm": 0.4836527407169342, |
|
"learning_rate": 0.00011959798994974876, |
|
"loss": 0.8488, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.1806049822064057, |
|
"grad_norm": 0.3428821861743927, |
|
"learning_rate": 0.00011939698492462312, |
|
"loss": 1.0314, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.18104982206405695, |
|
"grad_norm": 0.38532841205596924, |
|
"learning_rate": 0.0001191959798994975, |
|
"loss": 0.9393, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.18149466192170818, |
|
"grad_norm": 0.3802700340747833, |
|
"learning_rate": 0.00011899497487437185, |
|
"loss": 0.9136, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.18193950177935944, |
|
"grad_norm": 0.3403851389884949, |
|
"learning_rate": 0.00011879396984924624, |
|
"loss": 0.9552, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.18238434163701067, |
|
"grad_norm": 0.3999098241329193, |
|
"learning_rate": 0.00011859296482412061, |
|
"loss": 0.9292, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.18282918149466193, |
|
"grad_norm": 0.3740805685520172, |
|
"learning_rate": 0.00011839195979899497, |
|
"loss": 1.0809, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.18327402135231316, |
|
"grad_norm": 0.42133694887161255, |
|
"learning_rate": 0.00011819095477386935, |
|
"loss": 0.8661, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.18371886120996442, |
|
"grad_norm": 0.39346155524253845, |
|
"learning_rate": 0.00011798994974874373, |
|
"loss": 1.0682, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.18416370106761565, |
|
"grad_norm": 0.3395063281059265, |
|
"learning_rate": 0.0001177889447236181, |
|
"loss": 0.9672, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.1846085409252669, |
|
"grad_norm": 0.3930981755256653, |
|
"learning_rate": 0.00011758793969849247, |
|
"loss": 0.9354, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.18505338078291814, |
|
"grad_norm": 0.40588700771331787, |
|
"learning_rate": 0.00011738693467336684, |
|
"loss": 0.9723, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.1854982206405694, |
|
"grad_norm": 0.4842967689037323, |
|
"learning_rate": 0.00011718592964824122, |
|
"loss": 0.9098, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.18594306049822065, |
|
"grad_norm": 0.34736424684524536, |
|
"learning_rate": 0.00011698492462311558, |
|
"loss": 0.9383, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.18638790035587188, |
|
"grad_norm": 0.41721245646476746, |
|
"learning_rate": 0.00011678391959798996, |
|
"loss": 0.9804, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.18683274021352314, |
|
"grad_norm": 0.320420503616333, |
|
"learning_rate": 0.00011658291457286432, |
|
"loss": 0.9266, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.18727758007117437, |
|
"grad_norm": 0.3732924461364746, |
|
"learning_rate": 0.00011638190954773872, |
|
"loss": 0.9703, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.18772241992882563, |
|
"grad_norm": 0.3584558069705963, |
|
"learning_rate": 0.00011618090452261308, |
|
"loss": 0.8832, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.18816725978647686, |
|
"grad_norm": 0.3892074525356293, |
|
"learning_rate": 0.00011597989949748745, |
|
"loss": 0.9617, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.18861209964412812, |
|
"grad_norm": 0.3820159435272217, |
|
"learning_rate": 0.00011577889447236181, |
|
"loss": 0.9092, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.18905693950177935, |
|
"grad_norm": 0.33480194211006165, |
|
"learning_rate": 0.00011557788944723618, |
|
"loss": 1.0104, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.1895017793594306, |
|
"grad_norm": 0.41536465287208557, |
|
"learning_rate": 0.00011537688442211057, |
|
"loss": 0.9157, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.18994661921708186, |
|
"grad_norm": 0.3305935263633728, |
|
"learning_rate": 0.00011517587939698493, |
|
"loss": 0.9622, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.1903914590747331, |
|
"grad_norm": 0.4637777805328369, |
|
"learning_rate": 0.0001149748743718593, |
|
"loss": 0.9317, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.19083629893238435, |
|
"grad_norm": 0.38072511553764343, |
|
"learning_rate": 0.00011477386934673366, |
|
"loss": 1.0061, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.19128113879003558, |
|
"grad_norm": 0.35834434628486633, |
|
"learning_rate": 0.00011457286432160806, |
|
"loss": 1.0108, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.19172597864768684, |
|
"grad_norm": 0.4265679717063904, |
|
"learning_rate": 0.00011437185929648242, |
|
"loss": 0.9451, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.19217081850533807, |
|
"grad_norm": 0.38988345861434937, |
|
"learning_rate": 0.00011417085427135679, |
|
"loss": 1.1026, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.19261565836298933, |
|
"grad_norm": 0.35931700468063354, |
|
"learning_rate": 0.00011396984924623115, |
|
"loss": 0.9296, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.19306049822064056, |
|
"grad_norm": 0.3506704568862915, |
|
"learning_rate": 0.00011376884422110554, |
|
"loss": 0.942, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.19350533807829182, |
|
"grad_norm": 0.38077566027641296, |
|
"learning_rate": 0.00011356783919597991, |
|
"loss": 1.0012, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.19395017793594305, |
|
"grad_norm": 0.3146056532859802, |
|
"learning_rate": 0.00011336683417085427, |
|
"loss": 0.9361, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.1943950177935943, |
|
"grad_norm": 0.42160287499427795, |
|
"learning_rate": 0.00011316582914572864, |
|
"loss": 0.9426, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.19483985765124556, |
|
"grad_norm": 0.33514153957366943, |
|
"learning_rate": 0.00011296482412060303, |
|
"loss": 1.0237, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.1952846975088968, |
|
"grad_norm": 0.46194738149642944, |
|
"learning_rate": 0.0001127638190954774, |
|
"loss": 1.095, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 0.19572953736654805, |
|
"grad_norm": 0.40086501836776733, |
|
"learning_rate": 0.00011256281407035176, |
|
"loss": 0.9239, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.19617437722419928, |
|
"grad_norm": 0.34976860880851746, |
|
"learning_rate": 0.00011236180904522614, |
|
"loss": 0.8445, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.19661921708185054, |
|
"grad_norm": 0.35818928480148315, |
|
"learning_rate": 0.00011216080402010052, |
|
"loss": 0.9626, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.19706405693950177, |
|
"grad_norm": 0.35546717047691345, |
|
"learning_rate": 0.00011195979899497488, |
|
"loss": 0.981, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 0.19750889679715303, |
|
"grad_norm": 0.32887890934944153, |
|
"learning_rate": 0.00011175879396984925, |
|
"loss": 0.9953, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.19795373665480426, |
|
"grad_norm": 0.3613591492176056, |
|
"learning_rate": 0.00011155778894472361, |
|
"loss": 0.9798, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.19839857651245552, |
|
"grad_norm": 0.3338494896888733, |
|
"learning_rate": 0.00011135678391959799, |
|
"loss": 1.0195, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.19884341637010677, |
|
"grad_norm": 0.3248537480831146, |
|
"learning_rate": 0.00011115577889447237, |
|
"loss": 0.9145, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 0.199288256227758, |
|
"grad_norm": 0.35757869482040405, |
|
"learning_rate": 0.00011095477386934675, |
|
"loss": 0.898, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.19973309608540926, |
|
"grad_norm": 0.35583075881004333, |
|
"learning_rate": 0.0001107537688442211, |
|
"loss": 1.0303, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 0.2001779359430605, |
|
"grad_norm": 0.5167235136032104, |
|
"learning_rate": 0.00011055276381909548, |
|
"loss": 0.8168, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.20062277580071175, |
|
"grad_norm": 0.3596641719341278, |
|
"learning_rate": 0.00011035175879396986, |
|
"loss": 0.9184, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 0.20106761565836298, |
|
"grad_norm": 0.4046080410480499, |
|
"learning_rate": 0.00011015075376884422, |
|
"loss": 1.0039, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.20151245551601424, |
|
"grad_norm": 0.36985480785369873, |
|
"learning_rate": 0.0001099497487437186, |
|
"loss": 0.9409, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 0.20195729537366547, |
|
"grad_norm": 0.36848029494285583, |
|
"learning_rate": 0.00010974874371859296, |
|
"loss": 1.0075, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.20240213523131673, |
|
"grad_norm": 0.3421315550804138, |
|
"learning_rate": 0.00010954773869346736, |
|
"loss": 1.0761, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.20284697508896798, |
|
"grad_norm": 0.35291051864624023, |
|
"learning_rate": 0.00010934673366834172, |
|
"loss": 0.9096, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.20329181494661921, |
|
"grad_norm": 0.3597058355808258, |
|
"learning_rate": 0.00010914572864321609, |
|
"loss": 0.9133, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 0.20373665480427047, |
|
"grad_norm": 0.3638782501220703, |
|
"learning_rate": 0.00010894472361809045, |
|
"loss": 0.9936, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.2041814946619217, |
|
"grad_norm": 0.33646896481513977, |
|
"learning_rate": 0.00010874371859296483, |
|
"loss": 0.9592, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 0.20462633451957296, |
|
"grad_norm": 0.3791368901729584, |
|
"learning_rate": 0.00010854271356783921, |
|
"loss": 0.9708, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.2050711743772242, |
|
"grad_norm": 0.3495243489742279, |
|
"learning_rate": 0.00010834170854271357, |
|
"loss": 0.9908, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 0.20551601423487545, |
|
"grad_norm": 0.34329208731651306, |
|
"learning_rate": 0.00010814070351758794, |
|
"loss": 0.9722, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.20596085409252668, |
|
"grad_norm": 0.40529054403305054, |
|
"learning_rate": 0.00010793969849246233, |
|
"loss": 0.9758, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 0.20640569395017794, |
|
"grad_norm": 0.44128406047821045, |
|
"learning_rate": 0.0001077386934673367, |
|
"loss": 0.9661, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.20685053380782917, |
|
"grad_norm": 0.3696465790271759, |
|
"learning_rate": 0.00010753768844221106, |
|
"loss": 1.0031, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.20729537366548043, |
|
"grad_norm": 0.36873361468315125, |
|
"learning_rate": 0.00010733668341708543, |
|
"loss": 0.9141, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.20774021352313168, |
|
"grad_norm": 0.40861397981643677, |
|
"learning_rate": 0.00010713567839195982, |
|
"loss": 0.9727, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 0.20818505338078291, |
|
"grad_norm": 0.3468811511993408, |
|
"learning_rate": 0.00010693467336683418, |
|
"loss": 1.0182, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.20862989323843417, |
|
"grad_norm": 0.37425243854522705, |
|
"learning_rate": 0.00010673366834170855, |
|
"loss": 0.9268, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 0.2090747330960854, |
|
"grad_norm": 0.4296034574508667, |
|
"learning_rate": 0.00010653266331658291, |
|
"loss": 1.116, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.20951957295373666, |
|
"grad_norm": 0.3852652609348297, |
|
"learning_rate": 0.00010633165829145728, |
|
"loss": 0.9378, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 0.2099644128113879, |
|
"grad_norm": 0.34109607338905334, |
|
"learning_rate": 0.00010613065326633167, |
|
"loss": 1.004, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.21040925266903915, |
|
"grad_norm": 0.4025616943836212, |
|
"learning_rate": 0.00010592964824120604, |
|
"loss": 0.96, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 0.21085409252669038, |
|
"grad_norm": 0.3672776222229004, |
|
"learning_rate": 0.0001057286432160804, |
|
"loss": 0.8744, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 0.21129893238434164, |
|
"grad_norm": 0.36405524611473083, |
|
"learning_rate": 0.00010552763819095478, |
|
"loss": 0.8957, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.2117437722419929, |
|
"grad_norm": 0.41747644543647766, |
|
"learning_rate": 0.00010532663316582916, |
|
"loss": 1.0165, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.21218861209964412, |
|
"grad_norm": 0.3478085994720459, |
|
"learning_rate": 0.00010512562814070352, |
|
"loss": 0.9776, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 0.21263345195729538, |
|
"grad_norm": 0.365633487701416, |
|
"learning_rate": 0.0001049246231155779, |
|
"loss": 0.9826, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 0.2130782918149466, |
|
"grad_norm": 0.3211476802825928, |
|
"learning_rate": 0.00010472361809045225, |
|
"loss": 0.9228, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 0.21352313167259787, |
|
"grad_norm": 0.3374654948711395, |
|
"learning_rate": 0.00010452261306532664, |
|
"loss": 0.9379, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.2139679715302491, |
|
"grad_norm": 0.34640708565711975, |
|
"learning_rate": 0.00010432160804020101, |
|
"loss": 0.8678, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 0.21441281138790036, |
|
"grad_norm": 0.38176846504211426, |
|
"learning_rate": 0.00010412060301507539, |
|
"loss": 0.9597, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 0.2148576512455516, |
|
"grad_norm": 0.34589311480522156, |
|
"learning_rate": 0.00010391959798994975, |
|
"loss": 0.9423, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 0.21530249110320285, |
|
"grad_norm": 0.4062221348285675, |
|
"learning_rate": 0.00010371859296482413, |
|
"loss": 0.9926, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.2157473309608541, |
|
"grad_norm": 0.3403069078922272, |
|
"learning_rate": 0.0001035175879396985, |
|
"loss": 0.9953, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.21619217081850534, |
|
"grad_norm": 0.3341009318828583, |
|
"learning_rate": 0.00010331658291457286, |
|
"loss": 0.9807, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 0.2166370106761566, |
|
"grad_norm": 0.29718342423439026, |
|
"learning_rate": 0.00010311557788944724, |
|
"loss": 0.9296, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 0.21708185053380782, |
|
"grad_norm": 0.4219815135002136, |
|
"learning_rate": 0.00010291457286432162, |
|
"loss": 1.0007, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.21752669039145908, |
|
"grad_norm": 0.30951768159866333, |
|
"learning_rate": 0.00010271356783919598, |
|
"loss": 0.9686, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 0.2179715302491103, |
|
"grad_norm": 0.38819047808647156, |
|
"learning_rate": 0.00010251256281407036, |
|
"loss": 1.1859, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.21841637010676157, |
|
"grad_norm": 0.3185841739177704, |
|
"learning_rate": 0.00010231155778894473, |
|
"loss": 0.9523, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 0.2188612099644128, |
|
"grad_norm": 0.3971594274044037, |
|
"learning_rate": 0.00010211055276381909, |
|
"loss": 1.0133, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 0.21930604982206406, |
|
"grad_norm": 0.31492552161216736, |
|
"learning_rate": 0.00010190954773869348, |
|
"loss": 0.8269, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 0.2197508896797153, |
|
"grad_norm": 0.3949122130870819, |
|
"learning_rate": 0.00010170854271356785, |
|
"loss": 1.0379, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 0.22019572953736655, |
|
"grad_norm": 0.30094975233078003, |
|
"learning_rate": 0.00010150753768844221, |
|
"loss": 0.9071, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.2206405693950178, |
|
"grad_norm": 0.3560626208782196, |
|
"learning_rate": 0.00010130653266331658, |
|
"loss": 0.9853, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.22108540925266904, |
|
"grad_norm": 0.4022250771522522, |
|
"learning_rate": 0.00010110552763819097, |
|
"loss": 0.9823, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 0.2215302491103203, |
|
"grad_norm": 0.3849669098854065, |
|
"learning_rate": 0.00010090452261306533, |
|
"loss": 0.9176, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 0.22197508896797152, |
|
"grad_norm": 0.47436994314193726, |
|
"learning_rate": 0.0001007035175879397, |
|
"loss": 0.9948, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 0.22241992882562278, |
|
"grad_norm": 0.32288622856140137, |
|
"learning_rate": 0.00010050251256281407, |
|
"loss": 0.9409, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.222864768683274, |
|
"grad_norm": 0.33923619985580444, |
|
"learning_rate": 0.00010030150753768846, |
|
"loss": 0.9096, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 0.22330960854092527, |
|
"grad_norm": 0.3114798665046692, |
|
"learning_rate": 0.00010010050251256282, |
|
"loss": 0.9112, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 0.2237544483985765, |
|
"grad_norm": 0.37581518292427063, |
|
"learning_rate": 9.989949748743719e-05, |
|
"loss": 0.9831, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 0.22419928825622776, |
|
"grad_norm": 0.33060386776924133, |
|
"learning_rate": 9.969849246231156e-05, |
|
"loss": 0.8942, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 0.22464412811387902, |
|
"grad_norm": 0.32445380091667175, |
|
"learning_rate": 9.949748743718594e-05, |
|
"loss": 0.9665, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.22508896797153025, |
|
"grad_norm": 0.3652149438858032, |
|
"learning_rate": 9.929648241206031e-05, |
|
"loss": 1.0325, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 0.2255338078291815, |
|
"grad_norm": 0.3789691925048828, |
|
"learning_rate": 9.909547738693468e-05, |
|
"loss": 0.9488, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 0.22597864768683273, |
|
"grad_norm": 0.35577788949012756, |
|
"learning_rate": 9.889447236180906e-05, |
|
"loss": 0.9324, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 0.226423487544484, |
|
"grad_norm": 0.37785062193870544, |
|
"learning_rate": 9.869346733668342e-05, |
|
"loss": 0.9934, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 0.22686832740213522, |
|
"grad_norm": 0.3577893078327179, |
|
"learning_rate": 9.84924623115578e-05, |
|
"loss": 1.0183, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.22731316725978648, |
|
"grad_norm": 0.3955710232257843, |
|
"learning_rate": 9.829145728643216e-05, |
|
"loss": 0.9138, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 0.2277580071174377, |
|
"grad_norm": 0.333099365234375, |
|
"learning_rate": 9.809045226130655e-05, |
|
"loss": 0.9533, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.22820284697508897, |
|
"grad_norm": 0.358026921749115, |
|
"learning_rate": 9.788944723618091e-05, |
|
"loss": 0.9919, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 0.22864768683274023, |
|
"grad_norm": 0.3610975742340088, |
|
"learning_rate": 9.768844221105528e-05, |
|
"loss": 1.0059, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 0.22909252669039146, |
|
"grad_norm": 0.32842594385147095, |
|
"learning_rate": 9.748743718592965e-05, |
|
"loss": 0.9877, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.22953736654804271, |
|
"grad_norm": 0.4672807455062866, |
|
"learning_rate": 9.728643216080403e-05, |
|
"loss": 1.0148, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 0.22998220640569395, |
|
"grad_norm": 0.3504094183444977, |
|
"learning_rate": 9.70854271356784e-05, |
|
"loss": 0.8774, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 0.2304270462633452, |
|
"grad_norm": 0.3299584984779358, |
|
"learning_rate": 9.688442211055276e-05, |
|
"loss": 0.9018, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 0.23087188612099643, |
|
"grad_norm": 0.3598071336746216, |
|
"learning_rate": 9.668341708542715e-05, |
|
"loss": 0.9, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 0.2313167259786477, |
|
"grad_norm": 0.366554856300354, |
|
"learning_rate": 9.64824120603015e-05, |
|
"loss": 0.8612, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.23176156583629892, |
|
"grad_norm": 0.3408771753311157, |
|
"learning_rate": 9.628140703517589e-05, |
|
"loss": 0.9482, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 0.23220640569395018, |
|
"grad_norm": 0.4127398133277893, |
|
"learning_rate": 9.608040201005025e-05, |
|
"loss": 0.9427, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 0.2326512455516014, |
|
"grad_norm": 0.4046645164489746, |
|
"learning_rate": 9.587939698492462e-05, |
|
"loss": 1.0469, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 0.23309608540925267, |
|
"grad_norm": 0.33988115191459656, |
|
"learning_rate": 9.5678391959799e-05, |
|
"loss": 0.888, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 0.23354092526690393, |
|
"grad_norm": 0.35615596175193787, |
|
"learning_rate": 9.547738693467337e-05, |
|
"loss": 0.9522, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.23398576512455516, |
|
"grad_norm": 0.41455528140068054, |
|
"learning_rate": 9.527638190954774e-05, |
|
"loss": 0.9495, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 0.23443060498220641, |
|
"grad_norm": 0.4435769021511078, |
|
"learning_rate": 9.507537688442212e-05, |
|
"loss": 0.9251, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 0.23487544483985764, |
|
"grad_norm": 0.34414413571357727, |
|
"learning_rate": 9.487437185929649e-05, |
|
"loss": 0.9746, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 0.2353202846975089, |
|
"grad_norm": 0.42099055647850037, |
|
"learning_rate": 9.467336683417086e-05, |
|
"loss": 1.0079, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 0.23576512455516013, |
|
"grad_norm": 0.3634389042854309, |
|
"learning_rate": 9.447236180904523e-05, |
|
"loss": 0.8452, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.2362099644128114, |
|
"grad_norm": 0.45584288239479065, |
|
"learning_rate": 9.427135678391961e-05, |
|
"loss": 0.9618, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 0.23665480427046262, |
|
"grad_norm": 0.3558484613895416, |
|
"learning_rate": 9.407035175879397e-05, |
|
"loss": 1.007, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 0.23709964412811388, |
|
"grad_norm": 0.41819700598716736, |
|
"learning_rate": 9.386934673366835e-05, |
|
"loss": 0.8806, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 0.23754448398576514, |
|
"grad_norm": 0.39965230226516724, |
|
"learning_rate": 9.366834170854271e-05, |
|
"loss": 1.0749, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 0.23798932384341637, |
|
"grad_norm": 0.35675838589668274, |
|
"learning_rate": 9.34673366834171e-05, |
|
"loss": 1.0074, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.23843416370106763, |
|
"grad_norm": 0.37170112133026123, |
|
"learning_rate": 9.326633165829146e-05, |
|
"loss": 0.9623, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 0.23887900355871886, |
|
"grad_norm": 0.35851290822029114, |
|
"learning_rate": 9.306532663316585e-05, |
|
"loss": 0.9361, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 0.2393238434163701, |
|
"grad_norm": 0.4395543038845062, |
|
"learning_rate": 9.28643216080402e-05, |
|
"loss": 0.871, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 0.23976868327402134, |
|
"grad_norm": 0.2868823707103729, |
|
"learning_rate": 9.266331658291458e-05, |
|
"loss": 0.9066, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 0.2402135231316726, |
|
"grad_norm": 0.3684181869029999, |
|
"learning_rate": 9.246231155778895e-05, |
|
"loss": 1.0657, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.24065836298932383, |
|
"grad_norm": 0.347599059343338, |
|
"learning_rate": 9.226130653266331e-05, |
|
"loss": 1.0081, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 0.2411032028469751, |
|
"grad_norm": 0.32958024740219116, |
|
"learning_rate": 9.20603015075377e-05, |
|
"loss": 0.9427, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 0.24154804270462635, |
|
"grad_norm": 0.3672914505004883, |
|
"learning_rate": 9.185929648241206e-05, |
|
"loss": 0.994, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 0.24199288256227758, |
|
"grad_norm": 0.3725748658180237, |
|
"learning_rate": 9.165829145728644e-05, |
|
"loss": 0.9835, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 0.24243772241992884, |
|
"grad_norm": 0.39337027072906494, |
|
"learning_rate": 9.14572864321608e-05, |
|
"loss": 0.8137, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.24288256227758007, |
|
"grad_norm": 0.32661673426628113, |
|
"learning_rate": 9.125628140703519e-05, |
|
"loss": 1.0025, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 0.24332740213523132, |
|
"grad_norm": 0.3912467956542969, |
|
"learning_rate": 9.105527638190955e-05, |
|
"loss": 0.9087, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 0.24377224199288255, |
|
"grad_norm": 0.37240296602249146, |
|
"learning_rate": 9.085427135678392e-05, |
|
"loss": 0.9275, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 0.2442170818505338, |
|
"grad_norm": 0.3758089244365692, |
|
"learning_rate": 9.06532663316583e-05, |
|
"loss": 0.8224, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 0.24466192170818504, |
|
"grad_norm": 0.3426155745983124, |
|
"learning_rate": 9.045226130653267e-05, |
|
"loss": 0.8894, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.2451067615658363, |
|
"grad_norm": 0.3958059549331665, |
|
"learning_rate": 9.025125628140704e-05, |
|
"loss": 0.9429, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 0.24555160142348753, |
|
"grad_norm": 0.4533610939979553, |
|
"learning_rate": 9.005025125628141e-05, |
|
"loss": 1.2454, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 0.2459964412811388, |
|
"grad_norm": 0.36734533309936523, |
|
"learning_rate": 8.984924623115579e-05, |
|
"loss": 1.0402, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 0.24644128113879005, |
|
"grad_norm": 0.42336615920066833, |
|
"learning_rate": 8.964824120603016e-05, |
|
"loss": 1.1425, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 0.24688612099644128, |
|
"grad_norm": 0.44834333658218384, |
|
"learning_rate": 8.944723618090453e-05, |
|
"loss": 0.8959, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.24733096085409254, |
|
"grad_norm": 0.3234824240207672, |
|
"learning_rate": 8.92462311557789e-05, |
|
"loss": 0.9743, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 0.24777580071174377, |
|
"grad_norm": 0.3193085491657257, |
|
"learning_rate": 8.904522613065326e-05, |
|
"loss": 0.9685, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 0.24822064056939502, |
|
"grad_norm": 0.34897491335868835, |
|
"learning_rate": 8.884422110552765e-05, |
|
"loss": 0.9132, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 0.24866548042704625, |
|
"grad_norm": 0.3368080258369446, |
|
"learning_rate": 8.864321608040201e-05, |
|
"loss": 0.9678, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 0.2491103202846975, |
|
"grad_norm": 0.3948473036289215, |
|
"learning_rate": 8.84422110552764e-05, |
|
"loss": 0.993, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.24955516014234874, |
|
"grad_norm": 0.40010324120521545, |
|
"learning_rate": 8.824120603015076e-05, |
|
"loss": 0.9779, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.40043747425079346, |
|
"learning_rate": 8.804020100502513e-05, |
|
"loss": 0.9633, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 0.25044483985765126, |
|
"grad_norm": 0.3239743709564209, |
|
"learning_rate": 8.78391959798995e-05, |
|
"loss": 0.939, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 0.2508896797153025, |
|
"grad_norm": 0.3126469552516937, |
|
"learning_rate": 8.763819095477387e-05, |
|
"loss": 0.8804, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 0.2513345195729537, |
|
"grad_norm": 0.32982298731803894, |
|
"learning_rate": 8.743718592964825e-05, |
|
"loss": 0.8698, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.251779359430605, |
|
"grad_norm": 0.4134942293167114, |
|
"learning_rate": 8.723618090452261e-05, |
|
"loss": 0.9995, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 0.25222419928825623, |
|
"grad_norm": 0.34916219115257263, |
|
"learning_rate": 8.7035175879397e-05, |
|
"loss": 0.9463, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 0.2526690391459075, |
|
"grad_norm": 0.3170456886291504, |
|
"learning_rate": 8.683417085427135e-05, |
|
"loss": 0.9028, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 0.2531138790035587, |
|
"grad_norm": 0.377886027097702, |
|
"learning_rate": 8.663316582914574e-05, |
|
"loss": 0.9503, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 0.25355871886120995, |
|
"grad_norm": 0.3546507656574249, |
|
"learning_rate": 8.64321608040201e-05, |
|
"loss": 0.9509, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.2540035587188612, |
|
"grad_norm": 0.38051891326904297, |
|
"learning_rate": 8.623115577889449e-05, |
|
"loss": 0.8949, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 0.25444839857651247, |
|
"grad_norm": 0.2983281910419464, |
|
"learning_rate": 8.603015075376884e-05, |
|
"loss": 1.0411, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 0.2548932384341637, |
|
"grad_norm": 0.3371431529521942, |
|
"learning_rate": 8.582914572864322e-05, |
|
"loss": 0.93, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 0.25533807829181493, |
|
"grad_norm": 0.39484888315200806, |
|
"learning_rate": 8.562814070351759e-05, |
|
"loss": 0.9633, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 0.2557829181494662, |
|
"grad_norm": 0.34015023708343506, |
|
"learning_rate": 8.542713567839196e-05, |
|
"loss": 0.944, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.25622775800711745, |
|
"grad_norm": 0.32014408707618713, |
|
"learning_rate": 8.522613065326634e-05, |
|
"loss": 0.829, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 0.2566725978647687, |
|
"grad_norm": 0.42777976393699646, |
|
"learning_rate": 8.502512562814071e-05, |
|
"loss": 1.0107, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 0.2571174377224199, |
|
"grad_norm": 0.35483554005622864, |
|
"learning_rate": 8.482412060301508e-05, |
|
"loss": 0.9974, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 0.25756227758007116, |
|
"grad_norm": 0.41365087032318115, |
|
"learning_rate": 8.462311557788946e-05, |
|
"loss": 1.062, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 0.2580071174377224, |
|
"grad_norm": 0.37708839774131775, |
|
"learning_rate": 8.442211055276383e-05, |
|
"loss": 0.9283, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.2584519572953737, |
|
"grad_norm": 0.40330421924591064, |
|
"learning_rate": 8.42211055276382e-05, |
|
"loss": 0.9482, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 0.25889679715302494, |
|
"grad_norm": 0.2943516969680786, |
|
"learning_rate": 8.402010050251256e-05, |
|
"loss": 0.9211, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 0.25934163701067614, |
|
"grad_norm": 0.3328346610069275, |
|
"learning_rate": 8.381909547738695e-05, |
|
"loss": 0.9264, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 0.2597864768683274, |
|
"grad_norm": 0.4220837354660034, |
|
"learning_rate": 8.36180904522613e-05, |
|
"loss": 0.9598, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 0.26023131672597866, |
|
"grad_norm": 0.36552560329437256, |
|
"learning_rate": 8.341708542713568e-05, |
|
"loss": 0.9275, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.2606761565836299, |
|
"grad_norm": 0.36409202218055725, |
|
"learning_rate": 8.321608040201005e-05, |
|
"loss": 0.9246, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 0.2611209964412811, |
|
"grad_norm": 0.3256109952926636, |
|
"learning_rate": 8.301507537688443e-05, |
|
"loss": 0.9725, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 0.2615658362989324, |
|
"grad_norm": 0.31421205401420593, |
|
"learning_rate": 8.28140703517588e-05, |
|
"loss": 0.9276, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 0.26201067615658363, |
|
"grad_norm": 0.33592212200164795, |
|
"learning_rate": 8.261306532663317e-05, |
|
"loss": 0.964, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 0.2624555160142349, |
|
"grad_norm": 0.49559640884399414, |
|
"learning_rate": 8.241206030150754e-05, |
|
"loss": 0.9688, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.2629003558718861, |
|
"grad_norm": 0.37904122471809387, |
|
"learning_rate": 8.22110552763819e-05, |
|
"loss": 0.8688, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 0.26334519572953735, |
|
"grad_norm": 0.38418570160865784, |
|
"learning_rate": 8.201005025125629e-05, |
|
"loss": 0.966, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 0.2637900355871886, |
|
"grad_norm": 0.3689751923084259, |
|
"learning_rate": 8.180904522613065e-05, |
|
"loss": 0.8973, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 0.26423487544483987, |
|
"grad_norm": 0.3859994113445282, |
|
"learning_rate": 8.160804020100504e-05, |
|
"loss": 1.034, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 0.2646797153024911, |
|
"grad_norm": 0.4068204164505005, |
|
"learning_rate": 8.14070351758794e-05, |
|
"loss": 0.931, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.26512455516014233, |
|
"grad_norm": 0.3974169194698334, |
|
"learning_rate": 8.120603015075378e-05, |
|
"loss": 1.055, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 0.2655693950177936, |
|
"grad_norm": 0.3818078637123108, |
|
"learning_rate": 8.100502512562814e-05, |
|
"loss": 1.0652, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 0.26601423487544484, |
|
"grad_norm": 0.3733369708061218, |
|
"learning_rate": 8.080402010050251e-05, |
|
"loss": 0.9981, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 0.2664590747330961, |
|
"grad_norm": 0.3980778455734253, |
|
"learning_rate": 8.060301507537689e-05, |
|
"loss": 0.982, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 0.2669039145907473, |
|
"grad_norm": 0.3596290051937103, |
|
"learning_rate": 8.040201005025126e-05, |
|
"loss": 0.896, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.26734875444839856, |
|
"grad_norm": 0.330400675535202, |
|
"learning_rate": 8.020100502512563e-05, |
|
"loss": 0.9776, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 0.2677935943060498, |
|
"grad_norm": 0.3300771415233612, |
|
"learning_rate": 8e-05, |
|
"loss": 1.0576, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 0.2682384341637011, |
|
"grad_norm": 0.36290502548217773, |
|
"learning_rate": 7.979899497487438e-05, |
|
"loss": 0.9378, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 0.26868327402135234, |
|
"grad_norm": 0.3487003743648529, |
|
"learning_rate": 7.959798994974875e-05, |
|
"loss": 0.9146, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 0.26912811387900354, |
|
"grad_norm": 0.3635631501674652, |
|
"learning_rate": 7.939698492462313e-05, |
|
"loss": 0.9758, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.2695729537366548, |
|
"grad_norm": 0.35547807812690735, |
|
"learning_rate": 7.91959798994975e-05, |
|
"loss": 1.0026, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 0.27001779359430605, |
|
"grad_norm": 0.33966726064682007, |
|
"learning_rate": 7.899497487437186e-05, |
|
"loss": 0.9059, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 0.2704626334519573, |
|
"grad_norm": 0.3759270906448364, |
|
"learning_rate": 7.879396984924623e-05, |
|
"loss": 1.0088, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 0.2709074733096085, |
|
"grad_norm": 0.36231812834739685, |
|
"learning_rate": 7.85929648241206e-05, |
|
"loss": 0.9903, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 0.2713523131672598, |
|
"grad_norm": 0.389737069606781, |
|
"learning_rate": 7.839195979899498e-05, |
|
"loss": 0.8706, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.27179715302491103, |
|
"grad_norm": 0.43408992886543274, |
|
"learning_rate": 7.819095477386935e-05, |
|
"loss": 0.9437, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 0.2722419928825623, |
|
"grad_norm": 0.29707393050193787, |
|
"learning_rate": 7.798994974874372e-05, |
|
"loss": 0.8361, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 0.27268683274021355, |
|
"grad_norm": 0.3469286561012268, |
|
"learning_rate": 7.77889447236181e-05, |
|
"loss": 1.0085, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 0.27313167259786475, |
|
"grad_norm": 0.4895261824131012, |
|
"learning_rate": 7.758793969849247e-05, |
|
"loss": 0.8068, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 0.273576512455516, |
|
"grad_norm": 0.3655518591403961, |
|
"learning_rate": 7.738693467336684e-05, |
|
"loss": 0.9248, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.27402135231316727, |
|
"grad_norm": 0.36705151200294495, |
|
"learning_rate": 7.71859296482412e-05, |
|
"loss": 1.0244, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 0.2744661921708185, |
|
"grad_norm": 0.3587312400341034, |
|
"learning_rate": 7.698492462311559e-05, |
|
"loss": 1.0522, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 0.2749110320284697, |
|
"grad_norm": 0.3758445680141449, |
|
"learning_rate": 7.678391959798995e-05, |
|
"loss": 0.9904, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 0.275355871886121, |
|
"grad_norm": 0.31862393021583557, |
|
"learning_rate": 7.658291457286433e-05, |
|
"loss": 0.9216, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 0.27580071174377224, |
|
"grad_norm": 0.4419485926628113, |
|
"learning_rate": 7.638190954773869e-05, |
|
"loss": 0.9049, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.2762455516014235, |
|
"grad_norm": 0.38519737124443054, |
|
"learning_rate": 7.618090452261307e-05, |
|
"loss": 0.9116, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 0.27669039145907476, |
|
"grad_norm": 0.37228310108184814, |
|
"learning_rate": 7.597989949748744e-05, |
|
"loss": 0.9491, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 0.27713523131672596, |
|
"grad_norm": 0.3866081237792969, |
|
"learning_rate": 7.577889447236181e-05, |
|
"loss": 0.9477, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 0.2775800711743772, |
|
"grad_norm": 0.32213568687438965, |
|
"learning_rate": 7.557788944723618e-05, |
|
"loss": 0.9049, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 0.2780249110320285, |
|
"grad_norm": 0.34658104181289673, |
|
"learning_rate": 7.537688442211056e-05, |
|
"loss": 0.9486, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.27846975088967973, |
|
"grad_norm": 0.42399680614471436, |
|
"learning_rate": 7.517587939698493e-05, |
|
"loss": 1.0776, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 0.27891459074733094, |
|
"grad_norm": 0.3882311284542084, |
|
"learning_rate": 7.49748743718593e-05, |
|
"loss": 0.9711, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 0.2793594306049822, |
|
"grad_norm": 0.34879353642463684, |
|
"learning_rate": 7.477386934673368e-05, |
|
"loss": 0.9253, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 0.27980427046263345, |
|
"grad_norm": 0.3509921431541443, |
|
"learning_rate": 7.457286432160805e-05, |
|
"loss": 0.9711, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 0.2802491103202847, |
|
"grad_norm": 0.32417482137680054, |
|
"learning_rate": 7.437185929648241e-05, |
|
"loss": 0.9612, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.28069395017793597, |
|
"grad_norm": 0.3800636827945709, |
|
"learning_rate": 7.417085427135678e-05, |
|
"loss": 1.0243, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 0.28113879003558717, |
|
"grad_norm": 0.38342657685279846, |
|
"learning_rate": 7.396984924623115e-05, |
|
"loss": 0.9895, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 0.28158362989323843, |
|
"grad_norm": 0.40875983238220215, |
|
"learning_rate": 7.376884422110553e-05, |
|
"loss": 0.9428, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 0.2820284697508897, |
|
"grad_norm": 0.34101223945617676, |
|
"learning_rate": 7.35678391959799e-05, |
|
"loss": 0.977, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 0.28247330960854095, |
|
"grad_norm": 0.3537449836730957, |
|
"learning_rate": 7.336683417085427e-05, |
|
"loss": 0.9458, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.28291814946619215, |
|
"grad_norm": 0.3816027343273163, |
|
"learning_rate": 7.316582914572865e-05, |
|
"loss": 1.0429, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 0.2833629893238434, |
|
"grad_norm": 0.39149439334869385, |
|
"learning_rate": 7.296482412060302e-05, |
|
"loss": 0.974, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 0.28380782918149466, |
|
"grad_norm": 0.4129406809806824, |
|
"learning_rate": 7.276381909547739e-05, |
|
"loss": 0.988, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 0.2842526690391459, |
|
"grad_norm": 0.36804699897766113, |
|
"learning_rate": 7.256281407035177e-05, |
|
"loss": 0.9375, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 0.2846975088967972, |
|
"grad_norm": 0.40363621711730957, |
|
"learning_rate": 7.236180904522614e-05, |
|
"loss": 0.9214, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.2851423487544484, |
|
"grad_norm": 0.3833264708518982, |
|
"learning_rate": 7.21608040201005e-05, |
|
"loss": 0.9454, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 0.28558718861209964, |
|
"grad_norm": 0.38440853357315063, |
|
"learning_rate": 7.195979899497488e-05, |
|
"loss": 0.8841, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 0.2860320284697509, |
|
"grad_norm": 0.42536038160324097, |
|
"learning_rate": 7.175879396984924e-05, |
|
"loss": 0.9665, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 0.28647686832740216, |
|
"grad_norm": 0.33145928382873535, |
|
"learning_rate": 7.155778894472363e-05, |
|
"loss": 0.9118, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 0.28692170818505336, |
|
"grad_norm": 0.37018072605133057, |
|
"learning_rate": 7.135678391959799e-05, |
|
"loss": 0.9424, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.2873665480427046, |
|
"grad_norm": 0.37953630089759827, |
|
"learning_rate": 7.115577889447236e-05, |
|
"loss": 0.9889, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 0.2878113879003559, |
|
"grad_norm": 0.3908143937587738, |
|
"learning_rate": 7.095477386934674e-05, |
|
"loss": 0.9815, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 0.28825622775800713, |
|
"grad_norm": 0.38611525297164917, |
|
"learning_rate": 7.075376884422111e-05, |
|
"loss": 1.0179, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 0.28870106761565834, |
|
"grad_norm": 0.4868585169315338, |
|
"learning_rate": 7.055276381909548e-05, |
|
"loss": 1.0551, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 0.2891459074733096, |
|
"grad_norm": 0.4477649927139282, |
|
"learning_rate": 7.035175879396985e-05, |
|
"loss": 0.8795, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.28959074733096085, |
|
"grad_norm": 0.3561191260814667, |
|
"learning_rate": 7.015075376884423e-05, |
|
"loss": 0.8927, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 0.2900355871886121, |
|
"grad_norm": 0.4050782024860382, |
|
"learning_rate": 6.99497487437186e-05, |
|
"loss": 0.9032, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 0.29048042704626337, |
|
"grad_norm": 0.3366767466068268, |
|
"learning_rate": 6.974874371859297e-05, |
|
"loss": 0.9763, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 0.29092526690391457, |
|
"grad_norm": 0.33202803134918213, |
|
"learning_rate": 6.954773869346733e-05, |
|
"loss": 0.8999, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 0.29137010676156583, |
|
"grad_norm": 0.38702261447906494, |
|
"learning_rate": 6.93467336683417e-05, |
|
"loss": 0.8882, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.2918149466192171, |
|
"grad_norm": 0.39489811658859253, |
|
"learning_rate": 6.914572864321608e-05, |
|
"loss": 0.8615, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 0.29225978647686834, |
|
"grad_norm": 0.407174289226532, |
|
"learning_rate": 6.894472361809045e-05, |
|
"loss": 0.8805, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 0.29270462633451955, |
|
"grad_norm": 0.3787648379802704, |
|
"learning_rate": 6.874371859296482e-05, |
|
"loss": 0.9016, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 0.2931494661921708, |
|
"grad_norm": 0.3571087121963501, |
|
"learning_rate": 6.85427135678392e-05, |
|
"loss": 0.9484, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 0.29359430604982206, |
|
"grad_norm": 0.41471996903419495, |
|
"learning_rate": 6.834170854271357e-05, |
|
"loss": 1.0558, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.2940391459074733, |
|
"grad_norm": 0.4297381639480591, |
|
"learning_rate": 6.814070351758794e-05, |
|
"loss": 0.9687, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 0.2944839857651246, |
|
"grad_norm": 0.33687537908554077, |
|
"learning_rate": 6.793969849246232e-05, |
|
"loss": 0.9266, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 0.2949288256227758, |
|
"grad_norm": 0.34857362508773804, |
|
"learning_rate": 6.773869346733669e-05, |
|
"loss": 0.8977, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 0.29537366548042704, |
|
"grad_norm": 0.4323669672012329, |
|
"learning_rate": 6.753768844221105e-05, |
|
"loss": 0.9535, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 0.2958185053380783, |
|
"grad_norm": 0.36021220684051514, |
|
"learning_rate": 6.733668341708544e-05, |
|
"loss": 0.9457, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.29626334519572955, |
|
"grad_norm": 0.3574046492576599, |
|
"learning_rate": 6.71356783919598e-05, |
|
"loss": 0.9377, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 0.29670818505338076, |
|
"grad_norm": 0.3637276291847229, |
|
"learning_rate": 6.693467336683418e-05, |
|
"loss": 0.9769, |
|
"step": 667 |
|
}, |
|
{ |
|
"epoch": 0.297153024911032, |
|
"grad_norm": 0.4049037992954254, |
|
"learning_rate": 6.673366834170854e-05, |
|
"loss": 1.0499, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 0.2975978647686833, |
|
"grad_norm": 0.4278649687767029, |
|
"learning_rate": 6.653266331658293e-05, |
|
"loss": 0.8835, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 0.29804270462633453, |
|
"grad_norm": 0.37466055154800415, |
|
"learning_rate": 6.633165829145729e-05, |
|
"loss": 0.9493, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.2984875444839858, |
|
"grad_norm": 0.3321222960948944, |
|
"learning_rate": 6.613065326633166e-05, |
|
"loss": 0.9484, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 0.298932384341637, |
|
"grad_norm": 0.4597262740135193, |
|
"learning_rate": 6.592964824120603e-05, |
|
"loss": 1.0418, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 0.29937722419928825, |
|
"grad_norm": 0.3227452337741852, |
|
"learning_rate": 6.57286432160804e-05, |
|
"loss": 0.9323, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 0.2998220640569395, |
|
"grad_norm": 0.3192083239555359, |
|
"learning_rate": 6.552763819095478e-05, |
|
"loss": 0.9021, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 0.30026690391459077, |
|
"grad_norm": 0.4145206809043884, |
|
"learning_rate": 6.532663316582915e-05, |
|
"loss": 0.9431, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.30071174377224197, |
|
"grad_norm": 0.4133388102054596, |
|
"learning_rate": 6.512562814070352e-05, |
|
"loss": 0.9673, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 0.3011565836298932, |
|
"grad_norm": 0.4261665940284729, |
|
"learning_rate": 6.492462311557788e-05, |
|
"loss": 1.0128, |
|
"step": 677 |
|
}, |
|
{ |
|
"epoch": 0.3016014234875445, |
|
"grad_norm": 0.33553653955459595, |
|
"learning_rate": 6.472361809045227e-05, |
|
"loss": 0.9103, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 0.30204626334519574, |
|
"grad_norm": 0.378233939409256, |
|
"learning_rate": 6.452261306532663e-05, |
|
"loss": 1.1017, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 0.302491103202847, |
|
"grad_norm": 0.39459675550460815, |
|
"learning_rate": 6.4321608040201e-05, |
|
"loss": 0.9056, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.3029359430604982, |
|
"grad_norm": 0.31103044748306274, |
|
"learning_rate": 6.412060301507538e-05, |
|
"loss": 0.896, |
|
"step": 681 |
|
}, |
|
{ |
|
"epoch": 0.30338078291814946, |
|
"grad_norm": 0.32393592596054077, |
|
"learning_rate": 6.391959798994975e-05, |
|
"loss": 0.9233, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 0.3038256227758007, |
|
"grad_norm": 0.37299537658691406, |
|
"learning_rate": 6.371859296482412e-05, |
|
"loss": 0.9587, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 0.304270462633452, |
|
"grad_norm": 0.3141055703163147, |
|
"learning_rate": 6.35175879396985e-05, |
|
"loss": 0.9314, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 0.3047153024911032, |
|
"grad_norm": 0.32686251401901245, |
|
"learning_rate": 6.331658291457287e-05, |
|
"loss": 0.9325, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.30516014234875444, |
|
"grad_norm": 0.35383760929107666, |
|
"learning_rate": 6.311557788944724e-05, |
|
"loss": 0.9834, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 0.3056049822064057, |
|
"grad_norm": 0.30796653032302856, |
|
"learning_rate": 6.291457286432161e-05, |
|
"loss": 0.8842, |
|
"step": 687 |
|
}, |
|
{ |
|
"epoch": 0.30604982206405695, |
|
"grad_norm": 0.31963440775871277, |
|
"learning_rate": 6.271356783919599e-05, |
|
"loss": 0.9372, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 0.3064946619217082, |
|
"grad_norm": 0.43244582414627075, |
|
"learning_rate": 6.251256281407035e-05, |
|
"loss": 0.939, |
|
"step": 689 |
|
}, |
|
{ |
|
"epoch": 0.3069395017793594, |
|
"grad_norm": 0.32725387811660767, |
|
"learning_rate": 6.231155778894473e-05, |
|
"loss": 0.8819, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.30738434163701067, |
|
"grad_norm": 0.3326573371887207, |
|
"learning_rate": 6.211055276381909e-05, |
|
"loss": 1.0157, |
|
"step": 691 |
|
}, |
|
{ |
|
"epoch": 0.30782918149466193, |
|
"grad_norm": 0.3267054855823517, |
|
"learning_rate": 6.190954773869348e-05, |
|
"loss": 0.8806, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 0.3082740213523132, |
|
"grad_norm": 0.3328150808811188, |
|
"learning_rate": 6.170854271356784e-05, |
|
"loss": 0.9752, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 0.3087188612099644, |
|
"grad_norm": 0.42537713050842285, |
|
"learning_rate": 6.150753768844222e-05, |
|
"loss": 0.8548, |
|
"step": 694 |
|
}, |
|
{ |
|
"epoch": 0.30916370106761565, |
|
"grad_norm": 0.339568167924881, |
|
"learning_rate": 6.130653266331658e-05, |
|
"loss": 0.9964, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.3096085409252669, |
|
"grad_norm": 0.40449681878089905, |
|
"learning_rate": 6.110552763819096e-05, |
|
"loss": 0.8698, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 0.31005338078291816, |
|
"grad_norm": 0.42647475004196167, |
|
"learning_rate": 6.090452261306533e-05, |
|
"loss": 0.9248, |
|
"step": 697 |
|
}, |
|
{ |
|
"epoch": 0.3104982206405694, |
|
"grad_norm": 0.3729760944843292, |
|
"learning_rate": 6.070351758793971e-05, |
|
"loss": 0.9945, |
|
"step": 698 |
|
}, |
|
{ |
|
"epoch": 0.3109430604982206, |
|
"grad_norm": 0.34256407618522644, |
|
"learning_rate": 6.0502512562814076e-05, |
|
"loss": 0.8589, |
|
"step": 699 |
|
}, |
|
{ |
|
"epoch": 0.3113879003558719, |
|
"grad_norm": 0.37434864044189453, |
|
"learning_rate": 6.030150753768844e-05, |
|
"loss": 0.8921, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.31183274021352314, |
|
"grad_norm": 0.3740074336528778, |
|
"learning_rate": 6.0100502512562815e-05, |
|
"loss": 0.9538, |
|
"step": 701 |
|
}, |
|
{ |
|
"epoch": 0.3122775800711744, |
|
"grad_norm": 0.2955409288406372, |
|
"learning_rate": 5.989949748743718e-05, |
|
"loss": 0.8796, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 0.3127224199288256, |
|
"grad_norm": 0.41662129759788513, |
|
"learning_rate": 5.969849246231156e-05, |
|
"loss": 0.976, |
|
"step": 703 |
|
}, |
|
{ |
|
"epoch": 0.31316725978647686, |
|
"grad_norm": 0.3664419651031494, |
|
"learning_rate": 5.949748743718593e-05, |
|
"loss": 0.9813, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 0.3136120996441281, |
|
"grad_norm": 0.4082026779651642, |
|
"learning_rate": 5.929648241206031e-05, |
|
"loss": 0.9604, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.3140569395017794, |
|
"grad_norm": 0.34381023049354553, |
|
"learning_rate": 5.909547738693467e-05, |
|
"loss": 0.9438, |
|
"step": 706 |
|
}, |
|
{ |
|
"epoch": 0.3145017793594306, |
|
"grad_norm": 0.32259446382522583, |
|
"learning_rate": 5.889447236180905e-05, |
|
"loss": 0.8939, |
|
"step": 707 |
|
}, |
|
{ |
|
"epoch": 0.31494661921708184, |
|
"grad_norm": 0.4534759819507599, |
|
"learning_rate": 5.869346733668342e-05, |
|
"loss": 1.0139, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 0.3153914590747331, |
|
"grad_norm": 0.3195643723011017, |
|
"learning_rate": 5.849246231155779e-05, |
|
"loss": 0.8886, |
|
"step": 709 |
|
}, |
|
{ |
|
"epoch": 0.31583629893238435, |
|
"grad_norm": 0.34111788868904114, |
|
"learning_rate": 5.829145728643216e-05, |
|
"loss": 0.9006, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.3162811387900356, |
|
"grad_norm": 0.32145076990127563, |
|
"learning_rate": 5.809045226130654e-05, |
|
"loss": 0.9042, |
|
"step": 711 |
|
}, |
|
{ |
|
"epoch": 0.3167259786476868, |
|
"grad_norm": 0.38871321082115173, |
|
"learning_rate": 5.7889447236180904e-05, |
|
"loss": 0.8285, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 0.31717081850533807, |
|
"grad_norm": 0.34648001194000244, |
|
"learning_rate": 5.7688442211055284e-05, |
|
"loss": 0.8948, |
|
"step": 713 |
|
}, |
|
{ |
|
"epoch": 0.31761565836298933, |
|
"grad_norm": 0.42825043201446533, |
|
"learning_rate": 5.748743718592965e-05, |
|
"loss": 0.9665, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 0.3180604982206406, |
|
"grad_norm": 0.4382406771183014, |
|
"learning_rate": 5.728643216080403e-05, |
|
"loss": 1.0006, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.3185053380782918, |
|
"grad_norm": 0.3384600579738617, |
|
"learning_rate": 5.7085427135678396e-05, |
|
"loss": 0.893, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 0.31895017793594305, |
|
"grad_norm": 0.329862505197525, |
|
"learning_rate": 5.688442211055277e-05, |
|
"loss": 0.8064, |
|
"step": 717 |
|
}, |
|
{ |
|
"epoch": 0.3193950177935943, |
|
"grad_norm": 0.36745166778564453, |
|
"learning_rate": 5.6683417085427135e-05, |
|
"loss": 0.9621, |
|
"step": 718 |
|
}, |
|
{ |
|
"epoch": 0.31983985765124556, |
|
"grad_norm": 0.36240148544311523, |
|
"learning_rate": 5.6482412060301515e-05, |
|
"loss": 0.8902, |
|
"step": 719 |
|
}, |
|
{ |
|
"epoch": 0.3202846975088968, |
|
"grad_norm": 0.3071519136428833, |
|
"learning_rate": 5.628140703517588e-05, |
|
"loss": 0.9572, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.320729537366548, |
|
"grad_norm": 0.3393983840942383, |
|
"learning_rate": 5.608040201005026e-05, |
|
"loss": 0.9021, |
|
"step": 721 |
|
}, |
|
{ |
|
"epoch": 0.3211743772241993, |
|
"grad_norm": 0.3375062942504883, |
|
"learning_rate": 5.587939698492463e-05, |
|
"loss": 0.8505, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 0.32161921708185054, |
|
"grad_norm": 0.36166590452194214, |
|
"learning_rate": 5.567839195979899e-05, |
|
"loss": 0.9217, |
|
"step": 723 |
|
}, |
|
{ |
|
"epoch": 0.3220640569395018, |
|
"grad_norm": 0.423556923866272, |
|
"learning_rate": 5.547738693467337e-05, |
|
"loss": 0.8722, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 0.322508896797153, |
|
"grad_norm": 0.3881194293498993, |
|
"learning_rate": 5.527638190954774e-05, |
|
"loss": 0.9246, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.32295373665480426, |
|
"grad_norm": 0.41174158453941345, |
|
"learning_rate": 5.507537688442211e-05, |
|
"loss": 0.9962, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 0.3233985765124555, |
|
"grad_norm": 0.3954409062862396, |
|
"learning_rate": 5.487437185929648e-05, |
|
"loss": 0.8989, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 0.3238434163701068, |
|
"grad_norm": 0.371358722448349, |
|
"learning_rate": 5.467336683417086e-05, |
|
"loss": 0.9655, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 0.32428825622775803, |
|
"grad_norm": 0.4016554355621338, |
|
"learning_rate": 5.4472361809045224e-05, |
|
"loss": 0.9298, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 0.32473309608540923, |
|
"grad_norm": 0.45096760988235474, |
|
"learning_rate": 5.4271356783919604e-05, |
|
"loss": 1.0633, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.3251779359430605, |
|
"grad_norm": 0.42669543623924255, |
|
"learning_rate": 5.407035175879397e-05, |
|
"loss": 0.9897, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 0.32562277580071175, |
|
"grad_norm": 0.3072949945926666, |
|
"learning_rate": 5.386934673366835e-05, |
|
"loss": 0.8538, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 0.326067615658363, |
|
"grad_norm": 0.3681629002094269, |
|
"learning_rate": 5.3668341708542716e-05, |
|
"loss": 0.9532, |
|
"step": 733 |
|
}, |
|
{ |
|
"epoch": 0.3265124555160142, |
|
"grad_norm": 0.3667491674423218, |
|
"learning_rate": 5.346733668341709e-05, |
|
"loss": 0.9971, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 0.32695729537366547, |
|
"grad_norm": 0.3561696410179138, |
|
"learning_rate": 5.3266331658291455e-05, |
|
"loss": 0.9332, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.3274021352313167, |
|
"grad_norm": 0.51035076379776, |
|
"learning_rate": 5.3065326633165835e-05, |
|
"loss": 0.9938, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 0.327846975088968, |
|
"grad_norm": 0.30791398882865906, |
|
"learning_rate": 5.28643216080402e-05, |
|
"loss": 0.9195, |
|
"step": 737 |
|
}, |
|
{ |
|
"epoch": 0.32829181494661924, |
|
"grad_norm": 0.37262293696403503, |
|
"learning_rate": 5.266331658291458e-05, |
|
"loss": 0.8857, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 0.32873665480427045, |
|
"grad_norm": 0.3665037751197815, |
|
"learning_rate": 5.246231155778895e-05, |
|
"loss": 0.8634, |
|
"step": 739 |
|
}, |
|
{ |
|
"epoch": 0.3291814946619217, |
|
"grad_norm": 0.3333563804626465, |
|
"learning_rate": 5.226130653266332e-05, |
|
"loss": 0.9507, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.32962633451957296, |
|
"grad_norm": 0.3755170404911041, |
|
"learning_rate": 5.206030150753769e-05, |
|
"loss": 0.9742, |
|
"step": 741 |
|
}, |
|
{ |
|
"epoch": 0.3300711743772242, |
|
"grad_norm": 0.3400593101978302, |
|
"learning_rate": 5.1859296482412066e-05, |
|
"loss": 0.8169, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 0.3305160142348754, |
|
"grad_norm": 0.5976528525352478, |
|
"learning_rate": 5.165829145728643e-05, |
|
"loss": 1.0596, |
|
"step": 743 |
|
}, |
|
{ |
|
"epoch": 0.3309608540925267, |
|
"grad_norm": 0.3495856523513794, |
|
"learning_rate": 5.145728643216081e-05, |
|
"loss": 1.1273, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 0.33140569395017794, |
|
"grad_norm": 0.3898337483406067, |
|
"learning_rate": 5.125628140703518e-05, |
|
"loss": 1.0512, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 0.3318505338078292, |
|
"grad_norm": 0.4057525396347046, |
|
"learning_rate": 5.1055276381909544e-05, |
|
"loss": 1.0474, |
|
"step": 746 |
|
}, |
|
{ |
|
"epoch": 0.33229537366548045, |
|
"grad_norm": 0.365829199552536, |
|
"learning_rate": 5.0854271356783924e-05, |
|
"loss": 1.0095, |
|
"step": 747 |
|
}, |
|
{ |
|
"epoch": 0.33274021352313166, |
|
"grad_norm": 0.3495554029941559, |
|
"learning_rate": 5.065326633165829e-05, |
|
"loss": 0.8723, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 0.3331850533807829, |
|
"grad_norm": 0.3964870572090149, |
|
"learning_rate": 5.045226130653266e-05, |
|
"loss": 0.9775, |
|
"step": 749 |
|
}, |
|
{ |
|
"epoch": 0.33362989323843417, |
|
"grad_norm": 0.3591833710670471, |
|
"learning_rate": 5.0251256281407036e-05, |
|
"loss": 0.8438, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.33407473309608543, |
|
"grad_norm": 0.4429285526275635, |
|
"learning_rate": 5.005025125628141e-05, |
|
"loss": 0.9144, |
|
"step": 751 |
|
}, |
|
{ |
|
"epoch": 0.33451957295373663, |
|
"grad_norm": 0.3427349328994751, |
|
"learning_rate": 4.984924623115578e-05, |
|
"loss": 0.8728, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 0.3349644128113879, |
|
"grad_norm": 0.4042606055736542, |
|
"learning_rate": 4.9648241206030155e-05, |
|
"loss": 1.0932, |
|
"step": 753 |
|
}, |
|
{ |
|
"epoch": 0.33540925266903915, |
|
"grad_norm": 0.38601040840148926, |
|
"learning_rate": 4.944723618090453e-05, |
|
"loss": 0.8947, |
|
"step": 754 |
|
}, |
|
{ |
|
"epoch": 0.3358540925266904, |
|
"grad_norm": 0.31526607275009155, |
|
"learning_rate": 4.92462311557789e-05, |
|
"loss": 0.8783, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 0.33629893238434166, |
|
"grad_norm": 0.3799164593219757, |
|
"learning_rate": 4.9045226130653274e-05, |
|
"loss": 0.894, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 0.33674377224199287, |
|
"grad_norm": 0.36352670192718506, |
|
"learning_rate": 4.884422110552764e-05, |
|
"loss": 1.0077, |
|
"step": 757 |
|
}, |
|
{ |
|
"epoch": 0.3371886120996441, |
|
"grad_norm": 0.37971585988998413, |
|
"learning_rate": 4.864321608040201e-05, |
|
"loss": 0.8504, |
|
"step": 758 |
|
}, |
|
{ |
|
"epoch": 0.3376334519572954, |
|
"grad_norm": 0.3846040666103363, |
|
"learning_rate": 4.844221105527638e-05, |
|
"loss": 0.9923, |
|
"step": 759 |
|
}, |
|
{ |
|
"epoch": 0.33807829181494664, |
|
"grad_norm": 0.3028171956539154, |
|
"learning_rate": 4.824120603015075e-05, |
|
"loss": 0.9328, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.33852313167259784, |
|
"grad_norm": 0.35201942920684814, |
|
"learning_rate": 4.8040201005025125e-05, |
|
"loss": 0.8622, |
|
"step": 761 |
|
}, |
|
{ |
|
"epoch": 0.3389679715302491, |
|
"grad_norm": 0.3402314782142639, |
|
"learning_rate": 4.78391959798995e-05, |
|
"loss": 1.0578, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 0.33941281138790036, |
|
"grad_norm": 0.43474265933036804, |
|
"learning_rate": 4.763819095477387e-05, |
|
"loss": 1.0589, |
|
"step": 763 |
|
}, |
|
{ |
|
"epoch": 0.3398576512455516, |
|
"grad_norm": 0.4756290912628174, |
|
"learning_rate": 4.7437185929648244e-05, |
|
"loss": 1.1027, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 0.3403024911032028, |
|
"grad_norm": 0.3077346682548523, |
|
"learning_rate": 4.723618090452262e-05, |
|
"loss": 0.9635, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 0.3407473309608541, |
|
"grad_norm": 0.36826518177986145, |
|
"learning_rate": 4.703517587939698e-05, |
|
"loss": 0.9687, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 0.34119217081850534, |
|
"grad_norm": 0.4034232199192047, |
|
"learning_rate": 4.6834170854271356e-05, |
|
"loss": 0.9209, |
|
"step": 767 |
|
}, |
|
{ |
|
"epoch": 0.3416370106761566, |
|
"grad_norm": 0.41342729330062866, |
|
"learning_rate": 4.663316582914573e-05, |
|
"loss": 0.9213, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 0.34208185053380785, |
|
"grad_norm": 0.3749755620956421, |
|
"learning_rate": 4.64321608040201e-05, |
|
"loss": 0.9386, |
|
"step": 769 |
|
}, |
|
{ |
|
"epoch": 0.34252669039145905, |
|
"grad_norm": 0.3411119878292084, |
|
"learning_rate": 4.6231155778894475e-05, |
|
"loss": 0.8805, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.3429715302491103, |
|
"grad_norm": 0.3668997883796692, |
|
"learning_rate": 4.603015075376885e-05, |
|
"loss": 0.9887, |
|
"step": 771 |
|
}, |
|
{ |
|
"epoch": 0.34341637010676157, |
|
"grad_norm": 0.45816823840141296, |
|
"learning_rate": 4.582914572864322e-05, |
|
"loss": 0.9031, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 0.34386120996441283, |
|
"grad_norm": 0.3586982786655426, |
|
"learning_rate": 4.5628140703517594e-05, |
|
"loss": 0.9774, |
|
"step": 773 |
|
}, |
|
{ |
|
"epoch": 0.34430604982206403, |
|
"grad_norm": 0.42974361777305603, |
|
"learning_rate": 4.542713567839196e-05, |
|
"loss": 1.0421, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 0.3447508896797153, |
|
"grad_norm": 0.36137211322784424, |
|
"learning_rate": 4.522613065326633e-05, |
|
"loss": 1.0171, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.34519572953736655, |
|
"grad_norm": 0.413507342338562, |
|
"learning_rate": 4.5025125628140706e-05, |
|
"loss": 0.9786, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 0.3456405693950178, |
|
"grad_norm": 0.42441946268081665, |
|
"learning_rate": 4.482412060301508e-05, |
|
"loss": 1.01, |
|
"step": 777 |
|
}, |
|
{ |
|
"epoch": 0.34608540925266906, |
|
"grad_norm": 0.35072627663612366, |
|
"learning_rate": 4.462311557788945e-05, |
|
"loss": 0.9709, |
|
"step": 778 |
|
}, |
|
{ |
|
"epoch": 0.34653024911032027, |
|
"grad_norm": 0.37332162261009216, |
|
"learning_rate": 4.4422110552763825e-05, |
|
"loss": 0.9844, |
|
"step": 779 |
|
}, |
|
{ |
|
"epoch": 0.3469750889679715, |
|
"grad_norm": 0.388919860124588, |
|
"learning_rate": 4.42211055276382e-05, |
|
"loss": 0.8525, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.3474199288256228, |
|
"grad_norm": 0.3552108407020569, |
|
"learning_rate": 4.4020100502512564e-05, |
|
"loss": 0.9713, |
|
"step": 781 |
|
}, |
|
{ |
|
"epoch": 0.34786476868327404, |
|
"grad_norm": 0.3655258119106293, |
|
"learning_rate": 4.381909547738694e-05, |
|
"loss": 0.9532, |
|
"step": 782 |
|
}, |
|
{ |
|
"epoch": 0.34830960854092524, |
|
"grad_norm": 0.38755300641059875, |
|
"learning_rate": 4.3618090452261303e-05, |
|
"loss": 0.9346, |
|
"step": 783 |
|
}, |
|
{ |
|
"epoch": 0.3487544483985765, |
|
"grad_norm": 0.4547751843929291, |
|
"learning_rate": 4.3417085427135676e-05, |
|
"loss": 0.8872, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 0.34919928825622776, |
|
"grad_norm": 0.2990303039550781, |
|
"learning_rate": 4.321608040201005e-05, |
|
"loss": 0.8979, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 0.349644128113879, |
|
"grad_norm": 0.3581852614879608, |
|
"learning_rate": 4.301507537688442e-05, |
|
"loss": 0.9466, |
|
"step": 786 |
|
}, |
|
{ |
|
"epoch": 0.3500889679715303, |
|
"grad_norm": 0.33662378787994385, |
|
"learning_rate": 4.2814070351758795e-05, |
|
"loss": 0.9274, |
|
"step": 787 |
|
}, |
|
{ |
|
"epoch": 0.3505338078291815, |
|
"grad_norm": 0.34652361273765564, |
|
"learning_rate": 4.261306532663317e-05, |
|
"loss": 0.9791, |
|
"step": 788 |
|
}, |
|
{ |
|
"epoch": 0.35097864768683273, |
|
"grad_norm": 0.39401039481163025, |
|
"learning_rate": 4.241206030150754e-05, |
|
"loss": 0.9485, |
|
"step": 789 |
|
}, |
|
{ |
|
"epoch": 0.351423487544484, |
|
"grad_norm": 0.36272600293159485, |
|
"learning_rate": 4.2211055276381914e-05, |
|
"loss": 0.9341, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.35186832740213525, |
|
"grad_norm": 0.3438786566257477, |
|
"learning_rate": 4.201005025125628e-05, |
|
"loss": 0.8061, |
|
"step": 791 |
|
}, |
|
{ |
|
"epoch": 0.35231316725978645, |
|
"grad_norm": 0.396484911441803, |
|
"learning_rate": 4.180904522613065e-05, |
|
"loss": 0.9506, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 0.3527580071174377, |
|
"grad_norm": 0.31982484459877014, |
|
"learning_rate": 4.1608040201005026e-05, |
|
"loss": 0.9372, |
|
"step": 793 |
|
}, |
|
{ |
|
"epoch": 0.35320284697508897, |
|
"grad_norm": 0.323798805475235, |
|
"learning_rate": 4.14070351758794e-05, |
|
"loss": 0.9342, |
|
"step": 794 |
|
}, |
|
{ |
|
"epoch": 0.3536476868327402, |
|
"grad_norm": 0.3632016181945801, |
|
"learning_rate": 4.120603015075377e-05, |
|
"loss": 0.9682, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 0.3540925266903915, |
|
"grad_norm": 0.3542345464229584, |
|
"learning_rate": 4.1005025125628145e-05, |
|
"loss": 0.9129, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 0.3545373665480427, |
|
"grad_norm": 0.48540955781936646, |
|
"learning_rate": 4.080402010050252e-05, |
|
"loss": 1.0319, |
|
"step": 797 |
|
}, |
|
{ |
|
"epoch": 0.35498220640569395, |
|
"grad_norm": 0.37020549178123474, |
|
"learning_rate": 4.060301507537689e-05, |
|
"loss": 0.8719, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 0.3554270462633452, |
|
"grad_norm": 0.3740653097629547, |
|
"learning_rate": 4.040201005025126e-05, |
|
"loss": 0.9464, |
|
"step": 799 |
|
}, |
|
{ |
|
"epoch": 0.35587188612099646, |
|
"grad_norm": 0.3594771921634674, |
|
"learning_rate": 4.020100502512563e-05, |
|
"loss": 0.9592, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.35631672597864766, |
|
"grad_norm": 0.4490291476249695, |
|
"learning_rate": 4e-05, |
|
"loss": 0.9624, |
|
"step": 801 |
|
}, |
|
{ |
|
"epoch": 0.3567615658362989, |
|
"grad_norm": 0.3087805509567261, |
|
"learning_rate": 3.9798994974874376e-05, |
|
"loss": 0.9905, |
|
"step": 802 |
|
}, |
|
{ |
|
"epoch": 0.3572064056939502, |
|
"grad_norm": 0.39388784766197205, |
|
"learning_rate": 3.959798994974875e-05, |
|
"loss": 0.9644, |
|
"step": 803 |
|
}, |
|
{ |
|
"epoch": 0.35765124555160144, |
|
"grad_norm": 0.411502867937088, |
|
"learning_rate": 3.9396984924623115e-05, |
|
"loss": 0.9547, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 0.3580960854092527, |
|
"grad_norm": 0.3525221049785614, |
|
"learning_rate": 3.919597989949749e-05, |
|
"loss": 0.8875, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 0.3585409252669039, |
|
"grad_norm": 0.3365210294723511, |
|
"learning_rate": 3.899497487437186e-05, |
|
"loss": 0.913, |
|
"step": 806 |
|
}, |
|
{ |
|
"epoch": 0.35898576512455516, |
|
"grad_norm": 0.3405802249908447, |
|
"learning_rate": 3.8793969849246234e-05, |
|
"loss": 0.8679, |
|
"step": 807 |
|
}, |
|
{ |
|
"epoch": 0.3594306049822064, |
|
"grad_norm": 0.33175504207611084, |
|
"learning_rate": 3.85929648241206e-05, |
|
"loss": 0.929, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 0.35987544483985767, |
|
"grad_norm": 0.3878396451473236, |
|
"learning_rate": 3.8391959798994973e-05, |
|
"loss": 0.9765, |
|
"step": 809 |
|
}, |
|
{ |
|
"epoch": 0.3603202846975089, |
|
"grad_norm": 0.3880787491798401, |
|
"learning_rate": 3.8190954773869346e-05, |
|
"loss": 0.9282, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.36076512455516013, |
|
"grad_norm": 0.3783409297466278, |
|
"learning_rate": 3.798994974874372e-05, |
|
"loss": 0.9704, |
|
"step": 811 |
|
}, |
|
{ |
|
"epoch": 0.3612099644128114, |
|
"grad_norm": 0.37962606549263, |
|
"learning_rate": 3.778894472361809e-05, |
|
"loss": 1.0464, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 0.36165480427046265, |
|
"grad_norm": 0.4055241644382477, |
|
"learning_rate": 3.7587939698492465e-05, |
|
"loss": 0.9668, |
|
"step": 813 |
|
}, |
|
{ |
|
"epoch": 0.3620996441281139, |
|
"grad_norm": 0.3240121304988861, |
|
"learning_rate": 3.738693467336684e-05, |
|
"loss": 0.9415, |
|
"step": 814 |
|
}, |
|
{ |
|
"epoch": 0.3625444839857651, |
|
"grad_norm": 0.29463499784469604, |
|
"learning_rate": 3.7185929648241204e-05, |
|
"loss": 0.9489, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 0.36298932384341637, |
|
"grad_norm": 0.4620193839073181, |
|
"learning_rate": 3.698492462311558e-05, |
|
"loss": 0.8513, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 0.3634341637010676, |
|
"grad_norm": 0.3686508536338806, |
|
"learning_rate": 3.678391959798995e-05, |
|
"loss": 1.016, |
|
"step": 817 |
|
}, |
|
{ |
|
"epoch": 0.3638790035587189, |
|
"grad_norm": 0.366397500038147, |
|
"learning_rate": 3.658291457286432e-05, |
|
"loss": 0.8772, |
|
"step": 818 |
|
}, |
|
{ |
|
"epoch": 0.3643238434163701, |
|
"grad_norm": 0.3382261097431183, |
|
"learning_rate": 3.6381909547738696e-05, |
|
"loss": 0.9158, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 0.36476868327402134, |
|
"grad_norm": 0.3870478570461273, |
|
"learning_rate": 3.618090452261307e-05, |
|
"loss": 0.9776, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.3652135231316726, |
|
"grad_norm": 0.40006503462791443, |
|
"learning_rate": 3.597989949748744e-05, |
|
"loss": 0.9127, |
|
"step": 821 |
|
}, |
|
{ |
|
"epoch": 0.36565836298932386, |
|
"grad_norm": 0.41942328214645386, |
|
"learning_rate": 3.5778894472361815e-05, |
|
"loss": 0.8786, |
|
"step": 822 |
|
}, |
|
{ |
|
"epoch": 0.36610320284697506, |
|
"grad_norm": 0.34577417373657227, |
|
"learning_rate": 3.557788944723618e-05, |
|
"loss": 0.9029, |
|
"step": 823 |
|
}, |
|
{ |
|
"epoch": 0.3665480427046263, |
|
"grad_norm": 0.3666742146015167, |
|
"learning_rate": 3.5376884422110554e-05, |
|
"loss": 0.9616, |
|
"step": 824 |
|
}, |
|
{ |
|
"epoch": 0.3669928825622776, |
|
"grad_norm": 0.41306397318840027, |
|
"learning_rate": 3.517587939698493e-05, |
|
"loss": 0.8994, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.36743772241992884, |
|
"grad_norm": 0.354464054107666, |
|
"learning_rate": 3.49748743718593e-05, |
|
"loss": 0.8398, |
|
"step": 826 |
|
}, |
|
{ |
|
"epoch": 0.3678825622775801, |
|
"grad_norm": 0.40960076451301575, |
|
"learning_rate": 3.4773869346733667e-05, |
|
"loss": 1.0062, |
|
"step": 827 |
|
}, |
|
{ |
|
"epoch": 0.3683274021352313, |
|
"grad_norm": 0.35255295038223267, |
|
"learning_rate": 3.457286432160804e-05, |
|
"loss": 1.013, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 0.36877224199288255, |
|
"grad_norm": 0.320544570684433, |
|
"learning_rate": 3.437185929648241e-05, |
|
"loss": 0.8456, |
|
"step": 829 |
|
}, |
|
{ |
|
"epoch": 0.3692170818505338, |
|
"grad_norm": 0.3942032754421234, |
|
"learning_rate": 3.4170854271356785e-05, |
|
"loss": 0.9627, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.36966192170818507, |
|
"grad_norm": 0.34646615386009216, |
|
"learning_rate": 3.396984924623116e-05, |
|
"loss": 0.9397, |
|
"step": 831 |
|
}, |
|
{ |
|
"epoch": 0.3701067615658363, |
|
"grad_norm": 0.3462013006210327, |
|
"learning_rate": 3.3768844221105525e-05, |
|
"loss": 0.9722, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 0.37055160142348753, |
|
"grad_norm": 0.44638511538505554, |
|
"learning_rate": 3.35678391959799e-05, |
|
"loss": 1.0433, |
|
"step": 833 |
|
}, |
|
{ |
|
"epoch": 0.3709964412811388, |
|
"grad_norm": 0.3865186870098114, |
|
"learning_rate": 3.336683417085427e-05, |
|
"loss": 0.9243, |
|
"step": 834 |
|
}, |
|
{ |
|
"epoch": 0.37144128113879005, |
|
"grad_norm": 0.3224859833717346, |
|
"learning_rate": 3.3165829145728643e-05, |
|
"loss": 0.9197, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 0.3718861209964413, |
|
"grad_norm": 0.3511156439781189, |
|
"learning_rate": 3.2964824120603016e-05, |
|
"loss": 0.951, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 0.3723309608540925, |
|
"grad_norm": 0.3146522343158722, |
|
"learning_rate": 3.276381909547739e-05, |
|
"loss": 0.9514, |
|
"step": 837 |
|
}, |
|
{ |
|
"epoch": 0.37277580071174377, |
|
"grad_norm": 0.3094431757926941, |
|
"learning_rate": 3.256281407035176e-05, |
|
"loss": 0.8729, |
|
"step": 838 |
|
}, |
|
{ |
|
"epoch": 0.373220640569395, |
|
"grad_norm": 0.3724329471588135, |
|
"learning_rate": 3.2361809045226135e-05, |
|
"loss": 0.9897, |
|
"step": 839 |
|
}, |
|
{ |
|
"epoch": 0.3736654804270463, |
|
"grad_norm": 0.3881984353065491, |
|
"learning_rate": 3.21608040201005e-05, |
|
"loss": 0.9755, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.3741103202846975, |
|
"grad_norm": 0.30279484391212463, |
|
"learning_rate": 3.1959798994974875e-05, |
|
"loss": 0.8903, |
|
"step": 841 |
|
}, |
|
{ |
|
"epoch": 0.37455516014234874, |
|
"grad_norm": 0.42050573229789734, |
|
"learning_rate": 3.175879396984925e-05, |
|
"loss": 0.9163, |
|
"step": 842 |
|
}, |
|
{ |
|
"epoch": 0.375, |
|
"grad_norm": 0.3830379545688629, |
|
"learning_rate": 3.155778894472362e-05, |
|
"loss": 0.9519, |
|
"step": 843 |
|
}, |
|
{ |
|
"epoch": 0.37544483985765126, |
|
"grad_norm": 0.32881635427474976, |
|
"learning_rate": 3.1356783919597993e-05, |
|
"loss": 0.9225, |
|
"step": 844 |
|
}, |
|
{ |
|
"epoch": 0.3758896797153025, |
|
"grad_norm": 0.30189692974090576, |
|
"learning_rate": 3.1155778894472366e-05, |
|
"loss": 0.916, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 0.3763345195729537, |
|
"grad_norm": 0.37163209915161133, |
|
"learning_rate": 3.095477386934674e-05, |
|
"loss": 0.9399, |
|
"step": 846 |
|
}, |
|
{ |
|
"epoch": 0.376779359430605, |
|
"grad_norm": 0.43418970704078674, |
|
"learning_rate": 3.075376884422111e-05, |
|
"loss": 1.0015, |
|
"step": 847 |
|
}, |
|
{ |
|
"epoch": 0.37722419928825623, |
|
"grad_norm": 0.3178066313266754, |
|
"learning_rate": 3.055276381909548e-05, |
|
"loss": 0.9131, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 0.3776690391459075, |
|
"grad_norm": 0.3209547996520996, |
|
"learning_rate": 3.0351758793969855e-05, |
|
"loss": 0.9498, |
|
"step": 849 |
|
}, |
|
{ |
|
"epoch": 0.3781138790035587, |
|
"grad_norm": 0.4426248371601105, |
|
"learning_rate": 3.015075376884422e-05, |
|
"loss": 0.9335, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.37855871886120995, |
|
"grad_norm": 0.41413775086402893, |
|
"learning_rate": 2.994974874371859e-05, |
|
"loss": 0.8906, |
|
"step": 851 |
|
}, |
|
{ |
|
"epoch": 0.3790035587188612, |
|
"grad_norm": 0.3665112555027008, |
|
"learning_rate": 2.9748743718592964e-05, |
|
"loss": 0.8849, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 0.37944839857651247, |
|
"grad_norm": 0.36841294169425964, |
|
"learning_rate": 2.9547738693467337e-05, |
|
"loss": 0.8004, |
|
"step": 853 |
|
}, |
|
{ |
|
"epoch": 0.3798932384341637, |
|
"grad_norm": 0.4228864014148712, |
|
"learning_rate": 2.934673366834171e-05, |
|
"loss": 1.0018, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 0.38033807829181493, |
|
"grad_norm": 0.34962666034698486, |
|
"learning_rate": 2.914572864321608e-05, |
|
"loss": 0.9105, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 0.3807829181494662, |
|
"grad_norm": 0.4448448121547699, |
|
"learning_rate": 2.8944723618090452e-05, |
|
"loss": 0.9391, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 0.38122775800711745, |
|
"grad_norm": 0.36843836307525635, |
|
"learning_rate": 2.8743718592964825e-05, |
|
"loss": 0.9911, |
|
"step": 857 |
|
}, |
|
{ |
|
"epoch": 0.3816725978647687, |
|
"grad_norm": 0.35289087891578674, |
|
"learning_rate": 2.8542713567839198e-05, |
|
"loss": 0.9575, |
|
"step": 858 |
|
}, |
|
{ |
|
"epoch": 0.3821174377224199, |
|
"grad_norm": 0.39545342326164246, |
|
"learning_rate": 2.8341708542713568e-05, |
|
"loss": 0.9074, |
|
"step": 859 |
|
}, |
|
{ |
|
"epoch": 0.38256227758007116, |
|
"grad_norm": 0.3101719319820404, |
|
"learning_rate": 2.814070351758794e-05, |
|
"loss": 0.8479, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.3830071174377224, |
|
"grad_norm": 0.38031789660453796, |
|
"learning_rate": 2.7939698492462314e-05, |
|
"loss": 1.0281, |
|
"step": 861 |
|
}, |
|
{ |
|
"epoch": 0.3834519572953737, |
|
"grad_norm": 0.38144952058792114, |
|
"learning_rate": 2.7738693467336686e-05, |
|
"loss": 0.9668, |
|
"step": 862 |
|
}, |
|
{ |
|
"epoch": 0.38389679715302494, |
|
"grad_norm": 0.41572901606559753, |
|
"learning_rate": 2.7537688442211056e-05, |
|
"loss": 0.9258, |
|
"step": 863 |
|
}, |
|
{ |
|
"epoch": 0.38434163701067614, |
|
"grad_norm": 0.2982839345932007, |
|
"learning_rate": 2.733668341708543e-05, |
|
"loss": 0.8807, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 0.3847864768683274, |
|
"grad_norm": 0.3221079111099243, |
|
"learning_rate": 2.7135678391959802e-05, |
|
"loss": 0.9178, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 0.38523131672597866, |
|
"grad_norm": 0.38755419850349426, |
|
"learning_rate": 2.6934673366834175e-05, |
|
"loss": 0.9183, |
|
"step": 866 |
|
}, |
|
{ |
|
"epoch": 0.3856761565836299, |
|
"grad_norm": 0.36158278584480286, |
|
"learning_rate": 2.6733668341708545e-05, |
|
"loss": 0.9274, |
|
"step": 867 |
|
}, |
|
{ |
|
"epoch": 0.3861209964412811, |
|
"grad_norm": 0.308855801820755, |
|
"learning_rate": 2.6532663316582917e-05, |
|
"loss": 0.9158, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 0.3865658362989324, |
|
"grad_norm": 0.42090025544166565, |
|
"learning_rate": 2.633165829145729e-05, |
|
"loss": 0.9392, |
|
"step": 869 |
|
}, |
|
{ |
|
"epoch": 0.38701067615658363, |
|
"grad_norm": 0.3310891389846802, |
|
"learning_rate": 2.613065326633166e-05, |
|
"loss": 0.9232, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.3874555160142349, |
|
"grad_norm": 0.3915763199329376, |
|
"learning_rate": 2.5929648241206033e-05, |
|
"loss": 0.8819, |
|
"step": 871 |
|
}, |
|
{ |
|
"epoch": 0.3879003558718861, |
|
"grad_norm": 0.3351011872291565, |
|
"learning_rate": 2.5728643216080406e-05, |
|
"loss": 0.9757, |
|
"step": 872 |
|
}, |
|
{ |
|
"epoch": 0.38834519572953735, |
|
"grad_norm": 0.32709652185440063, |
|
"learning_rate": 2.5527638190954772e-05, |
|
"loss": 0.8823, |
|
"step": 873 |
|
}, |
|
{ |
|
"epoch": 0.3887900355871886, |
|
"grad_norm": 0.34310227632522583, |
|
"learning_rate": 2.5326633165829145e-05, |
|
"loss": 0.8623, |
|
"step": 874 |
|
}, |
|
{ |
|
"epoch": 0.38923487544483987, |
|
"grad_norm": 0.3940928876399994, |
|
"learning_rate": 2.5125628140703518e-05, |
|
"loss": 0.9409, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.3896797153024911, |
|
"grad_norm": 0.34565237164497375, |
|
"learning_rate": 2.492462311557789e-05, |
|
"loss": 0.9369, |
|
"step": 876 |
|
}, |
|
{ |
|
"epoch": 0.39012455516014233, |
|
"grad_norm": 0.37526604533195496, |
|
"learning_rate": 2.4723618090452264e-05, |
|
"loss": 0.8391, |
|
"step": 877 |
|
}, |
|
{ |
|
"epoch": 0.3905693950177936, |
|
"grad_norm": 0.3760671019554138, |
|
"learning_rate": 2.4522613065326637e-05, |
|
"loss": 0.9549, |
|
"step": 878 |
|
}, |
|
{ |
|
"epoch": 0.39101423487544484, |
|
"grad_norm": 0.35706883668899536, |
|
"learning_rate": 2.4321608040201007e-05, |
|
"loss": 0.7821, |
|
"step": 879 |
|
}, |
|
{ |
|
"epoch": 0.3914590747330961, |
|
"grad_norm": 0.37825968861579895, |
|
"learning_rate": 2.4120603015075376e-05, |
|
"loss": 1.0344, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.3919039145907473, |
|
"grad_norm": 0.3388988673686981, |
|
"learning_rate": 2.391959798994975e-05, |
|
"loss": 0.9347, |
|
"step": 881 |
|
}, |
|
{ |
|
"epoch": 0.39234875444839856, |
|
"grad_norm": 0.35239583253860474, |
|
"learning_rate": 2.3718592964824122e-05, |
|
"loss": 0.8896, |
|
"step": 882 |
|
}, |
|
{ |
|
"epoch": 0.3927935943060498, |
|
"grad_norm": 0.34962180256843567, |
|
"learning_rate": 2.351758793969849e-05, |
|
"loss": 0.9186, |
|
"step": 883 |
|
}, |
|
{ |
|
"epoch": 0.3932384341637011, |
|
"grad_norm": 0.3848302960395813, |
|
"learning_rate": 2.3316582914572865e-05, |
|
"loss": 1.0796, |
|
"step": 884 |
|
}, |
|
{ |
|
"epoch": 0.39368327402135234, |
|
"grad_norm": 0.4643072783946991, |
|
"learning_rate": 2.3115577889447238e-05, |
|
"loss": 0.9133, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 0.39412811387900354, |
|
"grad_norm": 0.3217661678791046, |
|
"learning_rate": 2.291457286432161e-05, |
|
"loss": 0.9077, |
|
"step": 886 |
|
}, |
|
{ |
|
"epoch": 0.3945729537366548, |
|
"grad_norm": 0.307915061712265, |
|
"learning_rate": 2.271356783919598e-05, |
|
"loss": 0.9465, |
|
"step": 887 |
|
}, |
|
{ |
|
"epoch": 0.39501779359430605, |
|
"grad_norm": 0.41546520590782166, |
|
"learning_rate": 2.2512562814070353e-05, |
|
"loss": 0.9453, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 0.3954626334519573, |
|
"grad_norm": 0.44572389125823975, |
|
"learning_rate": 2.2311557788944726e-05, |
|
"loss": 1.1163, |
|
"step": 889 |
|
}, |
|
{ |
|
"epoch": 0.3959074733096085, |
|
"grad_norm": 0.3532145619392395, |
|
"learning_rate": 2.21105527638191e-05, |
|
"loss": 0.9142, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.3963523131672598, |
|
"grad_norm": 0.3812106251716614, |
|
"learning_rate": 2.190954773869347e-05, |
|
"loss": 0.8557, |
|
"step": 891 |
|
}, |
|
{ |
|
"epoch": 0.39679715302491103, |
|
"grad_norm": 0.40992099046707153, |
|
"learning_rate": 2.1708542713567838e-05, |
|
"loss": 0.9685, |
|
"step": 892 |
|
}, |
|
{ |
|
"epoch": 0.3972419928825623, |
|
"grad_norm": 0.35445836186408997, |
|
"learning_rate": 2.150753768844221e-05, |
|
"loss": 0.9416, |
|
"step": 893 |
|
}, |
|
{ |
|
"epoch": 0.39768683274021355, |
|
"grad_norm": 0.34018823504447937, |
|
"learning_rate": 2.1306532663316584e-05, |
|
"loss": 0.9203, |
|
"step": 894 |
|
}, |
|
{ |
|
"epoch": 0.39813167259786475, |
|
"grad_norm": 0.37122175097465515, |
|
"learning_rate": 2.1105527638190957e-05, |
|
"loss": 1.1101, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 0.398576512455516, |
|
"grad_norm": 0.3946356177330017, |
|
"learning_rate": 2.0904522613065327e-05, |
|
"loss": 0.932, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 0.39902135231316727, |
|
"grad_norm": 0.3920304775238037, |
|
"learning_rate": 2.07035175879397e-05, |
|
"loss": 0.9307, |
|
"step": 897 |
|
}, |
|
{ |
|
"epoch": 0.3994661921708185, |
|
"grad_norm": 0.35796669125556946, |
|
"learning_rate": 2.0502512562814073e-05, |
|
"loss": 0.9004, |
|
"step": 898 |
|
}, |
|
{ |
|
"epoch": 0.3999110320284697, |
|
"grad_norm": 0.37431228160858154, |
|
"learning_rate": 2.0301507537688446e-05, |
|
"loss": 0.9462, |
|
"step": 899 |
|
}, |
|
{ |
|
"epoch": 0.400355871886121, |
|
"grad_norm": 0.43452969193458557, |
|
"learning_rate": 2.0100502512562815e-05, |
|
"loss": 0.9755, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.40080071174377224, |
|
"grad_norm": 0.308403342962265, |
|
"learning_rate": 1.9899497487437188e-05, |
|
"loss": 0.8736, |
|
"step": 901 |
|
}, |
|
{ |
|
"epoch": 0.4012455516014235, |
|
"grad_norm": 0.425761342048645, |
|
"learning_rate": 1.9698492462311558e-05, |
|
"loss": 1.0335, |
|
"step": 902 |
|
}, |
|
{ |
|
"epoch": 0.40169039145907476, |
|
"grad_norm": 0.4178042709827423, |
|
"learning_rate": 1.949748743718593e-05, |
|
"loss": 0.8853, |
|
"step": 903 |
|
}, |
|
{ |
|
"epoch": 0.40213523131672596, |
|
"grad_norm": 0.31192678213119507, |
|
"learning_rate": 1.92964824120603e-05, |
|
"loss": 0.8916, |
|
"step": 904 |
|
}, |
|
{ |
|
"epoch": 0.4025800711743772, |
|
"grad_norm": 0.4430267810821533, |
|
"learning_rate": 1.9095477386934673e-05, |
|
"loss": 0.962, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 0.4030249110320285, |
|
"grad_norm": 0.34475943446159363, |
|
"learning_rate": 1.8894472361809046e-05, |
|
"loss": 0.8855, |
|
"step": 906 |
|
}, |
|
{ |
|
"epoch": 0.40346975088967973, |
|
"grad_norm": 0.36281871795654297, |
|
"learning_rate": 1.869346733668342e-05, |
|
"loss": 0.8925, |
|
"step": 907 |
|
}, |
|
{ |
|
"epoch": 0.40391459074733094, |
|
"grad_norm": 0.39103028178215027, |
|
"learning_rate": 1.849246231155779e-05, |
|
"loss": 0.86, |
|
"step": 908 |
|
}, |
|
{ |
|
"epoch": 0.4043594306049822, |
|
"grad_norm": 0.3842105567455292, |
|
"learning_rate": 1.829145728643216e-05, |
|
"loss": 1.0245, |
|
"step": 909 |
|
}, |
|
{ |
|
"epoch": 0.40480427046263345, |
|
"grad_norm": 0.4096086025238037, |
|
"learning_rate": 1.8090452261306535e-05, |
|
"loss": 0.8985, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.4052491103202847, |
|
"grad_norm": 0.3068806827068329, |
|
"learning_rate": 1.7889447236180908e-05, |
|
"loss": 0.804, |
|
"step": 911 |
|
}, |
|
{ |
|
"epoch": 0.40569395017793597, |
|
"grad_norm": 0.28686726093292236, |
|
"learning_rate": 1.7688442211055277e-05, |
|
"loss": 1.0117, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 0.40613879003558717, |
|
"grad_norm": 0.2880958616733551, |
|
"learning_rate": 1.748743718592965e-05, |
|
"loss": 0.8703, |
|
"step": 913 |
|
}, |
|
{ |
|
"epoch": 0.40658362989323843, |
|
"grad_norm": 0.3917515277862549, |
|
"learning_rate": 1.728643216080402e-05, |
|
"loss": 0.968, |
|
"step": 914 |
|
}, |
|
{ |
|
"epoch": 0.4070284697508897, |
|
"grad_norm": 0.3814576268196106, |
|
"learning_rate": 1.7085427135678393e-05, |
|
"loss": 0.9305, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 0.40747330960854095, |
|
"grad_norm": 0.3277512192726135, |
|
"learning_rate": 1.6884422110552762e-05, |
|
"loss": 0.8769, |
|
"step": 916 |
|
}, |
|
{ |
|
"epoch": 0.40791814946619215, |
|
"grad_norm": 0.3101690709590912, |
|
"learning_rate": 1.6683417085427135e-05, |
|
"loss": 0.8974, |
|
"step": 917 |
|
}, |
|
{ |
|
"epoch": 0.4083629893238434, |
|
"grad_norm": 0.34977224469184875, |
|
"learning_rate": 1.6482412060301508e-05, |
|
"loss": 0.9463, |
|
"step": 918 |
|
}, |
|
{ |
|
"epoch": 0.40880782918149466, |
|
"grad_norm": 0.35917431116104126, |
|
"learning_rate": 1.628140703517588e-05, |
|
"loss": 0.9637, |
|
"step": 919 |
|
}, |
|
{ |
|
"epoch": 0.4092526690391459, |
|
"grad_norm": 0.36873766779899597, |
|
"learning_rate": 1.608040201005025e-05, |
|
"loss": 0.7935, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.4096975088967972, |
|
"grad_norm": 0.348622590303421, |
|
"learning_rate": 1.5879396984924624e-05, |
|
"loss": 0.9165, |
|
"step": 921 |
|
}, |
|
{ |
|
"epoch": 0.4101423487544484, |
|
"grad_norm": 0.3866550028324127, |
|
"learning_rate": 1.5678391959798997e-05, |
|
"loss": 0.9679, |
|
"step": 922 |
|
}, |
|
{ |
|
"epoch": 0.41058718861209964, |
|
"grad_norm": 0.3473438620567322, |
|
"learning_rate": 1.547738693467337e-05, |
|
"loss": 0.9228, |
|
"step": 923 |
|
}, |
|
{ |
|
"epoch": 0.4110320284697509, |
|
"grad_norm": 0.43389132618904114, |
|
"learning_rate": 1.527638190954774e-05, |
|
"loss": 0.9541, |
|
"step": 924 |
|
}, |
|
{ |
|
"epoch": 0.41147686832740216, |
|
"grad_norm": 0.40540170669555664, |
|
"learning_rate": 1.507537688442211e-05, |
|
"loss": 0.9814, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.41192170818505336, |
|
"grad_norm": 0.3193083107471466, |
|
"learning_rate": 1.4874371859296482e-05, |
|
"loss": 0.9914, |
|
"step": 926 |
|
}, |
|
{ |
|
"epoch": 0.4123665480427046, |
|
"grad_norm": 0.3414134383201599, |
|
"learning_rate": 1.4673366834170855e-05, |
|
"loss": 1.0247, |
|
"step": 927 |
|
}, |
|
{ |
|
"epoch": 0.4128113879003559, |
|
"grad_norm": 0.3683622181415558, |
|
"learning_rate": 1.4472361809045226e-05, |
|
"loss": 0.997, |
|
"step": 928 |
|
}, |
|
{ |
|
"epoch": 0.41325622775800713, |
|
"grad_norm": 0.37435421347618103, |
|
"learning_rate": 1.4271356783919599e-05, |
|
"loss": 0.902, |
|
"step": 929 |
|
}, |
|
{ |
|
"epoch": 0.41370106761565834, |
|
"grad_norm": 0.31382328271865845, |
|
"learning_rate": 1.407035175879397e-05, |
|
"loss": 0.9035, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.4141459074733096, |
|
"grad_norm": 0.35533666610717773, |
|
"learning_rate": 1.3869346733668343e-05, |
|
"loss": 1.0198, |
|
"step": 931 |
|
}, |
|
{ |
|
"epoch": 0.41459074733096085, |
|
"grad_norm": 0.3287508189678192, |
|
"learning_rate": 1.3668341708542715e-05, |
|
"loss": 0.8756, |
|
"step": 932 |
|
}, |
|
{ |
|
"epoch": 0.4150355871886121, |
|
"grad_norm": 0.3902789056301117, |
|
"learning_rate": 1.3467336683417087e-05, |
|
"loss": 0.8743, |
|
"step": 933 |
|
}, |
|
{ |
|
"epoch": 0.41548042704626337, |
|
"grad_norm": 0.3326834738254547, |
|
"learning_rate": 1.3266331658291459e-05, |
|
"loss": 0.8924, |
|
"step": 934 |
|
}, |
|
{ |
|
"epoch": 0.41592526690391457, |
|
"grad_norm": 0.3254244923591614, |
|
"learning_rate": 1.306532663316583e-05, |
|
"loss": 0.9863, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 0.41637010676156583, |
|
"grad_norm": 0.41529661417007446, |
|
"learning_rate": 1.2864321608040203e-05, |
|
"loss": 0.8844, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 0.4168149466192171, |
|
"grad_norm": 0.31171804666519165, |
|
"learning_rate": 1.2663316582914573e-05, |
|
"loss": 0.9245, |
|
"step": 937 |
|
}, |
|
{ |
|
"epoch": 0.41725978647686834, |
|
"grad_norm": 0.33847880363464355, |
|
"learning_rate": 1.2462311557788946e-05, |
|
"loss": 0.9414, |
|
"step": 938 |
|
}, |
|
{ |
|
"epoch": 0.41770462633451955, |
|
"grad_norm": 0.35839855670928955, |
|
"learning_rate": 1.2261306532663318e-05, |
|
"loss": 0.924, |
|
"step": 939 |
|
}, |
|
{ |
|
"epoch": 0.4181494661921708, |
|
"grad_norm": 0.3934418857097626, |
|
"learning_rate": 1.2060301507537688e-05, |
|
"loss": 0.9718, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.41859430604982206, |
|
"grad_norm": 0.35172393918037415, |
|
"learning_rate": 1.1859296482412061e-05, |
|
"loss": 0.8878, |
|
"step": 941 |
|
}, |
|
{ |
|
"epoch": 0.4190391459074733, |
|
"grad_norm": 0.4257389307022095, |
|
"learning_rate": 1.1658291457286432e-05, |
|
"loss": 0.9914, |
|
"step": 942 |
|
}, |
|
{ |
|
"epoch": 0.4194839857651246, |
|
"grad_norm": 0.3889011740684509, |
|
"learning_rate": 1.1457286432160805e-05, |
|
"loss": 0.8339, |
|
"step": 943 |
|
}, |
|
{ |
|
"epoch": 0.4199288256227758, |
|
"grad_norm": 0.35240477323532104, |
|
"learning_rate": 1.1256281407035177e-05, |
|
"loss": 0.9792, |
|
"step": 944 |
|
}, |
|
{ |
|
"epoch": 0.42037366548042704, |
|
"grad_norm": 0.3817644715309143, |
|
"learning_rate": 1.105527638190955e-05, |
|
"loss": 0.992, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 0.4208185053380783, |
|
"grad_norm": 0.36161094903945923, |
|
"learning_rate": 1.0854271356783919e-05, |
|
"loss": 0.8468, |
|
"step": 946 |
|
}, |
|
{ |
|
"epoch": 0.42126334519572955, |
|
"grad_norm": 0.3254898488521576, |
|
"learning_rate": 1.0653266331658292e-05, |
|
"loss": 0.9998, |
|
"step": 947 |
|
}, |
|
{ |
|
"epoch": 0.42170818505338076, |
|
"grad_norm": 0.3513511121273041, |
|
"learning_rate": 1.0452261306532663e-05, |
|
"loss": 0.8928, |
|
"step": 948 |
|
}, |
|
{ |
|
"epoch": 0.422153024911032, |
|
"grad_norm": 0.3499341309070587, |
|
"learning_rate": 1.0251256281407036e-05, |
|
"loss": 0.9137, |
|
"step": 949 |
|
}, |
|
{ |
|
"epoch": 0.4225978647686833, |
|
"grad_norm": 0.3495025932788849, |
|
"learning_rate": 1.0050251256281408e-05, |
|
"loss": 0.8818, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.42304270462633453, |
|
"grad_norm": 0.3569670021533966, |
|
"learning_rate": 9.849246231155779e-06, |
|
"loss": 0.9912, |
|
"step": 951 |
|
}, |
|
{ |
|
"epoch": 0.4234875444839858, |
|
"grad_norm": 0.394522100687027, |
|
"learning_rate": 9.64824120603015e-06, |
|
"loss": 0.863, |
|
"step": 952 |
|
}, |
|
{ |
|
"epoch": 0.423932384341637, |
|
"grad_norm": 0.35868117213249207, |
|
"learning_rate": 9.447236180904523e-06, |
|
"loss": 0.9022, |
|
"step": 953 |
|
}, |
|
{ |
|
"epoch": 0.42437722419928825, |
|
"grad_norm": 0.33881279826164246, |
|
"learning_rate": 9.246231155778894e-06, |
|
"loss": 0.9634, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 0.4248220640569395, |
|
"grad_norm": 0.36202844977378845, |
|
"learning_rate": 9.045226130653267e-06, |
|
"loss": 0.9441, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 0.42526690391459077, |
|
"grad_norm": 0.39875316619873047, |
|
"learning_rate": 8.844221105527639e-06, |
|
"loss": 0.9702, |
|
"step": 956 |
|
}, |
|
{ |
|
"epoch": 0.42571174377224197, |
|
"grad_norm": 0.3653241693973541, |
|
"learning_rate": 8.64321608040201e-06, |
|
"loss": 0.9436, |
|
"step": 957 |
|
}, |
|
{ |
|
"epoch": 0.4261565836298932, |
|
"grad_norm": 0.31130754947662354, |
|
"learning_rate": 8.442211055276381e-06, |
|
"loss": 0.8579, |
|
"step": 958 |
|
}, |
|
{ |
|
"epoch": 0.4266014234875445, |
|
"grad_norm": 0.3955715000629425, |
|
"learning_rate": 8.241206030150754e-06, |
|
"loss": 0.9889, |
|
"step": 959 |
|
}, |
|
{ |
|
"epoch": 0.42704626334519574, |
|
"grad_norm": 0.35064584016799927, |
|
"learning_rate": 8.040201005025125e-06, |
|
"loss": 0.8877, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.427491103202847, |
|
"grad_norm": 0.38837116956710815, |
|
"learning_rate": 7.839195979899498e-06, |
|
"loss": 0.9255, |
|
"step": 961 |
|
}, |
|
{ |
|
"epoch": 0.4279359430604982, |
|
"grad_norm": 0.3994256556034088, |
|
"learning_rate": 7.63819095477387e-06, |
|
"loss": 0.9613, |
|
"step": 962 |
|
}, |
|
{ |
|
"epoch": 0.42838078291814946, |
|
"grad_norm": 0.5106455087661743, |
|
"learning_rate": 7.437185929648241e-06, |
|
"loss": 0.9656, |
|
"step": 963 |
|
}, |
|
{ |
|
"epoch": 0.4288256227758007, |
|
"grad_norm": 0.46163278818130493, |
|
"learning_rate": 7.236180904522613e-06, |
|
"loss": 1.0239, |
|
"step": 964 |
|
}, |
|
{ |
|
"epoch": 0.429270462633452, |
|
"grad_norm": 0.37351593375205994, |
|
"learning_rate": 7.035175879396985e-06, |
|
"loss": 0.9637, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 0.4297153024911032, |
|
"grad_norm": 0.34434255957603455, |
|
"learning_rate": 6.834170854271357e-06, |
|
"loss": 0.9929, |
|
"step": 966 |
|
}, |
|
{ |
|
"epoch": 0.43016014234875444, |
|
"grad_norm": 0.3309576213359833, |
|
"learning_rate": 6.633165829145729e-06, |
|
"loss": 0.9139, |
|
"step": 967 |
|
}, |
|
{ |
|
"epoch": 0.4306049822064057, |
|
"grad_norm": 0.5205715298652649, |
|
"learning_rate": 6.4321608040201015e-06, |
|
"loss": 1.0866, |
|
"step": 968 |
|
}, |
|
{ |
|
"epoch": 0.43104982206405695, |
|
"grad_norm": 0.417214035987854, |
|
"learning_rate": 6.231155778894473e-06, |
|
"loss": 0.9765, |
|
"step": 969 |
|
}, |
|
{ |
|
"epoch": 0.4314946619217082, |
|
"grad_norm": 0.3578605651855469, |
|
"learning_rate": 6.030150753768844e-06, |
|
"loss": 0.9261, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.4319395017793594, |
|
"grad_norm": 0.35047677159309387, |
|
"learning_rate": 5.829145728643216e-06, |
|
"loss": 0.8318, |
|
"step": 971 |
|
}, |
|
{ |
|
"epoch": 0.43238434163701067, |
|
"grad_norm": 0.30359870195388794, |
|
"learning_rate": 5.628140703517588e-06, |
|
"loss": 0.9497, |
|
"step": 972 |
|
}, |
|
{ |
|
"epoch": 0.43282918149466193, |
|
"grad_norm": 0.32898378372192383, |
|
"learning_rate": 5.4271356783919595e-06, |
|
"loss": 0.9873, |
|
"step": 973 |
|
}, |
|
{ |
|
"epoch": 0.4332740213523132, |
|
"grad_norm": 0.33104610443115234, |
|
"learning_rate": 5.226130653266332e-06, |
|
"loss": 0.9082, |
|
"step": 974 |
|
}, |
|
{ |
|
"epoch": 0.4337188612099644, |
|
"grad_norm": 0.3245652914047241, |
|
"learning_rate": 5.025125628140704e-06, |
|
"loss": 0.8933, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.43416370106761565, |
|
"grad_norm": 0.3264075517654419, |
|
"learning_rate": 4.824120603015075e-06, |
|
"loss": 0.9778, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 0.4346085409252669, |
|
"grad_norm": 0.44732666015625, |
|
"learning_rate": 4.623115577889447e-06, |
|
"loss": 0.9509, |
|
"step": 977 |
|
}, |
|
{ |
|
"epoch": 0.43505338078291816, |
|
"grad_norm": 0.35666903853416443, |
|
"learning_rate": 4.422110552763819e-06, |
|
"loss": 0.9392, |
|
"step": 978 |
|
}, |
|
{ |
|
"epoch": 0.4354982206405694, |
|
"grad_norm": 0.3573732078075409, |
|
"learning_rate": 4.2211055276381906e-06, |
|
"loss": 0.9323, |
|
"step": 979 |
|
}, |
|
{ |
|
"epoch": 0.4359430604982206, |
|
"grad_norm": 0.3136303126811981, |
|
"learning_rate": 4.020100502512563e-06, |
|
"loss": 0.959, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.4363879003558719, |
|
"grad_norm": 0.36791837215423584, |
|
"learning_rate": 3.819095477386935e-06, |
|
"loss": 0.9151, |
|
"step": 981 |
|
}, |
|
{ |
|
"epoch": 0.43683274021352314, |
|
"grad_norm": 0.3033190965652466, |
|
"learning_rate": 3.6180904522613065e-06, |
|
"loss": 0.9223, |
|
"step": 982 |
|
}, |
|
{ |
|
"epoch": 0.4372775800711744, |
|
"grad_norm": 0.35878488421440125, |
|
"learning_rate": 3.4170854271356786e-06, |
|
"loss": 0.9418, |
|
"step": 983 |
|
}, |
|
{ |
|
"epoch": 0.4377224199288256, |
|
"grad_norm": 0.3010809123516083, |
|
"learning_rate": 3.2160804020100507e-06, |
|
"loss": 0.8418, |
|
"step": 984 |
|
}, |
|
{ |
|
"epoch": 0.43816725978647686, |
|
"grad_norm": 0.4400476813316345, |
|
"learning_rate": 3.015075376884422e-06, |
|
"loss": 0.9641, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 0.4386120996441281, |
|
"grad_norm": 0.34438541531562805, |
|
"learning_rate": 2.814070351758794e-06, |
|
"loss": 0.8904, |
|
"step": 986 |
|
}, |
|
{ |
|
"epoch": 0.4390569395017794, |
|
"grad_norm": 0.39025887846946716, |
|
"learning_rate": 2.613065326633166e-06, |
|
"loss": 0.9406, |
|
"step": 987 |
|
}, |
|
{ |
|
"epoch": 0.4395017793594306, |
|
"grad_norm": 0.29612797498703003, |
|
"learning_rate": 2.4120603015075375e-06, |
|
"loss": 0.9153, |
|
"step": 988 |
|
}, |
|
{ |
|
"epoch": 0.43994661921708184, |
|
"grad_norm": 0.3811575174331665, |
|
"learning_rate": 2.2110552763819096e-06, |
|
"loss": 0.8314, |
|
"step": 989 |
|
}, |
|
{ |
|
"epoch": 0.4403914590747331, |
|
"grad_norm": 0.3829675614833832, |
|
"learning_rate": 2.0100502512562813e-06, |
|
"loss": 0.8798, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.44083629893238435, |
|
"grad_norm": 0.33976423740386963, |
|
"learning_rate": 1.8090452261306533e-06, |
|
"loss": 0.8793, |
|
"step": 991 |
|
}, |
|
{ |
|
"epoch": 0.4412811387900356, |
|
"grad_norm": 0.3907322883605957, |
|
"learning_rate": 1.6080402010050254e-06, |
|
"loss": 0.9893, |
|
"step": 992 |
|
}, |
|
{ |
|
"epoch": 0.4417259786476868, |
|
"grad_norm": 0.33631375432014465, |
|
"learning_rate": 1.407035175879397e-06, |
|
"loss": 0.8798, |
|
"step": 993 |
|
}, |
|
{ |
|
"epoch": 0.44217081850533807, |
|
"grad_norm": 0.41036009788513184, |
|
"learning_rate": 1.2060301507537688e-06, |
|
"loss": 0.843, |
|
"step": 994 |
|
}, |
|
{ |
|
"epoch": 0.44261565836298933, |
|
"grad_norm": 0.3805226683616638, |
|
"learning_rate": 1.0050251256281407e-06, |
|
"loss": 1.0452, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 0.4430604982206406, |
|
"grad_norm": 0.40261736512184143, |
|
"learning_rate": 8.040201005025127e-07, |
|
"loss": 0.8972, |
|
"step": 996 |
|
}, |
|
{ |
|
"epoch": 0.4435053380782918, |
|
"grad_norm": 0.3762563467025757, |
|
"learning_rate": 6.030150753768844e-07, |
|
"loss": 0.8555, |
|
"step": 997 |
|
}, |
|
{ |
|
"epoch": 0.44395017793594305, |
|
"grad_norm": 0.3261839747428894, |
|
"learning_rate": 4.0201005025125634e-07, |
|
"loss": 0.8795, |
|
"step": 998 |
|
}, |
|
{ |
|
"epoch": 0.4443950177935943, |
|
"grad_norm": 0.309962660074234, |
|
"learning_rate": 2.0100502512562817e-07, |
|
"loss": 0.9488, |
|
"step": 999 |
|
}, |
|
{ |
|
"epoch": 0.44483985765124556, |
|
"grad_norm": 0.3620010018348694, |
|
"learning_rate": 0.0, |
|
"loss": 0.953, |
|
"step": 1000 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 1000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.7939011197427712e+17, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |