{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.44483985765124556,
"eval_steps": 500,
"global_step": 1000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.00044483985765124553,
"grad_norm": 0.5906195640563965,
"learning_rate": 4e-05,
"loss": 1.504,
"step": 1
},
{
"epoch": 0.0008896797153024911,
"grad_norm": 0.8538110852241516,
"learning_rate": 8e-05,
"loss": 1.4735,
"step": 2
},
{
"epoch": 0.0013345195729537367,
"grad_norm": 0.4623548686504364,
"learning_rate": 0.00012,
"loss": 1.3887,
"step": 3
},
{
"epoch": 0.0017793594306049821,
"grad_norm": 0.40841907262802124,
"learning_rate": 0.00016,
"loss": 1.2993,
"step": 4
},
{
"epoch": 0.002224199288256228,
"grad_norm": 0.4153430759906769,
"learning_rate": 0.0002,
"loss": 1.3105,
"step": 5
},
{
"epoch": 0.0026690391459074734,
"grad_norm": 0.4014444947242737,
"learning_rate": 0.00019979899497487438,
"loss": 1.1942,
"step": 6
},
{
"epoch": 0.003113879003558719,
"grad_norm": 0.39270123839378357,
"learning_rate": 0.00019959798994974876,
"loss": 1.1688,
"step": 7
},
{
"epoch": 0.0035587188612099642,
"grad_norm": 0.4414786696434021,
"learning_rate": 0.00019939698492462313,
"loss": 1.121,
"step": 8
},
{
"epoch": 0.00400355871886121,
"grad_norm": 0.3911682069301605,
"learning_rate": 0.0001991959798994975,
"loss": 1.1039,
"step": 9
},
{
"epoch": 0.004448398576512456,
"grad_norm": 0.3546655476093292,
"learning_rate": 0.00019899497487437187,
"loss": 1.1259,
"step": 10
},
{
"epoch": 0.004893238434163701,
"grad_norm": 0.37283238768577576,
"learning_rate": 0.00019879396984924622,
"loss": 1.1599,
"step": 11
},
{
"epoch": 0.005338078291814947,
"grad_norm": 0.35209548473358154,
"learning_rate": 0.00019859296482412062,
"loss": 1.2022,
"step": 12
},
{
"epoch": 0.005782918149466192,
"grad_norm": 0.3735697865486145,
"learning_rate": 0.000198391959798995,
"loss": 1.2544,
"step": 13
},
{
"epoch": 0.006227758007117438,
"grad_norm": 0.324276328086853,
"learning_rate": 0.00019819095477386937,
"loss": 1.1451,
"step": 14
},
{
"epoch": 0.0066725978647686835,
"grad_norm": 0.44405269622802734,
"learning_rate": 0.0001979899497487437,
"loss": 1.2581,
"step": 15
},
{
"epoch": 0.0071174377224199285,
"grad_norm": 0.40987834334373474,
"learning_rate": 0.0001977889447236181,
"loss": 1.1422,
"step": 16
},
{
"epoch": 0.007562277580071174,
"grad_norm": 0.31967031955718994,
"learning_rate": 0.00019758793969849249,
"loss": 1.1521,
"step": 17
},
{
"epoch": 0.00800711743772242,
"grad_norm": 0.3634157180786133,
"learning_rate": 0.00019738693467336683,
"loss": 1.0172,
"step": 18
},
{
"epoch": 0.008451957295373666,
"grad_norm": 0.37751761078834534,
"learning_rate": 0.0001971859296482412,
"loss": 1.1343,
"step": 19
},
{
"epoch": 0.008896797153024912,
"grad_norm": 0.3501513600349426,
"learning_rate": 0.0001969849246231156,
"loss": 1.1137,
"step": 20
},
{
"epoch": 0.009341637010676156,
"grad_norm": 0.41018760204315186,
"learning_rate": 0.00019678391959798995,
"loss": 1.0601,
"step": 21
},
{
"epoch": 0.009786476868327402,
"grad_norm": 0.5334712266921997,
"learning_rate": 0.00019658291457286432,
"loss": 1.2332,
"step": 22
},
{
"epoch": 0.010231316725978648,
"grad_norm": 0.5504945516586304,
"learning_rate": 0.0001963819095477387,
"loss": 1.1928,
"step": 23
},
{
"epoch": 0.010676156583629894,
"grad_norm": 0.39751070737838745,
"learning_rate": 0.0001961809045226131,
"loss": 0.9774,
"step": 24
},
{
"epoch": 0.01112099644128114,
"grad_norm": 0.31996771693229675,
"learning_rate": 0.00019597989949748744,
"loss": 1.042,
"step": 25
},
{
"epoch": 0.011565836298932384,
"grad_norm": 0.49899256229400635,
"learning_rate": 0.00019577889447236181,
"loss": 1.0384,
"step": 26
},
{
"epoch": 0.01201067615658363,
"grad_norm": 0.37340229749679565,
"learning_rate": 0.0001955778894472362,
"loss": 1.0307,
"step": 27
},
{
"epoch": 0.012455516014234875,
"grad_norm": 0.33216455578804016,
"learning_rate": 0.00019537688442211056,
"loss": 1.0253,
"step": 28
},
{
"epoch": 0.012900355871886121,
"grad_norm": 0.3582470715045929,
"learning_rate": 0.00019517587939698493,
"loss": 1.0751,
"step": 29
},
{
"epoch": 0.013345195729537367,
"grad_norm": 0.3682143986225128,
"learning_rate": 0.0001949748743718593,
"loss": 1.0633,
"step": 30
},
{
"epoch": 0.013790035587188613,
"grad_norm": 0.4105437695980072,
"learning_rate": 0.00019477386934673368,
"loss": 1.1865,
"step": 31
},
{
"epoch": 0.014234875444839857,
"grad_norm": 0.3709512948989868,
"learning_rate": 0.00019457286432160805,
"loss": 1.0198,
"step": 32
},
{
"epoch": 0.014679715302491103,
"grad_norm": 0.3693946599960327,
"learning_rate": 0.00019437185929648243,
"loss": 1.0405,
"step": 33
},
{
"epoch": 0.015124555160142349,
"grad_norm": 0.335426390171051,
"learning_rate": 0.0001941708542713568,
"loss": 0.9301,
"step": 34
},
{
"epoch": 0.015569395017793594,
"grad_norm": 0.5277281403541565,
"learning_rate": 0.00019396984924623117,
"loss": 0.9615,
"step": 35
},
{
"epoch": 0.01601423487544484,
"grad_norm": 0.40925148129463196,
"learning_rate": 0.00019376884422110552,
"loss": 1.1093,
"step": 36
},
{
"epoch": 0.016459074733096084,
"grad_norm": 0.40884852409362793,
"learning_rate": 0.00019356783919597992,
"loss": 1.063,
"step": 37
},
{
"epoch": 0.016903914590747332,
"grad_norm": 0.3492753207683563,
"learning_rate": 0.0001933668341708543,
"loss": 1.0865,
"step": 38
},
{
"epoch": 0.017348754448398576,
"grad_norm": 0.3660659193992615,
"learning_rate": 0.00019316582914572864,
"loss": 1.071,
"step": 39
},
{
"epoch": 0.017793594306049824,
"grad_norm": 0.3863303065299988,
"learning_rate": 0.000192964824120603,
"loss": 1.0096,
"step": 40
},
{
"epoch": 0.018238434163701068,
"grad_norm": 0.34570327401161194,
"learning_rate": 0.0001927638190954774,
"loss": 1.0753,
"step": 41
},
{
"epoch": 0.018683274021352312,
"grad_norm": 0.35912126302719116,
"learning_rate": 0.00019256281407035178,
"loss": 1.0997,
"step": 42
},
{
"epoch": 0.01912811387900356,
"grad_norm": 0.36907467246055603,
"learning_rate": 0.00019236180904522613,
"loss": 1.0419,
"step": 43
},
{
"epoch": 0.019572953736654804,
"grad_norm": 0.3746166229248047,
"learning_rate": 0.0001921608040201005,
"loss": 1.0424,
"step": 44
},
{
"epoch": 0.02001779359430605,
"grad_norm": 0.33465832471847534,
"learning_rate": 0.0001919597989949749,
"loss": 1.0335,
"step": 45
},
{
"epoch": 0.020462633451957295,
"grad_norm": 0.3497348427772522,
"learning_rate": 0.00019175879396984925,
"loss": 1.0282,
"step": 46
},
{
"epoch": 0.02090747330960854,
"grad_norm": 0.34102576971054077,
"learning_rate": 0.00019155778894472362,
"loss": 1.1197,
"step": 47
},
{
"epoch": 0.021352313167259787,
"grad_norm": 0.31394055485725403,
"learning_rate": 0.000191356783919598,
"loss": 1.0298,
"step": 48
},
{
"epoch": 0.02179715302491103,
"grad_norm": 0.505631148815155,
"learning_rate": 0.0001911557788944724,
"loss": 1.0349,
"step": 49
},
{
"epoch": 0.02224199288256228,
"grad_norm": 0.3988608121871948,
"learning_rate": 0.00019095477386934674,
"loss": 0.9868,
"step": 50
},
{
"epoch": 0.022686832740213523,
"grad_norm": 0.3646712899208069,
"learning_rate": 0.0001907537688442211,
"loss": 1.0884,
"step": 51
},
{
"epoch": 0.023131672597864767,
"grad_norm": 0.35094475746154785,
"learning_rate": 0.00019055276381909548,
"loss": 1.1207,
"step": 52
},
{
"epoch": 0.023576512455516015,
"grad_norm": 0.34856799244880676,
"learning_rate": 0.00019035175879396986,
"loss": 1.0121,
"step": 53
},
{
"epoch": 0.02402135231316726,
"grad_norm": 0.37425950169563293,
"learning_rate": 0.00019015075376884423,
"loss": 1.1073,
"step": 54
},
{
"epoch": 0.024466192170818506,
"grad_norm": 0.3625994026660919,
"learning_rate": 0.0001899497487437186,
"loss": 1.0863,
"step": 55
},
{
"epoch": 0.02491103202846975,
"grad_norm": 0.295337051153183,
"learning_rate": 0.00018974874371859298,
"loss": 1.0561,
"step": 56
},
{
"epoch": 0.025355871886120998,
"grad_norm": 0.34300458431243896,
"learning_rate": 0.00018954773869346732,
"loss": 0.9955,
"step": 57
},
{
"epoch": 0.025800711743772242,
"grad_norm": 0.45535075664520264,
"learning_rate": 0.00018934673366834172,
"loss": 1.0623,
"step": 58
},
{
"epoch": 0.026245551601423486,
"grad_norm": 0.3560766577720642,
"learning_rate": 0.0001891457286432161,
"loss": 1.0259,
"step": 59
},
{
"epoch": 0.026690391459074734,
"grad_norm": 0.33135348558425903,
"learning_rate": 0.00018894472361809047,
"loss": 1.0823,
"step": 60
},
{
"epoch": 0.027135231316725978,
"grad_norm": 0.33519864082336426,
"learning_rate": 0.00018874371859296481,
"loss": 0.8893,
"step": 61
},
{
"epoch": 0.027580071174377226,
"grad_norm": 0.4156452417373657,
"learning_rate": 0.00018854271356783921,
"loss": 0.888,
"step": 62
},
{
"epoch": 0.02802491103202847,
"grad_norm": 0.5566922426223755,
"learning_rate": 0.0001883417085427136,
"loss": 0.9465,
"step": 63
},
{
"epoch": 0.028469750889679714,
"grad_norm": 0.3567597270011902,
"learning_rate": 0.00018814070351758793,
"loss": 1.044,
"step": 64
},
{
"epoch": 0.02891459074733096,
"grad_norm": 0.3937987685203552,
"learning_rate": 0.0001879396984924623,
"loss": 1.1491,
"step": 65
},
{
"epoch": 0.029359430604982206,
"grad_norm": 0.3297279477119446,
"learning_rate": 0.0001877386934673367,
"loss": 1.1387,
"step": 66
},
{
"epoch": 0.029804270462633453,
"grad_norm": 0.4475502371788025,
"learning_rate": 0.00018753768844221108,
"loss": 1.1832,
"step": 67
},
{
"epoch": 0.030249110320284697,
"grad_norm": 0.4012911021709442,
"learning_rate": 0.00018733668341708543,
"loss": 1.1017,
"step": 68
},
{
"epoch": 0.03069395017793594,
"grad_norm": 0.36798158288002014,
"learning_rate": 0.0001871356783919598,
"loss": 0.9889,
"step": 69
},
{
"epoch": 0.03113879003558719,
"grad_norm": 0.3330882787704468,
"learning_rate": 0.0001869346733668342,
"loss": 1.0825,
"step": 70
},
{
"epoch": 0.03158362989323844,
"grad_norm": 0.41344648599624634,
"learning_rate": 0.00018673366834170854,
"loss": 1.0599,
"step": 71
},
{
"epoch": 0.03202846975088968,
"grad_norm": 0.5002074837684631,
"learning_rate": 0.00018653266331658292,
"loss": 1.0586,
"step": 72
},
{
"epoch": 0.032473309608540925,
"grad_norm": 0.3306974470615387,
"learning_rate": 0.0001863316582914573,
"loss": 1.0004,
"step": 73
},
{
"epoch": 0.03291814946619217,
"grad_norm": 0.394704133272171,
"learning_rate": 0.0001861306532663317,
"loss": 1.0083,
"step": 74
},
{
"epoch": 0.03336298932384341,
"grad_norm": 0.3663751184940338,
"learning_rate": 0.00018592964824120604,
"loss": 0.8553,
"step": 75
},
{
"epoch": 0.033807829181494664,
"grad_norm": 0.37072721123695374,
"learning_rate": 0.0001857286432160804,
"loss": 1.1199,
"step": 76
},
{
"epoch": 0.03425266903914591,
"grad_norm": 0.37048038840293884,
"learning_rate": 0.00018552763819095478,
"loss": 1.0716,
"step": 77
},
{
"epoch": 0.03469750889679715,
"grad_norm": 0.34426698088645935,
"learning_rate": 0.00018532663316582915,
"loss": 1.0378,
"step": 78
},
{
"epoch": 0.035142348754448396,
"grad_norm": 0.3649255037307739,
"learning_rate": 0.00018512562814070353,
"loss": 1.0843,
"step": 79
},
{
"epoch": 0.03558718861209965,
"grad_norm": 0.37399303913116455,
"learning_rate": 0.0001849246231155779,
"loss": 0.9741,
"step": 80
},
{
"epoch": 0.03603202846975089,
"grad_norm": 0.3313739001750946,
"learning_rate": 0.00018472361809045227,
"loss": 0.9883,
"step": 81
},
{
"epoch": 0.036476868327402136,
"grad_norm": 0.36365818977355957,
"learning_rate": 0.00018452261306532662,
"loss": 1.0434,
"step": 82
},
{
"epoch": 0.03692170818505338,
"grad_norm": 0.33248934149742126,
"learning_rate": 0.00018432160804020102,
"loss": 1.0635,
"step": 83
},
{
"epoch": 0.037366548042704624,
"grad_norm": 0.361134797334671,
"learning_rate": 0.0001841206030150754,
"loss": 1.0248,
"step": 84
},
{
"epoch": 0.037811387900355875,
"grad_norm": 0.3499568700790405,
"learning_rate": 0.00018391959798994977,
"loss": 0.9905,
"step": 85
},
{
"epoch": 0.03825622775800712,
"grad_norm": 0.3246215879917145,
"learning_rate": 0.0001837185929648241,
"loss": 1.0431,
"step": 86
},
{
"epoch": 0.03870106761565836,
"grad_norm": 0.39000576734542847,
"learning_rate": 0.0001835175879396985,
"loss": 0.995,
"step": 87
},
{
"epoch": 0.03914590747330961,
"grad_norm": 0.32663071155548096,
"learning_rate": 0.00018331658291457288,
"loss": 1.0976,
"step": 88
},
{
"epoch": 0.03959074733096085,
"grad_norm": 0.35704660415649414,
"learning_rate": 0.00018311557788944723,
"loss": 1.088,
"step": 89
},
{
"epoch": 0.0400355871886121,
"grad_norm": 0.3753162622451782,
"learning_rate": 0.0001829145728643216,
"loss": 1.0509,
"step": 90
},
{
"epoch": 0.04048042704626335,
"grad_norm": 0.45394080877304077,
"learning_rate": 0.000182713567839196,
"loss": 0.8953,
"step": 91
},
{
"epoch": 0.04092526690391459,
"grad_norm": 0.35750091075897217,
"learning_rate": 0.00018251256281407038,
"loss": 1.0031,
"step": 92
},
{
"epoch": 0.041370106761565835,
"grad_norm": 0.2915020287036896,
"learning_rate": 0.00018231155778894472,
"loss": 0.9292,
"step": 93
},
{
"epoch": 0.04181494661921708,
"grad_norm": 0.29004380106925964,
"learning_rate": 0.0001821105527638191,
"loss": 0.9423,
"step": 94
},
{
"epoch": 0.04225978647686833,
"grad_norm": 0.3510340750217438,
"learning_rate": 0.0001819095477386935,
"loss": 0.9933,
"step": 95
},
{
"epoch": 0.042704626334519574,
"grad_norm": 0.3953409194946289,
"learning_rate": 0.00018170854271356784,
"loss": 1.0174,
"step": 96
},
{
"epoch": 0.04314946619217082,
"grad_norm": 0.3693845272064209,
"learning_rate": 0.00018150753768844221,
"loss": 1.1309,
"step": 97
},
{
"epoch": 0.04359430604982206,
"grad_norm": 0.33780723810195923,
"learning_rate": 0.0001813065326633166,
"loss": 0.995,
"step": 98
},
{
"epoch": 0.04403914590747331,
"grad_norm": 0.39444178342819214,
"learning_rate": 0.00018110552763819096,
"loss": 1.0567,
"step": 99
},
{
"epoch": 0.04448398576512456,
"grad_norm": 0.3430976867675781,
"learning_rate": 0.00018090452261306533,
"loss": 0.956,
"step": 100
},
{
"epoch": 0.0449288256227758,
"grad_norm": 0.31412309408187866,
"learning_rate": 0.0001807035175879397,
"loss": 1.1141,
"step": 101
},
{
"epoch": 0.045373665480427046,
"grad_norm": 0.3321763575077057,
"learning_rate": 0.00018050251256281408,
"loss": 1.0245,
"step": 102
},
{
"epoch": 0.04581850533807829,
"grad_norm": 0.3730502128601074,
"learning_rate": 0.00018030150753768845,
"loss": 1.0961,
"step": 103
},
{
"epoch": 0.046263345195729534,
"grad_norm": 0.33242374658584595,
"learning_rate": 0.00018010050251256282,
"loss": 0.9899,
"step": 104
},
{
"epoch": 0.046708185053380785,
"grad_norm": 0.3853437900543213,
"learning_rate": 0.0001798994974874372,
"loss": 0.9559,
"step": 105
},
{
"epoch": 0.04715302491103203,
"grad_norm": 0.33745744824409485,
"learning_rate": 0.00017969849246231157,
"loss": 1.0458,
"step": 106
},
{
"epoch": 0.04759786476868327,
"grad_norm": 0.3601361811161041,
"learning_rate": 0.00017949748743718592,
"loss": 1.0801,
"step": 107
},
{
"epoch": 0.04804270462633452,
"grad_norm": 0.4157007932662964,
"learning_rate": 0.00017929648241206032,
"loss": 0.9852,
"step": 108
},
{
"epoch": 0.04848754448398576,
"grad_norm": 0.2746104598045349,
"learning_rate": 0.0001790954773869347,
"loss": 1.0247,
"step": 109
},
{
"epoch": 0.04893238434163701,
"grad_norm": 0.3099765181541443,
"learning_rate": 0.00017889447236180906,
"loss": 0.9642,
"step": 110
},
{
"epoch": 0.04937722419928826,
"grad_norm": 0.3376833498477936,
"learning_rate": 0.0001786934673366834,
"loss": 1.0203,
"step": 111
},
{
"epoch": 0.0498220640569395,
"grad_norm": 0.3929193913936615,
"learning_rate": 0.0001784924623115578,
"loss": 1.1084,
"step": 112
},
{
"epoch": 0.050266903914590745,
"grad_norm": 0.3145126402378082,
"learning_rate": 0.00017829145728643218,
"loss": 1.124,
"step": 113
},
{
"epoch": 0.050711743772241996,
"grad_norm": 0.3065921664237976,
"learning_rate": 0.00017809045226130653,
"loss": 1.0453,
"step": 114
},
{
"epoch": 0.05115658362989324,
"grad_norm": 0.3715677857398987,
"learning_rate": 0.0001778894472361809,
"loss": 0.9841,
"step": 115
},
{
"epoch": 0.051601423487544484,
"grad_norm": 0.36604928970336914,
"learning_rate": 0.0001776884422110553,
"loss": 0.9419,
"step": 116
},
{
"epoch": 0.05204626334519573,
"grad_norm": 0.37098243832588196,
"learning_rate": 0.00017748743718592967,
"loss": 1.0133,
"step": 117
},
{
"epoch": 0.05249110320284697,
"grad_norm": 0.3711595833301544,
"learning_rate": 0.00017728643216080402,
"loss": 1.0324,
"step": 118
},
{
"epoch": 0.052935943060498224,
"grad_norm": 0.3096163868904114,
"learning_rate": 0.0001770854271356784,
"loss": 1.0086,
"step": 119
},
{
"epoch": 0.05338078291814947,
"grad_norm": 0.4001842737197876,
"learning_rate": 0.0001768844221105528,
"loss": 1.026,
"step": 120
},
{
"epoch": 0.05382562277580071,
"grad_norm": 0.29020652174949646,
"learning_rate": 0.00017668341708542714,
"loss": 0.9578,
"step": 121
},
{
"epoch": 0.054270462633451956,
"grad_norm": 0.35916441679000854,
"learning_rate": 0.0001764824120603015,
"loss": 1.0502,
"step": 122
},
{
"epoch": 0.0547153024911032,
"grad_norm": 0.36931201815605164,
"learning_rate": 0.00017628140703517588,
"loss": 0.9456,
"step": 123
},
{
"epoch": 0.05516014234875445,
"grad_norm": 0.37013575434684753,
"learning_rate": 0.00017608040201005026,
"loss": 1.0472,
"step": 124
},
{
"epoch": 0.055604982206405695,
"grad_norm": 0.3738909959793091,
"learning_rate": 0.00017587939698492463,
"loss": 1.0194,
"step": 125
},
{
"epoch": 0.05604982206405694,
"grad_norm": 0.4676179885864258,
"learning_rate": 0.000175678391959799,
"loss": 1.1556,
"step": 126
},
{
"epoch": 0.056494661921708184,
"grad_norm": 0.430128812789917,
"learning_rate": 0.00017547738693467338,
"loss": 1.1472,
"step": 127
},
{
"epoch": 0.05693950177935943,
"grad_norm": 0.3177819848060608,
"learning_rate": 0.00017527638190954775,
"loss": 1.0159,
"step": 128
},
{
"epoch": 0.05738434163701068,
"grad_norm": 0.35535189509391785,
"learning_rate": 0.00017507537688442212,
"loss": 1.0388,
"step": 129
},
{
"epoch": 0.05782918149466192,
"grad_norm": 0.41556811332702637,
"learning_rate": 0.0001748743718592965,
"loss": 1.0366,
"step": 130
},
{
"epoch": 0.05827402135231317,
"grad_norm": 0.28733861446380615,
"learning_rate": 0.00017467336683417087,
"loss": 1.041,
"step": 131
},
{
"epoch": 0.05871886120996441,
"grad_norm": 0.36875680088996887,
"learning_rate": 0.00017447236180904521,
"loss": 1.0408,
"step": 132
},
{
"epoch": 0.059163701067615655,
"grad_norm": 0.30114784836769104,
"learning_rate": 0.00017427135678391961,
"loss": 1.0582,
"step": 133
},
{
"epoch": 0.059608540925266906,
"grad_norm": 0.397324800491333,
"learning_rate": 0.000174070351758794,
"loss": 1.0715,
"step": 134
},
{
"epoch": 0.06005338078291815,
"grad_norm": 0.30825501680374146,
"learning_rate": 0.00017386934673366836,
"loss": 0.8977,
"step": 135
},
{
"epoch": 0.060498220640569395,
"grad_norm": 0.34875619411468506,
"learning_rate": 0.0001736683417085427,
"loss": 1.0427,
"step": 136
},
{
"epoch": 0.06094306049822064,
"grad_norm": 0.44976702332496643,
"learning_rate": 0.0001734673366834171,
"loss": 1.0594,
"step": 137
},
{
"epoch": 0.06138790035587188,
"grad_norm": 0.4009503722190857,
"learning_rate": 0.00017326633165829148,
"loss": 1.0156,
"step": 138
},
{
"epoch": 0.061832740213523134,
"grad_norm": 0.32680168747901917,
"learning_rate": 0.00017306532663316582,
"loss": 1.0518,
"step": 139
},
{
"epoch": 0.06227758007117438,
"grad_norm": 0.3297230005264282,
"learning_rate": 0.0001728643216080402,
"loss": 0.982,
"step": 140
},
{
"epoch": 0.06272241992882563,
"grad_norm": 0.3223113417625427,
"learning_rate": 0.0001726633165829146,
"loss": 0.9451,
"step": 141
},
{
"epoch": 0.06316725978647687,
"grad_norm": 0.3400196135044098,
"learning_rate": 0.00017246231155778897,
"loss": 0.9247,
"step": 142
},
{
"epoch": 0.06361209964412812,
"grad_norm": 0.36805739998817444,
"learning_rate": 0.00017226130653266332,
"loss": 1.0732,
"step": 143
},
{
"epoch": 0.06405693950177936,
"grad_norm": 0.40036970376968384,
"learning_rate": 0.0001720603015075377,
"loss": 1.0966,
"step": 144
},
{
"epoch": 0.0645017793594306,
"grad_norm": 0.3755742311477661,
"learning_rate": 0.00017185929648241206,
"loss": 0.9863,
"step": 145
},
{
"epoch": 0.06494661921708185,
"grad_norm": 0.4415525496006012,
"learning_rate": 0.00017165829145728644,
"loss": 1.0933,
"step": 146
},
{
"epoch": 0.0653914590747331,
"grad_norm": 0.3682146966457367,
"learning_rate": 0.0001714572864321608,
"loss": 0.9468,
"step": 147
},
{
"epoch": 0.06583629893238434,
"grad_norm": 0.3670349717140198,
"learning_rate": 0.00017125628140703518,
"loss": 1.1367,
"step": 148
},
{
"epoch": 0.06628113879003558,
"grad_norm": 0.30022603273391724,
"learning_rate": 0.00017105527638190955,
"loss": 0.9387,
"step": 149
},
{
"epoch": 0.06672597864768683,
"grad_norm": 0.36275291442871094,
"learning_rate": 0.00017085427135678393,
"loss": 0.8882,
"step": 150
},
{
"epoch": 0.06717081850533808,
"grad_norm": 0.4646683633327484,
"learning_rate": 0.0001706532663316583,
"loss": 0.8556,
"step": 151
},
{
"epoch": 0.06761565836298933,
"grad_norm": 0.34833550453186035,
"learning_rate": 0.00017045226130653267,
"loss": 1.0124,
"step": 152
},
{
"epoch": 0.06806049822064057,
"grad_norm": 0.34811916947364807,
"learning_rate": 0.00017025125628140705,
"loss": 0.8668,
"step": 153
},
{
"epoch": 0.06850533807829182,
"grad_norm": 0.42011362314224243,
"learning_rate": 0.00017005025125628142,
"loss": 0.9818,
"step": 154
},
{
"epoch": 0.06895017793594306,
"grad_norm": 0.34665143489837646,
"learning_rate": 0.0001698492462311558,
"loss": 0.9958,
"step": 155
},
{
"epoch": 0.0693950177935943,
"grad_norm": 0.3390451967716217,
"learning_rate": 0.00016964824120603016,
"loss": 0.9835,
"step": 156
},
{
"epoch": 0.06983985765124555,
"grad_norm": 0.3538704812526703,
"learning_rate": 0.0001694472361809045,
"loss": 1.102,
"step": 157
},
{
"epoch": 0.07028469750889679,
"grad_norm": 0.40229830145835876,
"learning_rate": 0.0001692462311557789,
"loss": 1.1512,
"step": 158
},
{
"epoch": 0.07072953736654804,
"grad_norm": 0.37556755542755127,
"learning_rate": 0.00016904522613065328,
"loss": 1.0165,
"step": 159
},
{
"epoch": 0.0711743772241993,
"grad_norm": 0.33789652585983276,
"learning_rate": 0.00016884422110552766,
"loss": 0.9962,
"step": 160
},
{
"epoch": 0.07161921708185054,
"grad_norm": 0.39385226368904114,
"learning_rate": 0.000168643216080402,
"loss": 1.0538,
"step": 161
},
{
"epoch": 0.07206405693950178,
"grad_norm": 0.39948439598083496,
"learning_rate": 0.0001684422110552764,
"loss": 1.0472,
"step": 162
},
{
"epoch": 0.07250889679715303,
"grad_norm": 0.3786976635456085,
"learning_rate": 0.00016824120603015078,
"loss": 1.0288,
"step": 163
},
{
"epoch": 0.07295373665480427,
"grad_norm": 0.3729378879070282,
"learning_rate": 0.00016804020100502512,
"loss": 0.9856,
"step": 164
},
{
"epoch": 0.07339857651245552,
"grad_norm": 0.36763879656791687,
"learning_rate": 0.0001678391959798995,
"loss": 0.9711,
"step": 165
},
{
"epoch": 0.07384341637010676,
"grad_norm": 0.38680917024612427,
"learning_rate": 0.0001676381909547739,
"loss": 0.8915,
"step": 166
},
{
"epoch": 0.074288256227758,
"grad_norm": 0.4449096620082855,
"learning_rate": 0.00016743718592964827,
"loss": 0.9931,
"step": 167
},
{
"epoch": 0.07473309608540925,
"grad_norm": 0.39082011580467224,
"learning_rate": 0.0001672361809045226,
"loss": 1.0084,
"step": 168
},
{
"epoch": 0.07517793594306049,
"grad_norm": 0.37978312373161316,
"learning_rate": 0.00016703517587939699,
"loss": 0.9449,
"step": 169
},
{
"epoch": 0.07562277580071175,
"grad_norm": 0.32479336857795715,
"learning_rate": 0.00016683417085427136,
"loss": 0.8941,
"step": 170
},
{
"epoch": 0.076067615658363,
"grad_norm": 0.35645660758018494,
"learning_rate": 0.00016663316582914573,
"loss": 0.9952,
"step": 171
},
{
"epoch": 0.07651245551601424,
"grad_norm": 0.40985000133514404,
"learning_rate": 0.0001664321608040201,
"loss": 1.0187,
"step": 172
},
{
"epoch": 0.07695729537366548,
"grad_norm": 0.4032020568847656,
"learning_rate": 0.00016623115577889448,
"loss": 0.9356,
"step": 173
},
{
"epoch": 0.07740213523131673,
"grad_norm": 0.3428996801376343,
"learning_rate": 0.00016603015075376885,
"loss": 0.9326,
"step": 174
},
{
"epoch": 0.07784697508896797,
"grad_norm": 0.2806510925292969,
"learning_rate": 0.00016582914572864322,
"loss": 0.6118,
"step": 175
},
{
"epoch": 0.07829181494661921,
"grad_norm": 0.3010331392288208,
"learning_rate": 0.0001656281407035176,
"loss": 1.0065,
"step": 176
},
{
"epoch": 0.07873665480427046,
"grad_norm": 0.2989744246006012,
"learning_rate": 0.00016542713567839197,
"loss": 0.948,
"step": 177
},
{
"epoch": 0.0791814946619217,
"grad_norm": 0.3374849259853363,
"learning_rate": 0.00016522613065326634,
"loss": 1.0899,
"step": 178
},
{
"epoch": 0.07962633451957295,
"grad_norm": 0.38859498500823975,
"learning_rate": 0.00016502512562814072,
"loss": 1.035,
"step": 179
},
{
"epoch": 0.0800711743772242,
"grad_norm": 0.34474217891693115,
"learning_rate": 0.0001648241206030151,
"loss": 0.9826,
"step": 180
},
{
"epoch": 0.08051601423487545,
"grad_norm": 0.2957572937011719,
"learning_rate": 0.00016462311557788946,
"loss": 0.8905,
"step": 181
},
{
"epoch": 0.0809608540925267,
"grad_norm": 0.3576017916202545,
"learning_rate": 0.0001644221105527638,
"loss": 1.0266,
"step": 182
},
{
"epoch": 0.08140569395017794,
"grad_norm": 0.38848915696144104,
"learning_rate": 0.0001642211055276382,
"loss": 0.9883,
"step": 183
},
{
"epoch": 0.08185053380782918,
"grad_norm": 0.4087153375148773,
"learning_rate": 0.00016402010050251258,
"loss": 1.1882,
"step": 184
},
{
"epoch": 0.08229537366548043,
"grad_norm": 0.3468906879425049,
"learning_rate": 0.00016381909547738695,
"loss": 0.9818,
"step": 185
},
{
"epoch": 0.08274021352313167,
"grad_norm": 0.4617744982242584,
"learning_rate": 0.0001636180904522613,
"loss": 0.9065,
"step": 186
},
{
"epoch": 0.08318505338078291,
"grad_norm": 0.4207713305950165,
"learning_rate": 0.0001634170854271357,
"loss": 0.9988,
"step": 187
},
{
"epoch": 0.08362989323843416,
"grad_norm": 0.3632107675075531,
"learning_rate": 0.00016321608040201007,
"loss": 0.9049,
"step": 188
},
{
"epoch": 0.08407473309608542,
"grad_norm": 0.4303954839706421,
"learning_rate": 0.00016301507537688442,
"loss": 0.9801,
"step": 189
},
{
"epoch": 0.08451957295373666,
"grad_norm": 0.3233638107776642,
"learning_rate": 0.0001628140703517588,
"loss": 1.1457,
"step": 190
},
{
"epoch": 0.0849644128113879,
"grad_norm": 0.36270812153816223,
"learning_rate": 0.00016261306532663316,
"loss": 0.9804,
"step": 191
},
{
"epoch": 0.08540925266903915,
"grad_norm": 0.3483213484287262,
"learning_rate": 0.00016241206030150756,
"loss": 0.9412,
"step": 192
},
{
"epoch": 0.08585409252669039,
"grad_norm": 0.33415859937667847,
"learning_rate": 0.0001622110552763819,
"loss": 1.0479,
"step": 193
},
{
"epoch": 0.08629893238434164,
"grad_norm": 0.3390806317329407,
"learning_rate": 0.00016201005025125628,
"loss": 1.0266,
"step": 194
},
{
"epoch": 0.08674377224199288,
"grad_norm": 0.3439917266368866,
"learning_rate": 0.00016180904522613066,
"loss": 0.9495,
"step": 195
},
{
"epoch": 0.08718861209964412,
"grad_norm": 0.5404065847396851,
"learning_rate": 0.00016160804020100503,
"loss": 1.0176,
"step": 196
},
{
"epoch": 0.08763345195729537,
"grad_norm": 0.40834537148475647,
"learning_rate": 0.0001614070351758794,
"loss": 1.0119,
"step": 197
},
{
"epoch": 0.08807829181494661,
"grad_norm": 0.41839244961738586,
"learning_rate": 0.00016120603015075378,
"loss": 1.0006,
"step": 198
},
{
"epoch": 0.08852313167259787,
"grad_norm": 0.36179983615875244,
"learning_rate": 0.00016100502512562815,
"loss": 1.0247,
"step": 199
},
{
"epoch": 0.08896797153024912,
"grad_norm": 0.34288227558135986,
"learning_rate": 0.00016080402010050252,
"loss": 0.9545,
"step": 200
},
{
"epoch": 0.08941281138790036,
"grad_norm": 0.37949255108833313,
"learning_rate": 0.0001606030150753769,
"loss": 1.0024,
"step": 201
},
{
"epoch": 0.0898576512455516,
"grad_norm": 0.4100090563297272,
"learning_rate": 0.00016040201005025127,
"loss": 1.0465,
"step": 202
},
{
"epoch": 0.09030249110320285,
"grad_norm": 0.3741397559642792,
"learning_rate": 0.00016020100502512564,
"loss": 0.8969,
"step": 203
},
{
"epoch": 0.09074733096085409,
"grad_norm": 0.4092821180820465,
"learning_rate": 0.00016,
"loss": 1.3054,
"step": 204
},
{
"epoch": 0.09119217081850534,
"grad_norm": 0.29491907358169556,
"learning_rate": 0.00015979899497487439,
"loss": 1.0039,
"step": 205
},
{
"epoch": 0.09163701067615658,
"grad_norm": 0.38052767515182495,
"learning_rate": 0.00015959798994974876,
"loss": 0.9034,
"step": 206
},
{
"epoch": 0.09208185053380782,
"grad_norm": 0.34436798095703125,
"learning_rate": 0.0001593969849246231,
"loss": 0.9621,
"step": 207
},
{
"epoch": 0.09252669039145907,
"grad_norm": 0.41748183965682983,
"learning_rate": 0.0001591959798994975,
"loss": 1.0112,
"step": 208
},
{
"epoch": 0.09297153024911033,
"grad_norm": 0.35025399923324585,
"learning_rate": 0.00015899497487437188,
"loss": 1.0594,
"step": 209
},
{
"epoch": 0.09341637010676157,
"grad_norm": 0.3578777611255646,
"learning_rate": 0.00015879396984924625,
"loss": 1.0191,
"step": 210
},
{
"epoch": 0.09386120996441281,
"grad_norm": 0.3903699517250061,
"learning_rate": 0.0001585929648241206,
"loss": 1.0492,
"step": 211
},
{
"epoch": 0.09430604982206406,
"grad_norm": 0.34571701288223267,
"learning_rate": 0.000158391959798995,
"loss": 1.07,
"step": 212
},
{
"epoch": 0.0947508896797153,
"grad_norm": 0.3766401410102844,
"learning_rate": 0.00015819095477386937,
"loss": 0.9508,
"step": 213
},
{
"epoch": 0.09519572953736655,
"grad_norm": 0.3765754699707031,
"learning_rate": 0.00015798994974874372,
"loss": 0.9604,
"step": 214
},
{
"epoch": 0.09564056939501779,
"grad_norm": 0.39237186312675476,
"learning_rate": 0.0001577889447236181,
"loss": 0.9678,
"step": 215
},
{
"epoch": 0.09608540925266904,
"grad_norm": 0.43379107117652893,
"learning_rate": 0.00015758793969849246,
"loss": 0.9569,
"step": 216
},
{
"epoch": 0.09653024911032028,
"grad_norm": 0.44235309958457947,
"learning_rate": 0.00015738693467336686,
"loss": 1.047,
"step": 217
},
{
"epoch": 0.09697508896797152,
"grad_norm": 0.37492284178733826,
"learning_rate": 0.0001571859296482412,
"loss": 0.9247,
"step": 218
},
{
"epoch": 0.09741992882562278,
"grad_norm": 0.44187766313552856,
"learning_rate": 0.00015698492462311558,
"loss": 1.0136,
"step": 219
},
{
"epoch": 0.09786476868327403,
"grad_norm": 0.41393113136291504,
"learning_rate": 0.00015678391959798995,
"loss": 1.0051,
"step": 220
},
{
"epoch": 0.09830960854092527,
"grad_norm": 0.3643686771392822,
"learning_rate": 0.00015658291457286433,
"loss": 1.1171,
"step": 221
},
{
"epoch": 0.09875444839857651,
"grad_norm": 0.351253479719162,
"learning_rate": 0.0001563819095477387,
"loss": 1.0466,
"step": 222
},
{
"epoch": 0.09919928825622776,
"grad_norm": 0.35708218812942505,
"learning_rate": 0.00015618090452261307,
"loss": 1.0688,
"step": 223
},
{
"epoch": 0.099644128113879,
"grad_norm": 0.3984560966491699,
"learning_rate": 0.00015597989949748745,
"loss": 0.9606,
"step": 224
},
{
"epoch": 0.10008896797153025,
"grad_norm": 0.3412669897079468,
"learning_rate": 0.00015577889447236182,
"loss": 0.9781,
"step": 225
},
{
"epoch": 0.10053380782918149,
"grad_norm": 0.36613014340400696,
"learning_rate": 0.0001555778894472362,
"loss": 0.9449,
"step": 226
},
{
"epoch": 0.10097864768683273,
"grad_norm": 0.3568494915962219,
"learning_rate": 0.00015537688442211056,
"loss": 0.9679,
"step": 227
},
{
"epoch": 0.10142348754448399,
"grad_norm": 0.4687019884586334,
"learning_rate": 0.00015517587939698494,
"loss": 1.0386,
"step": 228
},
{
"epoch": 0.10186832740213524,
"grad_norm": 0.40447989106178284,
"learning_rate": 0.0001549748743718593,
"loss": 0.9812,
"step": 229
},
{
"epoch": 0.10231316725978648,
"grad_norm": 0.39272576570510864,
"learning_rate": 0.00015477386934673368,
"loss": 1.0165,
"step": 230
},
{
"epoch": 0.10275800711743772,
"grad_norm": 0.3780848979949951,
"learning_rate": 0.00015457286432160806,
"loss": 0.9606,
"step": 231
},
{
"epoch": 0.10320284697508897,
"grad_norm": 0.46794167160987854,
"learning_rate": 0.0001543718592964824,
"loss": 0.9305,
"step": 232
},
{
"epoch": 0.10364768683274021,
"grad_norm": 0.39729002118110657,
"learning_rate": 0.0001541708542713568,
"loss": 1.0113,
"step": 233
},
{
"epoch": 0.10409252669039146,
"grad_norm": 0.4045659005641937,
"learning_rate": 0.00015396984924623117,
"loss": 0.9961,
"step": 234
},
{
"epoch": 0.1045373665480427,
"grad_norm": 0.35122302174568176,
"learning_rate": 0.00015376884422110555,
"loss": 0.9891,
"step": 235
},
{
"epoch": 0.10498220640569395,
"grad_norm": 0.37050357460975647,
"learning_rate": 0.0001535678391959799,
"loss": 1.0024,
"step": 236
},
{
"epoch": 0.10542704626334519,
"grad_norm": 0.40677326917648315,
"learning_rate": 0.00015336683417085427,
"loss": 1.0948,
"step": 237
},
{
"epoch": 0.10587188612099645,
"grad_norm": 0.35067057609558105,
"learning_rate": 0.00015316582914572867,
"loss": 0.9897,
"step": 238
},
{
"epoch": 0.10631672597864769,
"grad_norm": 0.3920201063156128,
"learning_rate": 0.000152964824120603,
"loss": 1.0114,
"step": 239
},
{
"epoch": 0.10676156583629894,
"grad_norm": 0.3613733649253845,
"learning_rate": 0.00015276381909547739,
"loss": 0.9508,
"step": 240
},
{
"epoch": 0.10720640569395018,
"grad_norm": 0.35030075907707214,
"learning_rate": 0.00015256281407035176,
"loss": 1.0019,
"step": 241
},
{
"epoch": 0.10765124555160142,
"grad_norm": 0.4036657512187958,
"learning_rate": 0.00015236180904522613,
"loss": 0.9347,
"step": 242
},
{
"epoch": 0.10809608540925267,
"grad_norm": 0.3680381178855896,
"learning_rate": 0.0001521608040201005,
"loss": 0.9541,
"step": 243
},
{
"epoch": 0.10854092526690391,
"grad_norm": 0.7577487826347351,
"learning_rate": 0.00015195979899497488,
"loss": 1.0019,
"step": 244
},
{
"epoch": 0.10898576512455516,
"grad_norm": 0.32293733954429626,
"learning_rate": 0.00015175879396984925,
"loss": 0.9768,
"step": 245
},
{
"epoch": 0.1094306049822064,
"grad_norm": 0.32959771156311035,
"learning_rate": 0.00015155778894472362,
"loss": 1.0606,
"step": 246
},
{
"epoch": 0.10987544483985764,
"grad_norm": 0.3493881821632385,
"learning_rate": 0.000151356783919598,
"loss": 1.0317,
"step": 247
},
{
"epoch": 0.1103202846975089,
"grad_norm": 0.3482673168182373,
"learning_rate": 0.00015115577889447237,
"loss": 0.9157,
"step": 248
},
{
"epoch": 0.11076512455516015,
"grad_norm": 0.35461658239364624,
"learning_rate": 0.00015095477386934674,
"loss": 1.0753,
"step": 249
},
{
"epoch": 0.11120996441281139,
"grad_norm": 0.3124016225337982,
"learning_rate": 0.00015075376884422112,
"loss": 0.9352,
"step": 250
},
{
"epoch": 0.11165480427046263,
"grad_norm": 0.3451753854751587,
"learning_rate": 0.0001505527638190955,
"loss": 0.948,
"step": 251
},
{
"epoch": 0.11209964412811388,
"grad_norm": 0.3436938524246216,
"learning_rate": 0.00015035175879396986,
"loss": 0.9277,
"step": 252
},
{
"epoch": 0.11254448398576512,
"grad_norm": 0.3544940650463104,
"learning_rate": 0.00015015075376884423,
"loss": 0.976,
"step": 253
},
{
"epoch": 0.11298932384341637,
"grad_norm": 0.34947720170021057,
"learning_rate": 0.0001499497487437186,
"loss": 1.0602,
"step": 254
},
{
"epoch": 0.11343416370106761,
"grad_norm": 0.38455042243003845,
"learning_rate": 0.00014974874371859298,
"loss": 0.9524,
"step": 255
},
{
"epoch": 0.11387900355871886,
"grad_norm": 0.4009455442428589,
"learning_rate": 0.00014954773869346735,
"loss": 1.0465,
"step": 256
},
{
"epoch": 0.11432384341637011,
"grad_norm": 0.5961493849754333,
"learning_rate": 0.0001493467336683417,
"loss": 0.8681,
"step": 257
},
{
"epoch": 0.11476868327402136,
"grad_norm": 0.47233107686042786,
"learning_rate": 0.0001491457286432161,
"loss": 0.9355,
"step": 258
},
{
"epoch": 0.1152135231316726,
"grad_norm": 0.41602540016174316,
"learning_rate": 0.00014894472361809047,
"loss": 1.0256,
"step": 259
},
{
"epoch": 0.11565836298932385,
"grad_norm": 0.3379324972629547,
"learning_rate": 0.00014874371859296482,
"loss": 0.8982,
"step": 260
},
{
"epoch": 0.11610320284697509,
"grad_norm": 0.3502661883831024,
"learning_rate": 0.0001485427135678392,
"loss": 0.8932,
"step": 261
},
{
"epoch": 0.11654804270462633,
"grad_norm": 0.35349032282829285,
"learning_rate": 0.00014834170854271356,
"loss": 0.9541,
"step": 262
},
{
"epoch": 0.11699288256227758,
"grad_norm": 0.37270882725715637,
"learning_rate": 0.00014814070351758796,
"loss": 0.9566,
"step": 263
},
{
"epoch": 0.11743772241992882,
"grad_norm": 0.3453938364982605,
"learning_rate": 0.0001479396984924623,
"loss": 0.9901,
"step": 264
},
{
"epoch": 0.11788256227758007,
"grad_norm": 0.4339182376861572,
"learning_rate": 0.00014773869346733668,
"loss": 0.9542,
"step": 265
},
{
"epoch": 0.11832740213523131,
"grad_norm": 0.3721541166305542,
"learning_rate": 0.00014753768844221106,
"loss": 0.9075,
"step": 266
},
{
"epoch": 0.11877224199288257,
"grad_norm": 0.3452603220939636,
"learning_rate": 0.00014733668341708543,
"loss": 0.9762,
"step": 267
},
{
"epoch": 0.11921708185053381,
"grad_norm": 0.33102792501449585,
"learning_rate": 0.0001471356783919598,
"loss": 0.9648,
"step": 268
},
{
"epoch": 0.11966192170818506,
"grad_norm": 0.38129130005836487,
"learning_rate": 0.00014693467336683417,
"loss": 1.0283,
"step": 269
},
{
"epoch": 0.1201067615658363,
"grad_norm": 0.3465835452079773,
"learning_rate": 0.00014673366834170855,
"loss": 0.9383,
"step": 270
},
{
"epoch": 0.12055160142348754,
"grad_norm": 0.31291693449020386,
"learning_rate": 0.00014653266331658292,
"loss": 1.0326,
"step": 271
},
{
"epoch": 0.12099644128113879,
"grad_norm": 0.40111976861953735,
"learning_rate": 0.0001463316582914573,
"loss": 1.0133,
"step": 272
},
{
"epoch": 0.12144128113879003,
"grad_norm": 0.3763119578361511,
"learning_rate": 0.00014613065326633167,
"loss": 1.0317,
"step": 273
},
{
"epoch": 0.12188612099644128,
"grad_norm": 0.3577355742454529,
"learning_rate": 0.00014592964824120604,
"loss": 1.0477,
"step": 274
},
{
"epoch": 0.12233096085409252,
"grad_norm": 0.3452180027961731,
"learning_rate": 0.0001457286432160804,
"loss": 1.0083,
"step": 275
},
{
"epoch": 0.12277580071174377,
"grad_norm": 0.40135836601257324,
"learning_rate": 0.00014552763819095479,
"loss": 0.9175,
"step": 276
},
{
"epoch": 0.12322064056939502,
"grad_norm": 0.45712020993232727,
"learning_rate": 0.00014532663316582916,
"loss": 0.9563,
"step": 277
},
{
"epoch": 0.12366548042704627,
"grad_norm": 0.3986268937587738,
"learning_rate": 0.00014512562814070353,
"loss": 0.926,
"step": 278
},
{
"epoch": 0.12411032028469751,
"grad_norm": 0.3132750988006592,
"learning_rate": 0.0001449246231155779,
"loss": 0.9503,
"step": 279
},
{
"epoch": 0.12455516014234876,
"grad_norm": 0.34486275911331177,
"learning_rate": 0.00014472361809045228,
"loss": 0.9375,
"step": 280
},
{
"epoch": 0.125,
"grad_norm": 0.3309858739376068,
"learning_rate": 0.00014452261306532665,
"loss": 0.9276,
"step": 281
},
{
"epoch": 0.12544483985765126,
"grad_norm": 0.3567025363445282,
"learning_rate": 0.000144321608040201,
"loss": 0.9924,
"step": 282
},
{
"epoch": 0.1258896797153025,
"grad_norm": 0.35258856415748596,
"learning_rate": 0.00014412060301507537,
"loss": 0.974,
"step": 283
},
{
"epoch": 0.12633451957295375,
"grad_norm": 0.33701589703559875,
"learning_rate": 0.00014391959798994977,
"loss": 1.0021,
"step": 284
},
{
"epoch": 0.12677935943060498,
"grad_norm": 0.3263789117336273,
"learning_rate": 0.00014371859296482411,
"loss": 1.051,
"step": 285
},
{
"epoch": 0.12722419928825623,
"grad_norm": 0.39523646235466003,
"learning_rate": 0.0001435175879396985,
"loss": 0.9349,
"step": 286
},
{
"epoch": 0.12766903914590746,
"grad_norm": 0.3561248183250427,
"learning_rate": 0.00014331658291457286,
"loss": 0.869,
"step": 287
},
{
"epoch": 0.12811387900355872,
"grad_norm": 0.43630316853523254,
"learning_rate": 0.00014311557788944726,
"loss": 0.9773,
"step": 288
},
{
"epoch": 0.12855871886120995,
"grad_norm": 0.3555675446987152,
"learning_rate": 0.0001429145728643216,
"loss": 1.0048,
"step": 289
},
{
"epoch": 0.1290035587188612,
"grad_norm": 0.40323683619499207,
"learning_rate": 0.00014271356783919598,
"loss": 0.999,
"step": 290
},
{
"epoch": 0.12944839857651247,
"grad_norm": 0.3344346284866333,
"learning_rate": 0.00014251256281407035,
"loss": 0.9882,
"step": 291
},
{
"epoch": 0.1298932384341637,
"grad_norm": 0.3018104135990143,
"learning_rate": 0.00014231155778894473,
"loss": 0.9665,
"step": 292
},
{
"epoch": 0.13033807829181496,
"grad_norm": 0.3336434066295624,
"learning_rate": 0.0001421105527638191,
"loss": 0.9786,
"step": 293
},
{
"epoch": 0.1307829181494662,
"grad_norm": 0.3364970088005066,
"learning_rate": 0.00014190954773869347,
"loss": 0.9436,
"step": 294
},
{
"epoch": 0.13122775800711745,
"grad_norm": 0.3738161027431488,
"learning_rate": 0.00014170854271356784,
"loss": 0.9884,
"step": 295
},
{
"epoch": 0.13167259786476868,
"grad_norm": 0.34548112750053406,
"learning_rate": 0.00014150753768844222,
"loss": 0.9578,
"step": 296
},
{
"epoch": 0.13211743772241993,
"grad_norm": 0.4063248336315155,
"learning_rate": 0.0001413065326633166,
"loss": 1.0649,
"step": 297
},
{
"epoch": 0.13256227758007116,
"grad_norm": 0.4038452208042145,
"learning_rate": 0.00014110552763819096,
"loss": 0.9824,
"step": 298
},
{
"epoch": 0.13300711743772242,
"grad_norm": 0.3010196089744568,
"learning_rate": 0.00014090452261306534,
"loss": 0.9492,
"step": 299
},
{
"epoch": 0.13345195729537365,
"grad_norm": 0.3451170325279236,
"learning_rate": 0.0001407035175879397,
"loss": 1.1038,
"step": 300
},
{
"epoch": 0.1338967971530249,
"grad_norm": 0.3674055337905884,
"learning_rate": 0.00014050251256281408,
"loss": 0.9764,
"step": 301
},
{
"epoch": 0.13434163701067617,
"grad_norm": 0.28219959139823914,
"learning_rate": 0.00014030150753768846,
"loss": 0.8152,
"step": 302
},
{
"epoch": 0.1347864768683274,
"grad_norm": 0.3264709711074829,
"learning_rate": 0.0001401005025125628,
"loss": 1.0336,
"step": 303
},
{
"epoch": 0.13523131672597866,
"grad_norm": 0.3442930579185486,
"learning_rate": 0.0001398994974874372,
"loss": 0.9622,
"step": 304
},
{
"epoch": 0.1356761565836299,
"grad_norm": 0.4365461468696594,
"learning_rate": 0.00013969849246231157,
"loss": 0.9243,
"step": 305
},
{
"epoch": 0.13612099644128114,
"grad_norm": 0.3317567706108093,
"learning_rate": 0.00013949748743718595,
"loss": 0.9619,
"step": 306
},
{
"epoch": 0.13656583629893237,
"grad_norm": 0.3600742816925049,
"learning_rate": 0.0001392964824120603,
"loss": 1.0869,
"step": 307
},
{
"epoch": 0.13701067615658363,
"grad_norm": 0.414174348115921,
"learning_rate": 0.00013909547738693467,
"loss": 0.9696,
"step": 308
},
{
"epoch": 0.13745551601423486,
"grad_norm": 0.3307357430458069,
"learning_rate": 0.00013889447236180907,
"loss": 0.8673,
"step": 309
},
{
"epoch": 0.13790035587188612,
"grad_norm": 0.3574574291706085,
"learning_rate": 0.0001386934673366834,
"loss": 1.0225,
"step": 310
},
{
"epoch": 0.13834519572953738,
"grad_norm": 0.3453276455402374,
"learning_rate": 0.00013849246231155778,
"loss": 0.956,
"step": 311
},
{
"epoch": 0.1387900355871886,
"grad_norm": 0.3369787633419037,
"learning_rate": 0.00013829145728643216,
"loss": 0.9878,
"step": 312
},
{
"epoch": 0.13923487544483987,
"grad_norm": 0.36153116822242737,
"learning_rate": 0.00013809045226130656,
"loss": 0.9906,
"step": 313
},
{
"epoch": 0.1396797153024911,
"grad_norm": 0.3499455749988556,
"learning_rate": 0.0001378894472361809,
"loss": 1.0581,
"step": 314
},
{
"epoch": 0.14012455516014236,
"grad_norm": 0.3346167206764221,
"learning_rate": 0.00013768844221105528,
"loss": 1.1294,
"step": 315
},
{
"epoch": 0.14056939501779359,
"grad_norm": 0.3535563051700592,
"learning_rate": 0.00013748743718592965,
"loss": 1.0081,
"step": 316
},
{
"epoch": 0.14101423487544484,
"grad_norm": 0.3797767758369446,
"learning_rate": 0.00013728643216080402,
"loss": 0.9821,
"step": 317
},
{
"epoch": 0.14145907473309607,
"grad_norm": 0.35535991191864014,
"learning_rate": 0.0001370854271356784,
"loss": 0.9698,
"step": 318
},
{
"epoch": 0.14190391459074733,
"grad_norm": 0.3628135323524475,
"learning_rate": 0.00013688442211055277,
"loss": 0.8782,
"step": 319
},
{
"epoch": 0.1423487544483986,
"grad_norm": 0.3347620666027069,
"learning_rate": 0.00013668341708542714,
"loss": 0.9383,
"step": 320
},
{
"epoch": 0.14279359430604982,
"grad_norm": 0.3387628197669983,
"learning_rate": 0.00013648241206030151,
"loss": 0.9846,
"step": 321
},
{
"epoch": 0.14323843416370108,
"grad_norm": 0.33139505982398987,
"learning_rate": 0.0001362814070351759,
"loss": 0.9541,
"step": 322
},
{
"epoch": 0.1436832740213523,
"grad_norm": 0.3996013104915619,
"learning_rate": 0.00013608040201005026,
"loss": 0.9682,
"step": 323
},
{
"epoch": 0.14412811387900357,
"grad_norm": 0.3609457314014435,
"learning_rate": 0.00013587939698492463,
"loss": 0.9758,
"step": 324
},
{
"epoch": 0.1445729537366548,
"grad_norm": 0.31257274746894836,
"learning_rate": 0.000135678391959799,
"loss": 0.9793,
"step": 325
},
{
"epoch": 0.14501779359430605,
"grad_norm": 0.3800847828388214,
"learning_rate": 0.00013547738693467338,
"loss": 1.0033,
"step": 326
},
{
"epoch": 0.14546263345195729,
"grad_norm": 0.32256045937538147,
"learning_rate": 0.00013527638190954775,
"loss": 0.9944,
"step": 327
},
{
"epoch": 0.14590747330960854,
"grad_norm": 0.36587563157081604,
"learning_rate": 0.0001350753768844221,
"loss": 1.0223,
"step": 328
},
{
"epoch": 0.14635231316725977,
"grad_norm": 0.37276628613471985,
"learning_rate": 0.00013487437185929647,
"loss": 0.8559,
"step": 329
},
{
"epoch": 0.14679715302491103,
"grad_norm": 0.3412066698074341,
"learning_rate": 0.00013467336683417087,
"loss": 0.8826,
"step": 330
},
{
"epoch": 0.1472419928825623,
"grad_norm": 0.33895769715309143,
"learning_rate": 0.00013447236180904524,
"loss": 0.9517,
"step": 331
},
{
"epoch": 0.14768683274021352,
"grad_norm": 0.3830057680606842,
"learning_rate": 0.0001342713567839196,
"loss": 1.0307,
"step": 332
},
{
"epoch": 0.14813167259786478,
"grad_norm": 0.3927343487739563,
"learning_rate": 0.00013407035175879396,
"loss": 0.9916,
"step": 333
},
{
"epoch": 0.148576512455516,
"grad_norm": 0.3220495283603668,
"learning_rate": 0.00013386934673366836,
"loss": 0.9876,
"step": 334
},
{
"epoch": 0.14902135231316727,
"grad_norm": 0.35559162497520447,
"learning_rate": 0.0001336683417085427,
"loss": 1.0066,
"step": 335
},
{
"epoch": 0.1494661921708185,
"grad_norm": 0.36672064661979675,
"learning_rate": 0.00013346733668341708,
"loss": 0.9865,
"step": 336
},
{
"epoch": 0.14991103202846975,
"grad_norm": 0.3622550964355469,
"learning_rate": 0.00013326633165829146,
"loss": 1.0956,
"step": 337
},
{
"epoch": 0.15035587188612098,
"grad_norm": 0.3213232159614563,
"learning_rate": 0.00013306532663316586,
"loss": 0.9552,
"step": 338
},
{
"epoch": 0.15080071174377224,
"grad_norm": 0.3889192044734955,
"learning_rate": 0.0001328643216080402,
"loss": 0.9417,
"step": 339
},
{
"epoch": 0.1512455516014235,
"grad_norm": 0.32667845487594604,
"learning_rate": 0.00013266331658291457,
"loss": 0.9319,
"step": 340
},
{
"epoch": 0.15169039145907473,
"grad_norm": 0.44496315717697144,
"learning_rate": 0.00013246231155778895,
"loss": 0.7937,
"step": 341
},
{
"epoch": 0.152135231316726,
"grad_norm": 0.371467649936676,
"learning_rate": 0.00013226130653266332,
"loss": 0.9998,
"step": 342
},
{
"epoch": 0.15258007117437722,
"grad_norm": 0.3509642779827118,
"learning_rate": 0.0001320603015075377,
"loss": 0.9615,
"step": 343
},
{
"epoch": 0.15302491103202848,
"grad_norm": 0.32483533024787903,
"learning_rate": 0.00013185929648241207,
"loss": 0.9976,
"step": 344
},
{
"epoch": 0.1534697508896797,
"grad_norm": 0.38299068808555603,
"learning_rate": 0.00013165829145728644,
"loss": 0.8436,
"step": 345
},
{
"epoch": 0.15391459074733096,
"grad_norm": 0.36412617564201355,
"learning_rate": 0.0001314572864321608,
"loss": 1.0338,
"step": 346
},
{
"epoch": 0.1543594306049822,
"grad_norm": 0.3295581638813019,
"learning_rate": 0.00013125628140703518,
"loss": 0.9696,
"step": 347
},
{
"epoch": 0.15480427046263345,
"grad_norm": 0.34034737944602966,
"learning_rate": 0.00013105527638190956,
"loss": 0.9977,
"step": 348
},
{
"epoch": 0.1552491103202847,
"grad_norm": 0.3623081147670746,
"learning_rate": 0.00013085427135678393,
"loss": 1.0032,
"step": 349
},
{
"epoch": 0.15569395017793594,
"grad_norm": 0.3889707922935486,
"learning_rate": 0.0001306532663316583,
"loss": 0.9245,
"step": 350
},
{
"epoch": 0.1561387900355872,
"grad_norm": 0.3887019455432892,
"learning_rate": 0.00013045226130653268,
"loss": 1.0836,
"step": 351
},
{
"epoch": 0.15658362989323843,
"grad_norm": 0.32099735736846924,
"learning_rate": 0.00013025125628140705,
"loss": 0.907,
"step": 352
},
{
"epoch": 0.1570284697508897,
"grad_norm": 0.29804012179374695,
"learning_rate": 0.0001300502512562814,
"loss": 0.9691,
"step": 353
},
{
"epoch": 0.15747330960854092,
"grad_norm": 0.3739759922027588,
"learning_rate": 0.00012984924623115577,
"loss": 1.0826,
"step": 354
},
{
"epoch": 0.15791814946619218,
"grad_norm": 0.3806516230106354,
"learning_rate": 0.00012964824120603017,
"loss": 1.0638,
"step": 355
},
{
"epoch": 0.1583629893238434,
"grad_norm": 0.3504475951194763,
"learning_rate": 0.00012944723618090454,
"loss": 0.8802,
"step": 356
},
{
"epoch": 0.15880782918149466,
"grad_norm": 0.4315776228904724,
"learning_rate": 0.0001292462311557789,
"loss": 1.0159,
"step": 357
},
{
"epoch": 0.1592526690391459,
"grad_norm": 0.3647462725639343,
"learning_rate": 0.00012904522613065326,
"loss": 0.985,
"step": 358
},
{
"epoch": 0.15969750889679715,
"grad_norm": 0.30111998319625854,
"learning_rate": 0.00012884422110552766,
"loss": 0.951,
"step": 359
},
{
"epoch": 0.1601423487544484,
"grad_norm": 0.3357127904891968,
"learning_rate": 0.000128643216080402,
"loss": 1.1002,
"step": 360
},
{
"epoch": 0.16058718861209964,
"grad_norm": 0.4213799834251404,
"learning_rate": 0.00012844221105527638,
"loss": 0.8264,
"step": 361
},
{
"epoch": 0.1610320284697509,
"grad_norm": 0.4168066084384918,
"learning_rate": 0.00012824120603015075,
"loss": 1.0548,
"step": 362
},
{
"epoch": 0.16147686832740213,
"grad_norm": 0.36040782928466797,
"learning_rate": 0.00012804020100502515,
"loss": 1.0988,
"step": 363
},
{
"epoch": 0.1619217081850534,
"grad_norm": 0.315931499004364,
"learning_rate": 0.0001278391959798995,
"loss": 0.8874,
"step": 364
},
{
"epoch": 0.16236654804270462,
"grad_norm": 0.3526541292667389,
"learning_rate": 0.00012763819095477387,
"loss": 0.8408,
"step": 365
},
{
"epoch": 0.16281138790035588,
"grad_norm": 0.315824031829834,
"learning_rate": 0.00012743718592964824,
"loss": 0.9537,
"step": 366
},
{
"epoch": 0.1632562277580071,
"grad_norm": 0.36629655957221985,
"learning_rate": 0.00012723618090452262,
"loss": 0.9732,
"step": 367
},
{
"epoch": 0.16370106761565836,
"grad_norm": 0.3385011553764343,
"learning_rate": 0.000127035175879397,
"loss": 1.0207,
"step": 368
},
{
"epoch": 0.16414590747330962,
"grad_norm": 0.43948590755462646,
"learning_rate": 0.00012683417085427136,
"loss": 0.8719,
"step": 369
},
{
"epoch": 0.16459074733096085,
"grad_norm": 0.4001463055610657,
"learning_rate": 0.00012663316582914574,
"loss": 1.085,
"step": 370
},
{
"epoch": 0.1650355871886121,
"grad_norm": 0.37441739439964294,
"learning_rate": 0.0001264321608040201,
"loss": 0.9799,
"step": 371
},
{
"epoch": 0.16548042704626334,
"grad_norm": 0.29792410135269165,
"learning_rate": 0.00012623115577889448,
"loss": 0.8901,
"step": 372
},
{
"epoch": 0.1659252669039146,
"grad_norm": 0.42688125371932983,
"learning_rate": 0.00012603015075376885,
"loss": 1.085,
"step": 373
},
{
"epoch": 0.16637010676156583,
"grad_norm": 0.3854696750640869,
"learning_rate": 0.00012582914572864323,
"loss": 0.9271,
"step": 374
},
{
"epoch": 0.16681494661921709,
"grad_norm": 0.40170320868492126,
"learning_rate": 0.0001256281407035176,
"loss": 0.9355,
"step": 375
},
{
"epoch": 0.16725978647686832,
"grad_norm": 0.3294379711151123,
"learning_rate": 0.00012542713567839197,
"loss": 0.9221,
"step": 376
},
{
"epoch": 0.16770462633451957,
"grad_norm": 0.3526048958301544,
"learning_rate": 0.00012522613065326635,
"loss": 1.0483,
"step": 377
},
{
"epoch": 0.16814946619217083,
"grad_norm": 0.3107386529445648,
"learning_rate": 0.0001250251256281407,
"loss": 0.9166,
"step": 378
},
{
"epoch": 0.16859430604982206,
"grad_norm": 0.4228864908218384,
"learning_rate": 0.00012482412060301507,
"loss": 1.0207,
"step": 379
},
{
"epoch": 0.16903914590747332,
"grad_norm": 0.3822677731513977,
"learning_rate": 0.00012462311557788947,
"loss": 0.9935,
"step": 380
},
{
"epoch": 0.16948398576512455,
"grad_norm": 0.3852001428604126,
"learning_rate": 0.00012442211055276384,
"loss": 0.9831,
"step": 381
},
{
"epoch": 0.1699288256227758,
"grad_norm": 0.3484053313732147,
"learning_rate": 0.00012422110552763818,
"loss": 0.9327,
"step": 382
},
{
"epoch": 0.17037366548042704,
"grad_norm": 0.3499116897583008,
"learning_rate": 0.00012402010050251256,
"loss": 1.0521,
"step": 383
},
{
"epoch": 0.1708185053380783,
"grad_norm": 0.362118661403656,
"learning_rate": 0.00012381909547738696,
"loss": 1.0482,
"step": 384
},
{
"epoch": 0.17126334519572953,
"grad_norm": 0.3913699686527252,
"learning_rate": 0.0001236180904522613,
"loss": 0.9908,
"step": 385
},
{
"epoch": 0.17170818505338079,
"grad_norm": 0.3362598121166229,
"learning_rate": 0.00012341708542713568,
"loss": 0.9814,
"step": 386
},
{
"epoch": 0.17215302491103202,
"grad_norm": 0.34024205803871155,
"learning_rate": 0.00012321608040201005,
"loss": 0.9747,
"step": 387
},
{
"epoch": 0.17259786476868327,
"grad_norm": 0.35168662667274475,
"learning_rate": 0.00012301507537688445,
"loss": 1.0067,
"step": 388
},
{
"epoch": 0.17304270462633453,
"grad_norm": 0.30861225724220276,
"learning_rate": 0.0001228140703517588,
"loss": 1.06,
"step": 389
},
{
"epoch": 0.17348754448398576,
"grad_norm": 0.3759268522262573,
"learning_rate": 0.00012261306532663317,
"loss": 1.1586,
"step": 390
},
{
"epoch": 0.17393238434163702,
"grad_norm": 0.3894938826560974,
"learning_rate": 0.00012241206030150754,
"loss": 0.9451,
"step": 391
},
{
"epoch": 0.17437722419928825,
"grad_norm": 0.3796166777610779,
"learning_rate": 0.00012221105527638191,
"loss": 0.9602,
"step": 392
},
{
"epoch": 0.1748220640569395,
"grad_norm": 0.32060232758522034,
"learning_rate": 0.00012201005025125629,
"loss": 1.0365,
"step": 393
},
{
"epoch": 0.17526690391459074,
"grad_norm": 0.33367520570755005,
"learning_rate": 0.00012180904522613066,
"loss": 0.9616,
"step": 394
},
{
"epoch": 0.175711743772242,
"grad_norm": 0.3084149658679962,
"learning_rate": 0.00012160804020100502,
"loss": 0.8514,
"step": 395
},
{
"epoch": 0.17615658362989323,
"grad_norm": 0.32909664511680603,
"learning_rate": 0.00012140703517587942,
"loss": 0.9773,
"step": 396
},
{
"epoch": 0.17660142348754448,
"grad_norm": 0.3140466511249542,
"learning_rate": 0.00012120603015075378,
"loss": 0.9808,
"step": 397
},
{
"epoch": 0.17704626334519574,
"grad_norm": 0.34496647119522095,
"learning_rate": 0.00012100502512562815,
"loss": 0.8233,
"step": 398
},
{
"epoch": 0.17749110320284697,
"grad_norm": 0.4260810315608978,
"learning_rate": 0.00012080402010050251,
"loss": 0.9703,
"step": 399
},
{
"epoch": 0.17793594306049823,
"grad_norm": 0.3465157747268677,
"learning_rate": 0.00012060301507537688,
"loss": 1.0121,
"step": 400
},
{
"epoch": 0.17838078291814946,
"grad_norm": 0.3023923635482788,
"learning_rate": 0.00012040201005025127,
"loss": 0.9899,
"step": 401
},
{
"epoch": 0.17882562277580072,
"grad_norm": 0.3486076593399048,
"learning_rate": 0.00012020100502512563,
"loss": 0.9325,
"step": 402
},
{
"epoch": 0.17927046263345195,
"grad_norm": 0.3666176199913025,
"learning_rate": 0.00012,
"loss": 1.019,
"step": 403
},
{
"epoch": 0.1797153024911032,
"grad_norm": 0.3321322798728943,
"learning_rate": 0.00011979899497487436,
"loss": 0.9197,
"step": 404
},
{
"epoch": 0.18016014234875444,
"grad_norm": 0.4836527407169342,
"learning_rate": 0.00011959798994974876,
"loss": 0.8488,
"step": 405
},
{
"epoch": 0.1806049822064057,
"grad_norm": 0.3428821861743927,
"learning_rate": 0.00011939698492462312,
"loss": 1.0314,
"step": 406
},
{
"epoch": 0.18104982206405695,
"grad_norm": 0.38532841205596924,
"learning_rate": 0.0001191959798994975,
"loss": 0.9393,
"step": 407
},
{
"epoch": 0.18149466192170818,
"grad_norm": 0.3802700340747833,
"learning_rate": 0.00011899497487437185,
"loss": 0.9136,
"step": 408
},
{
"epoch": 0.18193950177935944,
"grad_norm": 0.3403851389884949,
"learning_rate": 0.00011879396984924624,
"loss": 0.9552,
"step": 409
},
{
"epoch": 0.18238434163701067,
"grad_norm": 0.3999098241329193,
"learning_rate": 0.00011859296482412061,
"loss": 0.9292,
"step": 410
},
{
"epoch": 0.18282918149466193,
"grad_norm": 0.3740805685520172,
"learning_rate": 0.00011839195979899497,
"loss": 1.0809,
"step": 411
},
{
"epoch": 0.18327402135231316,
"grad_norm": 0.42133694887161255,
"learning_rate": 0.00011819095477386935,
"loss": 0.8661,
"step": 412
},
{
"epoch": 0.18371886120996442,
"grad_norm": 0.39346155524253845,
"learning_rate": 0.00011798994974874373,
"loss": 1.0682,
"step": 413
},
{
"epoch": 0.18416370106761565,
"grad_norm": 0.3395063281059265,
"learning_rate": 0.0001177889447236181,
"loss": 0.9672,
"step": 414
},
{
"epoch": 0.1846085409252669,
"grad_norm": 0.3930981755256653,
"learning_rate": 0.00011758793969849247,
"loss": 0.9354,
"step": 415
},
{
"epoch": 0.18505338078291814,
"grad_norm": 0.40588700771331787,
"learning_rate": 0.00011738693467336684,
"loss": 0.9723,
"step": 416
},
{
"epoch": 0.1854982206405694,
"grad_norm": 0.4842967689037323,
"learning_rate": 0.00011718592964824122,
"loss": 0.9098,
"step": 417
},
{
"epoch": 0.18594306049822065,
"grad_norm": 0.34736424684524536,
"learning_rate": 0.00011698492462311558,
"loss": 0.9383,
"step": 418
},
{
"epoch": 0.18638790035587188,
"grad_norm": 0.41721245646476746,
"learning_rate": 0.00011678391959798996,
"loss": 0.9804,
"step": 419
},
{
"epoch": 0.18683274021352314,
"grad_norm": 0.320420503616333,
"learning_rate": 0.00011658291457286432,
"loss": 0.9266,
"step": 420
},
{
"epoch": 0.18727758007117437,
"grad_norm": 0.3732924461364746,
"learning_rate": 0.00011638190954773872,
"loss": 0.9703,
"step": 421
},
{
"epoch": 0.18772241992882563,
"grad_norm": 0.3584558069705963,
"learning_rate": 0.00011618090452261308,
"loss": 0.8832,
"step": 422
},
{
"epoch": 0.18816725978647686,
"grad_norm": 0.3892074525356293,
"learning_rate": 0.00011597989949748745,
"loss": 0.9617,
"step": 423
},
{
"epoch": 0.18861209964412812,
"grad_norm": 0.3820159435272217,
"learning_rate": 0.00011577889447236181,
"loss": 0.9092,
"step": 424
},
{
"epoch": 0.18905693950177935,
"grad_norm": 0.33480194211006165,
"learning_rate": 0.00011557788944723618,
"loss": 1.0104,
"step": 425
},
{
"epoch": 0.1895017793594306,
"grad_norm": 0.41536465287208557,
"learning_rate": 0.00011537688442211057,
"loss": 0.9157,
"step": 426
},
{
"epoch": 0.18994661921708186,
"grad_norm": 0.3305935263633728,
"learning_rate": 0.00011517587939698493,
"loss": 0.9622,
"step": 427
},
{
"epoch": 0.1903914590747331,
"grad_norm": 0.4637777805328369,
"learning_rate": 0.0001149748743718593,
"loss": 0.9317,
"step": 428
},
{
"epoch": 0.19083629893238435,
"grad_norm": 0.38072511553764343,
"learning_rate": 0.00011477386934673366,
"loss": 1.0061,
"step": 429
},
{
"epoch": 0.19128113879003558,
"grad_norm": 0.35834434628486633,
"learning_rate": 0.00011457286432160806,
"loss": 1.0108,
"step": 430
},
{
"epoch": 0.19172597864768684,
"grad_norm": 0.4265679717063904,
"learning_rate": 0.00011437185929648242,
"loss": 0.9451,
"step": 431
},
{
"epoch": 0.19217081850533807,
"grad_norm": 0.38988345861434937,
"learning_rate": 0.00011417085427135679,
"loss": 1.1026,
"step": 432
},
{
"epoch": 0.19261565836298933,
"grad_norm": 0.35931700468063354,
"learning_rate": 0.00011396984924623115,
"loss": 0.9296,
"step": 433
},
{
"epoch": 0.19306049822064056,
"grad_norm": 0.3506704568862915,
"learning_rate": 0.00011376884422110554,
"loss": 0.942,
"step": 434
},
{
"epoch": 0.19350533807829182,
"grad_norm": 0.38077566027641296,
"learning_rate": 0.00011356783919597991,
"loss": 1.0012,
"step": 435
},
{
"epoch": 0.19395017793594305,
"grad_norm": 0.3146056532859802,
"learning_rate": 0.00011336683417085427,
"loss": 0.9361,
"step": 436
},
{
"epoch": 0.1943950177935943,
"grad_norm": 0.42160287499427795,
"learning_rate": 0.00011316582914572864,
"loss": 0.9426,
"step": 437
},
{
"epoch": 0.19483985765124556,
"grad_norm": 0.33514153957366943,
"learning_rate": 0.00011296482412060303,
"loss": 1.0237,
"step": 438
},
{
"epoch": 0.1952846975088968,
"grad_norm": 0.46194738149642944,
"learning_rate": 0.0001127638190954774,
"loss": 1.095,
"step": 439
},
{
"epoch": 0.19572953736654805,
"grad_norm": 0.40086501836776733,
"learning_rate": 0.00011256281407035176,
"loss": 0.9239,
"step": 440
},
{
"epoch": 0.19617437722419928,
"grad_norm": 0.34976860880851746,
"learning_rate": 0.00011236180904522614,
"loss": 0.8445,
"step": 441
},
{
"epoch": 0.19661921708185054,
"grad_norm": 0.35818928480148315,
"learning_rate": 0.00011216080402010052,
"loss": 0.9626,
"step": 442
},
{
"epoch": 0.19706405693950177,
"grad_norm": 0.35546717047691345,
"learning_rate": 0.00011195979899497488,
"loss": 0.981,
"step": 443
},
{
"epoch": 0.19750889679715303,
"grad_norm": 0.32887890934944153,
"learning_rate": 0.00011175879396984925,
"loss": 0.9953,
"step": 444
},
{
"epoch": 0.19795373665480426,
"grad_norm": 0.3613591492176056,
"learning_rate": 0.00011155778894472361,
"loss": 0.9798,
"step": 445
},
{
"epoch": 0.19839857651245552,
"grad_norm": 0.3338494896888733,
"learning_rate": 0.00011135678391959799,
"loss": 1.0195,
"step": 446
},
{
"epoch": 0.19884341637010677,
"grad_norm": 0.3248537480831146,
"learning_rate": 0.00011115577889447237,
"loss": 0.9145,
"step": 447
},
{
"epoch": 0.199288256227758,
"grad_norm": 0.35757869482040405,
"learning_rate": 0.00011095477386934675,
"loss": 0.898,
"step": 448
},
{
"epoch": 0.19973309608540926,
"grad_norm": 0.35583075881004333,
"learning_rate": 0.0001107537688442211,
"loss": 1.0303,
"step": 449
},
{
"epoch": 0.2001779359430605,
"grad_norm": 0.5167235136032104,
"learning_rate": 0.00011055276381909548,
"loss": 0.8168,
"step": 450
},
{
"epoch": 0.20062277580071175,
"grad_norm": 0.3596641719341278,
"learning_rate": 0.00011035175879396986,
"loss": 0.9184,
"step": 451
},
{
"epoch": 0.20106761565836298,
"grad_norm": 0.4046080410480499,
"learning_rate": 0.00011015075376884422,
"loss": 1.0039,
"step": 452
},
{
"epoch": 0.20151245551601424,
"grad_norm": 0.36985480785369873,
"learning_rate": 0.0001099497487437186,
"loss": 0.9409,
"step": 453
},
{
"epoch": 0.20195729537366547,
"grad_norm": 0.36848029494285583,
"learning_rate": 0.00010974874371859296,
"loss": 1.0075,
"step": 454
},
{
"epoch": 0.20240213523131673,
"grad_norm": 0.3421315550804138,
"learning_rate": 0.00010954773869346736,
"loss": 1.0761,
"step": 455
},
{
"epoch": 0.20284697508896798,
"grad_norm": 0.35291051864624023,
"learning_rate": 0.00010934673366834172,
"loss": 0.9096,
"step": 456
},
{
"epoch": 0.20329181494661921,
"grad_norm": 0.3597058355808258,
"learning_rate": 0.00010914572864321609,
"loss": 0.9133,
"step": 457
},
{
"epoch": 0.20373665480427047,
"grad_norm": 0.3638782501220703,
"learning_rate": 0.00010894472361809045,
"loss": 0.9936,
"step": 458
},
{
"epoch": 0.2041814946619217,
"grad_norm": 0.33646896481513977,
"learning_rate": 0.00010874371859296483,
"loss": 0.9592,
"step": 459
},
{
"epoch": 0.20462633451957296,
"grad_norm": 0.3791368901729584,
"learning_rate": 0.00010854271356783921,
"loss": 0.9708,
"step": 460
},
{
"epoch": 0.2050711743772242,
"grad_norm": 0.3495243489742279,
"learning_rate": 0.00010834170854271357,
"loss": 0.9908,
"step": 461
},
{
"epoch": 0.20551601423487545,
"grad_norm": 0.34329208731651306,
"learning_rate": 0.00010814070351758794,
"loss": 0.9722,
"step": 462
},
{
"epoch": 0.20596085409252668,
"grad_norm": 0.40529054403305054,
"learning_rate": 0.00010793969849246233,
"loss": 0.9758,
"step": 463
},
{
"epoch": 0.20640569395017794,
"grad_norm": 0.44128406047821045,
"learning_rate": 0.0001077386934673367,
"loss": 0.9661,
"step": 464
},
{
"epoch": 0.20685053380782917,
"grad_norm": 0.3696465790271759,
"learning_rate": 0.00010753768844221106,
"loss": 1.0031,
"step": 465
},
{
"epoch": 0.20729537366548043,
"grad_norm": 0.36873361468315125,
"learning_rate": 0.00010733668341708543,
"loss": 0.9141,
"step": 466
},
{
"epoch": 0.20774021352313168,
"grad_norm": 0.40861397981643677,
"learning_rate": 0.00010713567839195982,
"loss": 0.9727,
"step": 467
},
{
"epoch": 0.20818505338078291,
"grad_norm": 0.3468811511993408,
"learning_rate": 0.00010693467336683418,
"loss": 1.0182,
"step": 468
},
{
"epoch": 0.20862989323843417,
"grad_norm": 0.37425243854522705,
"learning_rate": 0.00010673366834170855,
"loss": 0.9268,
"step": 469
},
{
"epoch": 0.2090747330960854,
"grad_norm": 0.4296034574508667,
"learning_rate": 0.00010653266331658291,
"loss": 1.116,
"step": 470
},
{
"epoch": 0.20951957295373666,
"grad_norm": 0.3852652609348297,
"learning_rate": 0.00010633165829145728,
"loss": 0.9378,
"step": 471
},
{
"epoch": 0.2099644128113879,
"grad_norm": 0.34109607338905334,
"learning_rate": 0.00010613065326633167,
"loss": 1.004,
"step": 472
},
{
"epoch": 0.21040925266903915,
"grad_norm": 0.4025616943836212,
"learning_rate": 0.00010592964824120604,
"loss": 0.96,
"step": 473
},
{
"epoch": 0.21085409252669038,
"grad_norm": 0.3672776222229004,
"learning_rate": 0.0001057286432160804,
"loss": 0.8744,
"step": 474
},
{
"epoch": 0.21129893238434164,
"grad_norm": 0.36405524611473083,
"learning_rate": 0.00010552763819095478,
"loss": 0.8957,
"step": 475
},
{
"epoch": 0.2117437722419929,
"grad_norm": 0.41747644543647766,
"learning_rate": 0.00010532663316582916,
"loss": 1.0165,
"step": 476
},
{
"epoch": 0.21218861209964412,
"grad_norm": 0.3478085994720459,
"learning_rate": 0.00010512562814070352,
"loss": 0.9776,
"step": 477
},
{
"epoch": 0.21263345195729538,
"grad_norm": 0.365633487701416,
"learning_rate": 0.0001049246231155779,
"loss": 0.9826,
"step": 478
},
{
"epoch": 0.2130782918149466,
"grad_norm": 0.3211476802825928,
"learning_rate": 0.00010472361809045225,
"loss": 0.9228,
"step": 479
},
{
"epoch": 0.21352313167259787,
"grad_norm": 0.3374654948711395,
"learning_rate": 0.00010452261306532664,
"loss": 0.9379,
"step": 480
},
{
"epoch": 0.2139679715302491,
"grad_norm": 0.34640708565711975,
"learning_rate": 0.00010432160804020101,
"loss": 0.8678,
"step": 481
},
{
"epoch": 0.21441281138790036,
"grad_norm": 0.38176846504211426,
"learning_rate": 0.00010412060301507539,
"loss": 0.9597,
"step": 482
},
{
"epoch": 0.2148576512455516,
"grad_norm": 0.34589311480522156,
"learning_rate": 0.00010391959798994975,
"loss": 0.9423,
"step": 483
},
{
"epoch": 0.21530249110320285,
"grad_norm": 0.4062221348285675,
"learning_rate": 0.00010371859296482413,
"loss": 0.9926,
"step": 484
},
{
"epoch": 0.2157473309608541,
"grad_norm": 0.3403069078922272,
"learning_rate": 0.0001035175879396985,
"loss": 0.9953,
"step": 485
},
{
"epoch": 0.21619217081850534,
"grad_norm": 0.3341009318828583,
"learning_rate": 0.00010331658291457286,
"loss": 0.9807,
"step": 486
},
{
"epoch": 0.2166370106761566,
"grad_norm": 0.29718342423439026,
"learning_rate": 0.00010311557788944724,
"loss": 0.9296,
"step": 487
},
{
"epoch": 0.21708185053380782,
"grad_norm": 0.4219815135002136,
"learning_rate": 0.00010291457286432162,
"loss": 1.0007,
"step": 488
},
{
"epoch": 0.21752669039145908,
"grad_norm": 0.30951768159866333,
"learning_rate": 0.00010271356783919598,
"loss": 0.9686,
"step": 489
},
{
"epoch": 0.2179715302491103,
"grad_norm": 0.38819047808647156,
"learning_rate": 0.00010251256281407036,
"loss": 1.1859,
"step": 490
},
{
"epoch": 0.21841637010676157,
"grad_norm": 0.3185841739177704,
"learning_rate": 0.00010231155778894473,
"loss": 0.9523,
"step": 491
},
{
"epoch": 0.2188612099644128,
"grad_norm": 0.3971594274044037,
"learning_rate": 0.00010211055276381909,
"loss": 1.0133,
"step": 492
},
{
"epoch": 0.21930604982206406,
"grad_norm": 0.31492552161216736,
"learning_rate": 0.00010190954773869348,
"loss": 0.8269,
"step": 493
},
{
"epoch": 0.2197508896797153,
"grad_norm": 0.3949122130870819,
"learning_rate": 0.00010170854271356785,
"loss": 1.0379,
"step": 494
},
{
"epoch": 0.22019572953736655,
"grad_norm": 0.30094975233078003,
"learning_rate": 0.00010150753768844221,
"loss": 0.9071,
"step": 495
},
{
"epoch": 0.2206405693950178,
"grad_norm": 0.3560626208782196,
"learning_rate": 0.00010130653266331658,
"loss": 0.9853,
"step": 496
},
{
"epoch": 0.22108540925266904,
"grad_norm": 0.4022250771522522,
"learning_rate": 0.00010110552763819097,
"loss": 0.9823,
"step": 497
},
{
"epoch": 0.2215302491103203,
"grad_norm": 0.3849669098854065,
"learning_rate": 0.00010090452261306533,
"loss": 0.9176,
"step": 498
},
{
"epoch": 0.22197508896797152,
"grad_norm": 0.47436994314193726,
"learning_rate": 0.0001007035175879397,
"loss": 0.9948,
"step": 499
},
{
"epoch": 0.22241992882562278,
"grad_norm": 0.32288622856140137,
"learning_rate": 0.00010050251256281407,
"loss": 0.9409,
"step": 500
},
{
"epoch": 0.222864768683274,
"grad_norm": 0.33923619985580444,
"learning_rate": 0.00010030150753768846,
"loss": 0.9096,
"step": 501
},
{
"epoch": 0.22330960854092527,
"grad_norm": 0.3114798665046692,
"learning_rate": 0.00010010050251256282,
"loss": 0.9112,
"step": 502
},
{
"epoch": 0.2237544483985765,
"grad_norm": 0.37581518292427063,
"learning_rate": 9.989949748743719e-05,
"loss": 0.9831,
"step": 503
},
{
"epoch": 0.22419928825622776,
"grad_norm": 0.33060386776924133,
"learning_rate": 9.969849246231156e-05,
"loss": 0.8942,
"step": 504
},
{
"epoch": 0.22464412811387902,
"grad_norm": 0.32445380091667175,
"learning_rate": 9.949748743718594e-05,
"loss": 0.9665,
"step": 505
},
{
"epoch": 0.22508896797153025,
"grad_norm": 0.3652149438858032,
"learning_rate": 9.929648241206031e-05,
"loss": 1.0325,
"step": 506
},
{
"epoch": 0.2255338078291815,
"grad_norm": 0.3789691925048828,
"learning_rate": 9.909547738693468e-05,
"loss": 0.9488,
"step": 507
},
{
"epoch": 0.22597864768683273,
"grad_norm": 0.35577788949012756,
"learning_rate": 9.889447236180906e-05,
"loss": 0.9324,
"step": 508
},
{
"epoch": 0.226423487544484,
"grad_norm": 0.37785062193870544,
"learning_rate": 9.869346733668342e-05,
"loss": 0.9934,
"step": 509
},
{
"epoch": 0.22686832740213522,
"grad_norm": 0.3577893078327179,
"learning_rate": 9.84924623115578e-05,
"loss": 1.0183,
"step": 510
},
{
"epoch": 0.22731316725978648,
"grad_norm": 0.3955710232257843,
"learning_rate": 9.829145728643216e-05,
"loss": 0.9138,
"step": 511
},
{
"epoch": 0.2277580071174377,
"grad_norm": 0.333099365234375,
"learning_rate": 9.809045226130655e-05,
"loss": 0.9533,
"step": 512
},
{
"epoch": 0.22820284697508897,
"grad_norm": 0.358026921749115,
"learning_rate": 9.788944723618091e-05,
"loss": 0.9919,
"step": 513
},
{
"epoch": 0.22864768683274023,
"grad_norm": 0.3610975742340088,
"learning_rate": 9.768844221105528e-05,
"loss": 1.0059,
"step": 514
},
{
"epoch": 0.22909252669039146,
"grad_norm": 0.32842594385147095,
"learning_rate": 9.748743718592965e-05,
"loss": 0.9877,
"step": 515
},
{
"epoch": 0.22953736654804271,
"grad_norm": 0.4672807455062866,
"learning_rate": 9.728643216080403e-05,
"loss": 1.0148,
"step": 516
},
{
"epoch": 0.22998220640569395,
"grad_norm": 0.3504094183444977,
"learning_rate": 9.70854271356784e-05,
"loss": 0.8774,
"step": 517
},
{
"epoch": 0.2304270462633452,
"grad_norm": 0.3299584984779358,
"learning_rate": 9.688442211055276e-05,
"loss": 0.9018,
"step": 518
},
{
"epoch": 0.23087188612099643,
"grad_norm": 0.3598071336746216,
"learning_rate": 9.668341708542715e-05,
"loss": 0.9,
"step": 519
},
{
"epoch": 0.2313167259786477,
"grad_norm": 0.366554856300354,
"learning_rate": 9.64824120603015e-05,
"loss": 0.8612,
"step": 520
},
{
"epoch": 0.23176156583629892,
"grad_norm": 0.3408771753311157,
"learning_rate": 9.628140703517589e-05,
"loss": 0.9482,
"step": 521
},
{
"epoch": 0.23220640569395018,
"grad_norm": 0.4127398133277893,
"learning_rate": 9.608040201005025e-05,
"loss": 0.9427,
"step": 522
},
{
"epoch": 0.2326512455516014,
"grad_norm": 0.4046645164489746,
"learning_rate": 9.587939698492462e-05,
"loss": 1.0469,
"step": 523
},
{
"epoch": 0.23309608540925267,
"grad_norm": 0.33988115191459656,
"learning_rate": 9.5678391959799e-05,
"loss": 0.888,
"step": 524
},
{
"epoch": 0.23354092526690393,
"grad_norm": 0.35615596175193787,
"learning_rate": 9.547738693467337e-05,
"loss": 0.9522,
"step": 525
},
{
"epoch": 0.23398576512455516,
"grad_norm": 0.41455528140068054,
"learning_rate": 9.527638190954774e-05,
"loss": 0.9495,
"step": 526
},
{
"epoch": 0.23443060498220641,
"grad_norm": 0.4435769021511078,
"learning_rate": 9.507537688442212e-05,
"loss": 0.9251,
"step": 527
},
{
"epoch": 0.23487544483985764,
"grad_norm": 0.34414413571357727,
"learning_rate": 9.487437185929649e-05,
"loss": 0.9746,
"step": 528
},
{
"epoch": 0.2353202846975089,
"grad_norm": 0.42099055647850037,
"learning_rate": 9.467336683417086e-05,
"loss": 1.0079,
"step": 529
},
{
"epoch": 0.23576512455516013,
"grad_norm": 0.3634389042854309,
"learning_rate": 9.447236180904523e-05,
"loss": 0.8452,
"step": 530
},
{
"epoch": 0.2362099644128114,
"grad_norm": 0.45584288239479065,
"learning_rate": 9.427135678391961e-05,
"loss": 0.9618,
"step": 531
},
{
"epoch": 0.23665480427046262,
"grad_norm": 0.3558484613895416,
"learning_rate": 9.407035175879397e-05,
"loss": 1.007,
"step": 532
},
{
"epoch": 0.23709964412811388,
"grad_norm": 0.41819700598716736,
"learning_rate": 9.386934673366835e-05,
"loss": 0.8806,
"step": 533
},
{
"epoch": 0.23754448398576514,
"grad_norm": 0.39965230226516724,
"learning_rate": 9.366834170854271e-05,
"loss": 1.0749,
"step": 534
},
{
"epoch": 0.23798932384341637,
"grad_norm": 0.35675838589668274,
"learning_rate": 9.34673366834171e-05,
"loss": 1.0074,
"step": 535
},
{
"epoch": 0.23843416370106763,
"grad_norm": 0.37170112133026123,
"learning_rate": 9.326633165829146e-05,
"loss": 0.9623,
"step": 536
},
{
"epoch": 0.23887900355871886,
"grad_norm": 0.35851290822029114,
"learning_rate": 9.306532663316585e-05,
"loss": 0.9361,
"step": 537
},
{
"epoch": 0.2393238434163701,
"grad_norm": 0.4395543038845062,
"learning_rate": 9.28643216080402e-05,
"loss": 0.871,
"step": 538
},
{
"epoch": 0.23976868327402134,
"grad_norm": 0.2868823707103729,
"learning_rate": 9.266331658291458e-05,
"loss": 0.9066,
"step": 539
},
{
"epoch": 0.2402135231316726,
"grad_norm": 0.3684181869029999,
"learning_rate": 9.246231155778895e-05,
"loss": 1.0657,
"step": 540
},
{
"epoch": 0.24065836298932383,
"grad_norm": 0.347599059343338,
"learning_rate": 9.226130653266331e-05,
"loss": 1.0081,
"step": 541
},
{
"epoch": 0.2411032028469751,
"grad_norm": 0.32958024740219116,
"learning_rate": 9.20603015075377e-05,
"loss": 0.9427,
"step": 542
},
{
"epoch": 0.24154804270462635,
"grad_norm": 0.3672914505004883,
"learning_rate": 9.185929648241206e-05,
"loss": 0.994,
"step": 543
},
{
"epoch": 0.24199288256227758,
"grad_norm": 0.3725748658180237,
"learning_rate": 9.165829145728644e-05,
"loss": 0.9835,
"step": 544
},
{
"epoch": 0.24243772241992884,
"grad_norm": 0.39337027072906494,
"learning_rate": 9.14572864321608e-05,
"loss": 0.8137,
"step": 545
},
{
"epoch": 0.24288256227758007,
"grad_norm": 0.32661673426628113,
"learning_rate": 9.125628140703519e-05,
"loss": 1.0025,
"step": 546
},
{
"epoch": 0.24332740213523132,
"grad_norm": 0.3912467956542969,
"learning_rate": 9.105527638190955e-05,
"loss": 0.9087,
"step": 547
},
{
"epoch": 0.24377224199288255,
"grad_norm": 0.37240296602249146,
"learning_rate": 9.085427135678392e-05,
"loss": 0.9275,
"step": 548
},
{
"epoch": 0.2442170818505338,
"grad_norm": 0.3758089244365692,
"learning_rate": 9.06532663316583e-05,
"loss": 0.8224,
"step": 549
},
{
"epoch": 0.24466192170818504,
"grad_norm": 0.3426155745983124,
"learning_rate": 9.045226130653267e-05,
"loss": 0.8894,
"step": 550
},
{
"epoch": 0.2451067615658363,
"grad_norm": 0.3958059549331665,
"learning_rate": 9.025125628140704e-05,
"loss": 0.9429,
"step": 551
},
{
"epoch": 0.24555160142348753,
"grad_norm": 0.4533610939979553,
"learning_rate": 9.005025125628141e-05,
"loss": 1.2454,
"step": 552
},
{
"epoch": 0.2459964412811388,
"grad_norm": 0.36734533309936523,
"learning_rate": 8.984924623115579e-05,
"loss": 1.0402,
"step": 553
},
{
"epoch": 0.24644128113879005,
"grad_norm": 0.42336615920066833,
"learning_rate": 8.964824120603016e-05,
"loss": 1.1425,
"step": 554
},
{
"epoch": 0.24688612099644128,
"grad_norm": 0.44834333658218384,
"learning_rate": 8.944723618090453e-05,
"loss": 0.8959,
"step": 555
},
{
"epoch": 0.24733096085409254,
"grad_norm": 0.3234824240207672,
"learning_rate": 8.92462311557789e-05,
"loss": 0.9743,
"step": 556
},
{
"epoch": 0.24777580071174377,
"grad_norm": 0.3193085491657257,
"learning_rate": 8.904522613065326e-05,
"loss": 0.9685,
"step": 557
},
{
"epoch": 0.24822064056939502,
"grad_norm": 0.34897491335868835,
"learning_rate": 8.884422110552765e-05,
"loss": 0.9132,
"step": 558
},
{
"epoch": 0.24866548042704625,
"grad_norm": 0.3368080258369446,
"learning_rate": 8.864321608040201e-05,
"loss": 0.9678,
"step": 559
},
{
"epoch": 0.2491103202846975,
"grad_norm": 0.3948473036289215,
"learning_rate": 8.84422110552764e-05,
"loss": 0.993,
"step": 560
},
{
"epoch": 0.24955516014234874,
"grad_norm": 0.40010324120521545,
"learning_rate": 8.824120603015076e-05,
"loss": 0.9779,
"step": 561
},
{
"epoch": 0.25,
"grad_norm": 0.40043747425079346,
"learning_rate": 8.804020100502513e-05,
"loss": 0.9633,
"step": 562
},
{
"epoch": 0.25044483985765126,
"grad_norm": 0.3239743709564209,
"learning_rate": 8.78391959798995e-05,
"loss": 0.939,
"step": 563
},
{
"epoch": 0.2508896797153025,
"grad_norm": 0.3126469552516937,
"learning_rate": 8.763819095477387e-05,
"loss": 0.8804,
"step": 564
},
{
"epoch": 0.2513345195729537,
"grad_norm": 0.32982298731803894,
"learning_rate": 8.743718592964825e-05,
"loss": 0.8698,
"step": 565
},
{
"epoch": 0.251779359430605,
"grad_norm": 0.4134942293167114,
"learning_rate": 8.723618090452261e-05,
"loss": 0.9995,
"step": 566
},
{
"epoch": 0.25222419928825623,
"grad_norm": 0.34916219115257263,
"learning_rate": 8.7035175879397e-05,
"loss": 0.9463,
"step": 567
},
{
"epoch": 0.2526690391459075,
"grad_norm": 0.3170456886291504,
"learning_rate": 8.683417085427135e-05,
"loss": 0.9028,
"step": 568
},
{
"epoch": 0.2531138790035587,
"grad_norm": 0.377886027097702,
"learning_rate": 8.663316582914574e-05,
"loss": 0.9503,
"step": 569
},
{
"epoch": 0.25355871886120995,
"grad_norm": 0.3546507656574249,
"learning_rate": 8.64321608040201e-05,
"loss": 0.9509,
"step": 570
},
{
"epoch": 0.2540035587188612,
"grad_norm": 0.38051891326904297,
"learning_rate": 8.623115577889449e-05,
"loss": 0.8949,
"step": 571
},
{
"epoch": 0.25444839857651247,
"grad_norm": 0.2983281910419464,
"learning_rate": 8.603015075376884e-05,
"loss": 1.0411,
"step": 572
},
{
"epoch": 0.2548932384341637,
"grad_norm": 0.3371431529521942,
"learning_rate": 8.582914572864322e-05,
"loss": 0.93,
"step": 573
},
{
"epoch": 0.25533807829181493,
"grad_norm": 0.39484888315200806,
"learning_rate": 8.562814070351759e-05,
"loss": 0.9633,
"step": 574
},
{
"epoch": 0.2557829181494662,
"grad_norm": 0.34015023708343506,
"learning_rate": 8.542713567839196e-05,
"loss": 0.944,
"step": 575
},
{
"epoch": 0.25622775800711745,
"grad_norm": 0.32014408707618713,
"learning_rate": 8.522613065326634e-05,
"loss": 0.829,
"step": 576
},
{
"epoch": 0.2566725978647687,
"grad_norm": 0.42777976393699646,
"learning_rate": 8.502512562814071e-05,
"loss": 1.0107,
"step": 577
},
{
"epoch": 0.2571174377224199,
"grad_norm": 0.35483554005622864,
"learning_rate": 8.482412060301508e-05,
"loss": 0.9974,
"step": 578
},
{
"epoch": 0.25756227758007116,
"grad_norm": 0.41365087032318115,
"learning_rate": 8.462311557788946e-05,
"loss": 1.062,
"step": 579
},
{
"epoch": 0.2580071174377224,
"grad_norm": 0.37708839774131775,
"learning_rate": 8.442211055276383e-05,
"loss": 0.9283,
"step": 580
},
{
"epoch": 0.2584519572953737,
"grad_norm": 0.40330421924591064,
"learning_rate": 8.42211055276382e-05,
"loss": 0.9482,
"step": 581
},
{
"epoch": 0.25889679715302494,
"grad_norm": 0.2943516969680786,
"learning_rate": 8.402010050251256e-05,
"loss": 0.9211,
"step": 582
},
{
"epoch": 0.25934163701067614,
"grad_norm": 0.3328346610069275,
"learning_rate": 8.381909547738695e-05,
"loss": 0.9264,
"step": 583
},
{
"epoch": 0.2597864768683274,
"grad_norm": 0.4220837354660034,
"learning_rate": 8.36180904522613e-05,
"loss": 0.9598,
"step": 584
},
{
"epoch": 0.26023131672597866,
"grad_norm": 0.36552560329437256,
"learning_rate": 8.341708542713568e-05,
"loss": 0.9275,
"step": 585
},
{
"epoch": 0.2606761565836299,
"grad_norm": 0.36409202218055725,
"learning_rate": 8.321608040201005e-05,
"loss": 0.9246,
"step": 586
},
{
"epoch": 0.2611209964412811,
"grad_norm": 0.3256109952926636,
"learning_rate": 8.301507537688443e-05,
"loss": 0.9725,
"step": 587
},
{
"epoch": 0.2615658362989324,
"grad_norm": 0.31421205401420593,
"learning_rate": 8.28140703517588e-05,
"loss": 0.9276,
"step": 588
},
{
"epoch": 0.26201067615658363,
"grad_norm": 0.33592212200164795,
"learning_rate": 8.261306532663317e-05,
"loss": 0.964,
"step": 589
},
{
"epoch": 0.2624555160142349,
"grad_norm": 0.49559640884399414,
"learning_rate": 8.241206030150754e-05,
"loss": 0.9688,
"step": 590
},
{
"epoch": 0.2629003558718861,
"grad_norm": 0.37904122471809387,
"learning_rate": 8.22110552763819e-05,
"loss": 0.8688,
"step": 591
},
{
"epoch": 0.26334519572953735,
"grad_norm": 0.38418570160865784,
"learning_rate": 8.201005025125629e-05,
"loss": 0.966,
"step": 592
},
{
"epoch": 0.2637900355871886,
"grad_norm": 0.3689751923084259,
"learning_rate": 8.180904522613065e-05,
"loss": 0.8973,
"step": 593
},
{
"epoch": 0.26423487544483987,
"grad_norm": 0.3859994113445282,
"learning_rate": 8.160804020100504e-05,
"loss": 1.034,
"step": 594
},
{
"epoch": 0.2646797153024911,
"grad_norm": 0.4068204164505005,
"learning_rate": 8.14070351758794e-05,
"loss": 0.931,
"step": 595
},
{
"epoch": 0.26512455516014233,
"grad_norm": 0.3974169194698334,
"learning_rate": 8.120603015075378e-05,
"loss": 1.055,
"step": 596
},
{
"epoch": 0.2655693950177936,
"grad_norm": 0.3818078637123108,
"learning_rate": 8.100502512562814e-05,
"loss": 1.0652,
"step": 597
},
{
"epoch": 0.26601423487544484,
"grad_norm": 0.3733369708061218,
"learning_rate": 8.080402010050251e-05,
"loss": 0.9981,
"step": 598
},
{
"epoch": 0.2664590747330961,
"grad_norm": 0.3980778455734253,
"learning_rate": 8.060301507537689e-05,
"loss": 0.982,
"step": 599
},
{
"epoch": 0.2669039145907473,
"grad_norm": 0.3596290051937103,
"learning_rate": 8.040201005025126e-05,
"loss": 0.896,
"step": 600
},
{
"epoch": 0.26734875444839856,
"grad_norm": 0.330400675535202,
"learning_rate": 8.020100502512563e-05,
"loss": 0.9776,
"step": 601
},
{
"epoch": 0.2677935943060498,
"grad_norm": 0.3300771415233612,
"learning_rate": 8e-05,
"loss": 1.0576,
"step": 602
},
{
"epoch": 0.2682384341637011,
"grad_norm": 0.36290502548217773,
"learning_rate": 7.979899497487438e-05,
"loss": 0.9378,
"step": 603
},
{
"epoch": 0.26868327402135234,
"grad_norm": 0.3487003743648529,
"learning_rate": 7.959798994974875e-05,
"loss": 0.9146,
"step": 604
},
{
"epoch": 0.26912811387900354,
"grad_norm": 0.3635631501674652,
"learning_rate": 7.939698492462313e-05,
"loss": 0.9758,
"step": 605
},
{
"epoch": 0.2695729537366548,
"grad_norm": 0.35547807812690735,
"learning_rate": 7.91959798994975e-05,
"loss": 1.0026,
"step": 606
},
{
"epoch": 0.27001779359430605,
"grad_norm": 0.33966726064682007,
"learning_rate": 7.899497487437186e-05,
"loss": 0.9059,
"step": 607
},
{
"epoch": 0.2704626334519573,
"grad_norm": 0.3759270906448364,
"learning_rate": 7.879396984924623e-05,
"loss": 1.0088,
"step": 608
},
{
"epoch": 0.2709074733096085,
"grad_norm": 0.36231812834739685,
"learning_rate": 7.85929648241206e-05,
"loss": 0.9903,
"step": 609
},
{
"epoch": 0.2713523131672598,
"grad_norm": 0.389737069606781,
"learning_rate": 7.839195979899498e-05,
"loss": 0.8706,
"step": 610
},
{
"epoch": 0.27179715302491103,
"grad_norm": 0.43408992886543274,
"learning_rate": 7.819095477386935e-05,
"loss": 0.9437,
"step": 611
},
{
"epoch": 0.2722419928825623,
"grad_norm": 0.29707393050193787,
"learning_rate": 7.798994974874372e-05,
"loss": 0.8361,
"step": 612
},
{
"epoch": 0.27268683274021355,
"grad_norm": 0.3469286561012268,
"learning_rate": 7.77889447236181e-05,
"loss": 1.0085,
"step": 613
},
{
"epoch": 0.27313167259786475,
"grad_norm": 0.4895261824131012,
"learning_rate": 7.758793969849247e-05,
"loss": 0.8068,
"step": 614
},
{
"epoch": 0.273576512455516,
"grad_norm": 0.3655518591403961,
"learning_rate": 7.738693467336684e-05,
"loss": 0.9248,
"step": 615
},
{
"epoch": 0.27402135231316727,
"grad_norm": 0.36705151200294495,
"learning_rate": 7.71859296482412e-05,
"loss": 1.0244,
"step": 616
},
{
"epoch": 0.2744661921708185,
"grad_norm": 0.3587312400341034,
"learning_rate": 7.698492462311559e-05,
"loss": 1.0522,
"step": 617
},
{
"epoch": 0.2749110320284697,
"grad_norm": 0.3758445680141449,
"learning_rate": 7.678391959798995e-05,
"loss": 0.9904,
"step": 618
},
{
"epoch": 0.275355871886121,
"grad_norm": 0.31862393021583557,
"learning_rate": 7.658291457286433e-05,
"loss": 0.9216,
"step": 619
},
{
"epoch": 0.27580071174377224,
"grad_norm": 0.4419485926628113,
"learning_rate": 7.638190954773869e-05,
"loss": 0.9049,
"step": 620
},
{
"epoch": 0.2762455516014235,
"grad_norm": 0.38519737124443054,
"learning_rate": 7.618090452261307e-05,
"loss": 0.9116,
"step": 621
},
{
"epoch": 0.27669039145907476,
"grad_norm": 0.37228310108184814,
"learning_rate": 7.597989949748744e-05,
"loss": 0.9491,
"step": 622
},
{
"epoch": 0.27713523131672596,
"grad_norm": 0.3866081237792969,
"learning_rate": 7.577889447236181e-05,
"loss": 0.9477,
"step": 623
},
{
"epoch": 0.2775800711743772,
"grad_norm": 0.32213568687438965,
"learning_rate": 7.557788944723618e-05,
"loss": 0.9049,
"step": 624
},
{
"epoch": 0.2780249110320285,
"grad_norm": 0.34658104181289673,
"learning_rate": 7.537688442211056e-05,
"loss": 0.9486,
"step": 625
},
{
"epoch": 0.27846975088967973,
"grad_norm": 0.42399680614471436,
"learning_rate": 7.517587939698493e-05,
"loss": 1.0776,
"step": 626
},
{
"epoch": 0.27891459074733094,
"grad_norm": 0.3882311284542084,
"learning_rate": 7.49748743718593e-05,
"loss": 0.9711,
"step": 627
},
{
"epoch": 0.2793594306049822,
"grad_norm": 0.34879353642463684,
"learning_rate": 7.477386934673368e-05,
"loss": 0.9253,
"step": 628
},
{
"epoch": 0.27980427046263345,
"grad_norm": 0.3509921431541443,
"learning_rate": 7.457286432160805e-05,
"loss": 0.9711,
"step": 629
},
{
"epoch": 0.2802491103202847,
"grad_norm": 0.32417482137680054,
"learning_rate": 7.437185929648241e-05,
"loss": 0.9612,
"step": 630
},
{
"epoch": 0.28069395017793597,
"grad_norm": 0.3800636827945709,
"learning_rate": 7.417085427135678e-05,
"loss": 1.0243,
"step": 631
},
{
"epoch": 0.28113879003558717,
"grad_norm": 0.38342657685279846,
"learning_rate": 7.396984924623115e-05,
"loss": 0.9895,
"step": 632
},
{
"epoch": 0.28158362989323843,
"grad_norm": 0.40875983238220215,
"learning_rate": 7.376884422110553e-05,
"loss": 0.9428,
"step": 633
},
{
"epoch": 0.2820284697508897,
"grad_norm": 0.34101223945617676,
"learning_rate": 7.35678391959799e-05,
"loss": 0.977,
"step": 634
},
{
"epoch": 0.28247330960854095,
"grad_norm": 0.3537449836730957,
"learning_rate": 7.336683417085427e-05,
"loss": 0.9458,
"step": 635
},
{
"epoch": 0.28291814946619215,
"grad_norm": 0.3816027343273163,
"learning_rate": 7.316582914572865e-05,
"loss": 1.0429,
"step": 636
},
{
"epoch": 0.2833629893238434,
"grad_norm": 0.39149439334869385,
"learning_rate": 7.296482412060302e-05,
"loss": 0.974,
"step": 637
},
{
"epoch": 0.28380782918149466,
"grad_norm": 0.4129406809806824,
"learning_rate": 7.276381909547739e-05,
"loss": 0.988,
"step": 638
},
{
"epoch": 0.2842526690391459,
"grad_norm": 0.36804699897766113,
"learning_rate": 7.256281407035177e-05,
"loss": 0.9375,
"step": 639
},
{
"epoch": 0.2846975088967972,
"grad_norm": 0.40363621711730957,
"learning_rate": 7.236180904522614e-05,
"loss": 0.9214,
"step": 640
},
{
"epoch": 0.2851423487544484,
"grad_norm": 0.3833264708518982,
"learning_rate": 7.21608040201005e-05,
"loss": 0.9454,
"step": 641
},
{
"epoch": 0.28558718861209964,
"grad_norm": 0.38440853357315063,
"learning_rate": 7.195979899497488e-05,
"loss": 0.8841,
"step": 642
},
{
"epoch": 0.2860320284697509,
"grad_norm": 0.42536038160324097,
"learning_rate": 7.175879396984924e-05,
"loss": 0.9665,
"step": 643
},
{
"epoch": 0.28647686832740216,
"grad_norm": 0.33145928382873535,
"learning_rate": 7.155778894472363e-05,
"loss": 0.9118,
"step": 644
},
{
"epoch": 0.28692170818505336,
"grad_norm": 0.37018072605133057,
"learning_rate": 7.135678391959799e-05,
"loss": 0.9424,
"step": 645
},
{
"epoch": 0.2873665480427046,
"grad_norm": 0.37953630089759827,
"learning_rate": 7.115577889447236e-05,
"loss": 0.9889,
"step": 646
},
{
"epoch": 0.2878113879003559,
"grad_norm": 0.3908143937587738,
"learning_rate": 7.095477386934674e-05,
"loss": 0.9815,
"step": 647
},
{
"epoch": 0.28825622775800713,
"grad_norm": 0.38611525297164917,
"learning_rate": 7.075376884422111e-05,
"loss": 1.0179,
"step": 648
},
{
"epoch": 0.28870106761565834,
"grad_norm": 0.4868585169315338,
"learning_rate": 7.055276381909548e-05,
"loss": 1.0551,
"step": 649
},
{
"epoch": 0.2891459074733096,
"grad_norm": 0.4477649927139282,
"learning_rate": 7.035175879396985e-05,
"loss": 0.8795,
"step": 650
},
{
"epoch": 0.28959074733096085,
"grad_norm": 0.3561191260814667,
"learning_rate": 7.015075376884423e-05,
"loss": 0.8927,
"step": 651
},
{
"epoch": 0.2900355871886121,
"grad_norm": 0.4050782024860382,
"learning_rate": 6.99497487437186e-05,
"loss": 0.9032,
"step": 652
},
{
"epoch": 0.29048042704626337,
"grad_norm": 0.3366767466068268,
"learning_rate": 6.974874371859297e-05,
"loss": 0.9763,
"step": 653
},
{
"epoch": 0.29092526690391457,
"grad_norm": 0.33202803134918213,
"learning_rate": 6.954773869346733e-05,
"loss": 0.8999,
"step": 654
},
{
"epoch": 0.29137010676156583,
"grad_norm": 0.38702261447906494,
"learning_rate": 6.93467336683417e-05,
"loss": 0.8882,
"step": 655
},
{
"epoch": 0.2918149466192171,
"grad_norm": 0.39489811658859253,
"learning_rate": 6.914572864321608e-05,
"loss": 0.8615,
"step": 656
},
{
"epoch": 0.29225978647686834,
"grad_norm": 0.407174289226532,
"learning_rate": 6.894472361809045e-05,
"loss": 0.8805,
"step": 657
},
{
"epoch": 0.29270462633451955,
"grad_norm": 0.3787648379802704,
"learning_rate": 6.874371859296482e-05,
"loss": 0.9016,
"step": 658
},
{
"epoch": 0.2931494661921708,
"grad_norm": 0.3571087121963501,
"learning_rate": 6.85427135678392e-05,
"loss": 0.9484,
"step": 659
},
{
"epoch": 0.29359430604982206,
"grad_norm": 0.41471996903419495,
"learning_rate": 6.834170854271357e-05,
"loss": 1.0558,
"step": 660
},
{
"epoch": 0.2940391459074733,
"grad_norm": 0.4297381639480591,
"learning_rate": 6.814070351758794e-05,
"loss": 0.9687,
"step": 661
},
{
"epoch": 0.2944839857651246,
"grad_norm": 0.33687537908554077,
"learning_rate": 6.793969849246232e-05,
"loss": 0.9266,
"step": 662
},
{
"epoch": 0.2949288256227758,
"grad_norm": 0.34857362508773804,
"learning_rate": 6.773869346733669e-05,
"loss": 0.8977,
"step": 663
},
{
"epoch": 0.29537366548042704,
"grad_norm": 0.4323669672012329,
"learning_rate": 6.753768844221105e-05,
"loss": 0.9535,
"step": 664
},
{
"epoch": 0.2958185053380783,
"grad_norm": 0.36021220684051514,
"learning_rate": 6.733668341708544e-05,
"loss": 0.9457,
"step": 665
},
{
"epoch": 0.29626334519572955,
"grad_norm": 0.3574046492576599,
"learning_rate": 6.71356783919598e-05,
"loss": 0.9377,
"step": 666
},
{
"epoch": 0.29670818505338076,
"grad_norm": 0.3637276291847229,
"learning_rate": 6.693467336683418e-05,
"loss": 0.9769,
"step": 667
},
{
"epoch": 0.297153024911032,
"grad_norm": 0.4049037992954254,
"learning_rate": 6.673366834170854e-05,
"loss": 1.0499,
"step": 668
},
{
"epoch": 0.2975978647686833,
"grad_norm": 0.4278649687767029,
"learning_rate": 6.653266331658293e-05,
"loss": 0.8835,
"step": 669
},
{
"epoch": 0.29804270462633453,
"grad_norm": 0.37466055154800415,
"learning_rate": 6.633165829145729e-05,
"loss": 0.9493,
"step": 670
},
{
"epoch": 0.2984875444839858,
"grad_norm": 0.3321222960948944,
"learning_rate": 6.613065326633166e-05,
"loss": 0.9484,
"step": 671
},
{
"epoch": 0.298932384341637,
"grad_norm": 0.4597262740135193,
"learning_rate": 6.592964824120603e-05,
"loss": 1.0418,
"step": 672
},
{
"epoch": 0.29937722419928825,
"grad_norm": 0.3227452337741852,
"learning_rate": 6.57286432160804e-05,
"loss": 0.9323,
"step": 673
},
{
"epoch": 0.2998220640569395,
"grad_norm": 0.3192083239555359,
"learning_rate": 6.552763819095478e-05,
"loss": 0.9021,
"step": 674
},
{
"epoch": 0.30026690391459077,
"grad_norm": 0.4145206809043884,
"learning_rate": 6.532663316582915e-05,
"loss": 0.9431,
"step": 675
},
{
"epoch": 0.30071174377224197,
"grad_norm": 0.4133388102054596,
"learning_rate": 6.512562814070352e-05,
"loss": 0.9673,
"step": 676
},
{
"epoch": 0.3011565836298932,
"grad_norm": 0.4261665940284729,
"learning_rate": 6.492462311557788e-05,
"loss": 1.0128,
"step": 677
},
{
"epoch": 0.3016014234875445,
"grad_norm": 0.33553653955459595,
"learning_rate": 6.472361809045227e-05,
"loss": 0.9103,
"step": 678
},
{
"epoch": 0.30204626334519574,
"grad_norm": 0.378233939409256,
"learning_rate": 6.452261306532663e-05,
"loss": 1.1017,
"step": 679
},
{
"epoch": 0.302491103202847,
"grad_norm": 0.39459675550460815,
"learning_rate": 6.4321608040201e-05,
"loss": 0.9056,
"step": 680
},
{
"epoch": 0.3029359430604982,
"grad_norm": 0.31103044748306274,
"learning_rate": 6.412060301507538e-05,
"loss": 0.896,
"step": 681
},
{
"epoch": 0.30338078291814946,
"grad_norm": 0.32393592596054077,
"learning_rate": 6.391959798994975e-05,
"loss": 0.9233,
"step": 682
},
{
"epoch": 0.3038256227758007,
"grad_norm": 0.37299537658691406,
"learning_rate": 6.371859296482412e-05,
"loss": 0.9587,
"step": 683
},
{
"epoch": 0.304270462633452,
"grad_norm": 0.3141055703163147,
"learning_rate": 6.35175879396985e-05,
"loss": 0.9314,
"step": 684
},
{
"epoch": 0.3047153024911032,
"grad_norm": 0.32686251401901245,
"learning_rate": 6.331658291457287e-05,
"loss": 0.9325,
"step": 685
},
{
"epoch": 0.30516014234875444,
"grad_norm": 0.35383760929107666,
"learning_rate": 6.311557788944724e-05,
"loss": 0.9834,
"step": 686
},
{
"epoch": 0.3056049822064057,
"grad_norm": 0.30796653032302856,
"learning_rate": 6.291457286432161e-05,
"loss": 0.8842,
"step": 687
},
{
"epoch": 0.30604982206405695,
"grad_norm": 0.31963440775871277,
"learning_rate": 6.271356783919599e-05,
"loss": 0.9372,
"step": 688
},
{
"epoch": 0.3064946619217082,
"grad_norm": 0.43244582414627075,
"learning_rate": 6.251256281407035e-05,
"loss": 0.939,
"step": 689
},
{
"epoch": 0.3069395017793594,
"grad_norm": 0.32725387811660767,
"learning_rate": 6.231155778894473e-05,
"loss": 0.8819,
"step": 690
},
{
"epoch": 0.30738434163701067,
"grad_norm": 0.3326573371887207,
"learning_rate": 6.211055276381909e-05,
"loss": 1.0157,
"step": 691
},
{
"epoch": 0.30782918149466193,
"grad_norm": 0.3267054855823517,
"learning_rate": 6.190954773869348e-05,
"loss": 0.8806,
"step": 692
},
{
"epoch": 0.3082740213523132,
"grad_norm": 0.3328150808811188,
"learning_rate": 6.170854271356784e-05,
"loss": 0.9752,
"step": 693
},
{
"epoch": 0.3087188612099644,
"grad_norm": 0.42537713050842285,
"learning_rate": 6.150753768844222e-05,
"loss": 0.8548,
"step": 694
},
{
"epoch": 0.30916370106761565,
"grad_norm": 0.339568167924881,
"learning_rate": 6.130653266331658e-05,
"loss": 0.9964,
"step": 695
},
{
"epoch": 0.3096085409252669,
"grad_norm": 0.40449681878089905,
"learning_rate": 6.110552763819096e-05,
"loss": 0.8698,
"step": 696
},
{
"epoch": 0.31005338078291816,
"grad_norm": 0.42647475004196167,
"learning_rate": 6.090452261306533e-05,
"loss": 0.9248,
"step": 697
},
{
"epoch": 0.3104982206405694,
"grad_norm": 0.3729760944843292,
"learning_rate": 6.070351758793971e-05,
"loss": 0.9945,
"step": 698
},
{
"epoch": 0.3109430604982206,
"grad_norm": 0.34256407618522644,
"learning_rate": 6.0502512562814076e-05,
"loss": 0.8589,
"step": 699
},
{
"epoch": 0.3113879003558719,
"grad_norm": 0.37434864044189453,
"learning_rate": 6.030150753768844e-05,
"loss": 0.8921,
"step": 700
},
{
"epoch": 0.31183274021352314,
"grad_norm": 0.3740074336528778,
"learning_rate": 6.0100502512562815e-05,
"loss": 0.9538,
"step": 701
},
{
"epoch": 0.3122775800711744,
"grad_norm": 0.2955409288406372,
"learning_rate": 5.989949748743718e-05,
"loss": 0.8796,
"step": 702
},
{
"epoch": 0.3127224199288256,
"grad_norm": 0.41662129759788513,
"learning_rate": 5.969849246231156e-05,
"loss": 0.976,
"step": 703
},
{
"epoch": 0.31316725978647686,
"grad_norm": 0.3664419651031494,
"learning_rate": 5.949748743718593e-05,
"loss": 0.9813,
"step": 704
},
{
"epoch": 0.3136120996441281,
"grad_norm": 0.4082026779651642,
"learning_rate": 5.929648241206031e-05,
"loss": 0.9604,
"step": 705
},
{
"epoch": 0.3140569395017794,
"grad_norm": 0.34381023049354553,
"learning_rate": 5.909547738693467e-05,
"loss": 0.9438,
"step": 706
},
{
"epoch": 0.3145017793594306,
"grad_norm": 0.32259446382522583,
"learning_rate": 5.889447236180905e-05,
"loss": 0.8939,
"step": 707
},
{
"epoch": 0.31494661921708184,
"grad_norm": 0.4534759819507599,
"learning_rate": 5.869346733668342e-05,
"loss": 1.0139,
"step": 708
},
{
"epoch": 0.3153914590747331,
"grad_norm": 0.3195643723011017,
"learning_rate": 5.849246231155779e-05,
"loss": 0.8886,
"step": 709
},
{
"epoch": 0.31583629893238435,
"grad_norm": 0.34111788868904114,
"learning_rate": 5.829145728643216e-05,
"loss": 0.9006,
"step": 710
},
{
"epoch": 0.3162811387900356,
"grad_norm": 0.32145076990127563,
"learning_rate": 5.809045226130654e-05,
"loss": 0.9042,
"step": 711
},
{
"epoch": 0.3167259786476868,
"grad_norm": 0.38871321082115173,
"learning_rate": 5.7889447236180904e-05,
"loss": 0.8285,
"step": 712
},
{
"epoch": 0.31717081850533807,
"grad_norm": 0.34648001194000244,
"learning_rate": 5.7688442211055284e-05,
"loss": 0.8948,
"step": 713
},
{
"epoch": 0.31761565836298933,
"grad_norm": 0.42825043201446533,
"learning_rate": 5.748743718592965e-05,
"loss": 0.9665,
"step": 714
},
{
"epoch": 0.3180604982206406,
"grad_norm": 0.4382406771183014,
"learning_rate": 5.728643216080403e-05,
"loss": 1.0006,
"step": 715
},
{
"epoch": 0.3185053380782918,
"grad_norm": 0.3384600579738617,
"learning_rate": 5.7085427135678396e-05,
"loss": 0.893,
"step": 716
},
{
"epoch": 0.31895017793594305,
"grad_norm": 0.329862505197525,
"learning_rate": 5.688442211055277e-05,
"loss": 0.8064,
"step": 717
},
{
"epoch": 0.3193950177935943,
"grad_norm": 0.36745166778564453,
"learning_rate": 5.6683417085427135e-05,
"loss": 0.9621,
"step": 718
},
{
"epoch": 0.31983985765124556,
"grad_norm": 0.36240148544311523,
"learning_rate": 5.6482412060301515e-05,
"loss": 0.8902,
"step": 719
},
{
"epoch": 0.3202846975088968,
"grad_norm": 0.3071519136428833,
"learning_rate": 5.628140703517588e-05,
"loss": 0.9572,
"step": 720
},
{
"epoch": 0.320729537366548,
"grad_norm": 0.3393983840942383,
"learning_rate": 5.608040201005026e-05,
"loss": 0.9021,
"step": 721
},
{
"epoch": 0.3211743772241993,
"grad_norm": 0.3375062942504883,
"learning_rate": 5.587939698492463e-05,
"loss": 0.8505,
"step": 722
},
{
"epoch": 0.32161921708185054,
"grad_norm": 0.36166590452194214,
"learning_rate": 5.567839195979899e-05,
"loss": 0.9217,
"step": 723
},
{
"epoch": 0.3220640569395018,
"grad_norm": 0.423556923866272,
"learning_rate": 5.547738693467337e-05,
"loss": 0.8722,
"step": 724
},
{
"epoch": 0.322508896797153,
"grad_norm": 0.3881194293498993,
"learning_rate": 5.527638190954774e-05,
"loss": 0.9246,
"step": 725
},
{
"epoch": 0.32295373665480426,
"grad_norm": 0.41174158453941345,
"learning_rate": 5.507537688442211e-05,
"loss": 0.9962,
"step": 726
},
{
"epoch": 0.3233985765124555,
"grad_norm": 0.3954409062862396,
"learning_rate": 5.487437185929648e-05,
"loss": 0.8989,
"step": 727
},
{
"epoch": 0.3238434163701068,
"grad_norm": 0.371358722448349,
"learning_rate": 5.467336683417086e-05,
"loss": 0.9655,
"step": 728
},
{
"epoch": 0.32428825622775803,
"grad_norm": 0.4016554355621338,
"learning_rate": 5.4472361809045224e-05,
"loss": 0.9298,
"step": 729
},
{
"epoch": 0.32473309608540923,
"grad_norm": 0.45096760988235474,
"learning_rate": 5.4271356783919604e-05,
"loss": 1.0633,
"step": 730
},
{
"epoch": 0.3251779359430605,
"grad_norm": 0.42669543623924255,
"learning_rate": 5.407035175879397e-05,
"loss": 0.9897,
"step": 731
},
{
"epoch": 0.32562277580071175,
"grad_norm": 0.3072949945926666,
"learning_rate": 5.386934673366835e-05,
"loss": 0.8538,
"step": 732
},
{
"epoch": 0.326067615658363,
"grad_norm": 0.3681629002094269,
"learning_rate": 5.3668341708542716e-05,
"loss": 0.9532,
"step": 733
},
{
"epoch": 0.3265124555160142,
"grad_norm": 0.3667491674423218,
"learning_rate": 5.346733668341709e-05,
"loss": 0.9971,
"step": 734
},
{
"epoch": 0.32695729537366547,
"grad_norm": 0.3561696410179138,
"learning_rate": 5.3266331658291455e-05,
"loss": 0.9332,
"step": 735
},
{
"epoch": 0.3274021352313167,
"grad_norm": 0.51035076379776,
"learning_rate": 5.3065326633165835e-05,
"loss": 0.9938,
"step": 736
},
{
"epoch": 0.327846975088968,
"grad_norm": 0.30791398882865906,
"learning_rate": 5.28643216080402e-05,
"loss": 0.9195,
"step": 737
},
{
"epoch": 0.32829181494661924,
"grad_norm": 0.37262293696403503,
"learning_rate": 5.266331658291458e-05,
"loss": 0.8857,
"step": 738
},
{
"epoch": 0.32873665480427045,
"grad_norm": 0.3665037751197815,
"learning_rate": 5.246231155778895e-05,
"loss": 0.8634,
"step": 739
},
{
"epoch": 0.3291814946619217,
"grad_norm": 0.3333563804626465,
"learning_rate": 5.226130653266332e-05,
"loss": 0.9507,
"step": 740
},
{
"epoch": 0.32962633451957296,
"grad_norm": 0.3755170404911041,
"learning_rate": 5.206030150753769e-05,
"loss": 0.9742,
"step": 741
},
{
"epoch": 0.3300711743772242,
"grad_norm": 0.3400593101978302,
"learning_rate": 5.1859296482412066e-05,
"loss": 0.8169,
"step": 742
},
{
"epoch": 0.3305160142348754,
"grad_norm": 0.5976528525352478,
"learning_rate": 5.165829145728643e-05,
"loss": 1.0596,
"step": 743
},
{
"epoch": 0.3309608540925267,
"grad_norm": 0.3495856523513794,
"learning_rate": 5.145728643216081e-05,
"loss": 1.1273,
"step": 744
},
{
"epoch": 0.33140569395017794,
"grad_norm": 0.3898337483406067,
"learning_rate": 5.125628140703518e-05,
"loss": 1.0512,
"step": 745
},
{
"epoch": 0.3318505338078292,
"grad_norm": 0.4057525396347046,
"learning_rate": 5.1055276381909544e-05,
"loss": 1.0474,
"step": 746
},
{
"epoch": 0.33229537366548045,
"grad_norm": 0.365829199552536,
"learning_rate": 5.0854271356783924e-05,
"loss": 1.0095,
"step": 747
},
{
"epoch": 0.33274021352313166,
"grad_norm": 0.3495554029941559,
"learning_rate": 5.065326633165829e-05,
"loss": 0.8723,
"step": 748
},
{
"epoch": 0.3331850533807829,
"grad_norm": 0.3964870572090149,
"learning_rate": 5.045226130653266e-05,
"loss": 0.9775,
"step": 749
},
{
"epoch": 0.33362989323843417,
"grad_norm": 0.3591833710670471,
"learning_rate": 5.0251256281407036e-05,
"loss": 0.8438,
"step": 750
},
{
"epoch": 0.33407473309608543,
"grad_norm": 0.4429285526275635,
"learning_rate": 5.005025125628141e-05,
"loss": 0.9144,
"step": 751
},
{
"epoch": 0.33451957295373663,
"grad_norm": 0.3427349328994751,
"learning_rate": 4.984924623115578e-05,
"loss": 0.8728,
"step": 752
},
{
"epoch": 0.3349644128113879,
"grad_norm": 0.4042606055736542,
"learning_rate": 4.9648241206030155e-05,
"loss": 1.0932,
"step": 753
},
{
"epoch": 0.33540925266903915,
"grad_norm": 0.38601040840148926,
"learning_rate": 4.944723618090453e-05,
"loss": 0.8947,
"step": 754
},
{
"epoch": 0.3358540925266904,
"grad_norm": 0.31526607275009155,
"learning_rate": 4.92462311557789e-05,
"loss": 0.8783,
"step": 755
},
{
"epoch": 0.33629893238434166,
"grad_norm": 0.3799164593219757,
"learning_rate": 4.9045226130653274e-05,
"loss": 0.894,
"step": 756
},
{
"epoch": 0.33674377224199287,
"grad_norm": 0.36352670192718506,
"learning_rate": 4.884422110552764e-05,
"loss": 1.0077,
"step": 757
},
{
"epoch": 0.3371886120996441,
"grad_norm": 0.37971585988998413,
"learning_rate": 4.864321608040201e-05,
"loss": 0.8504,
"step": 758
},
{
"epoch": 0.3376334519572954,
"grad_norm": 0.3846040666103363,
"learning_rate": 4.844221105527638e-05,
"loss": 0.9923,
"step": 759
},
{
"epoch": 0.33807829181494664,
"grad_norm": 0.3028171956539154,
"learning_rate": 4.824120603015075e-05,
"loss": 0.9328,
"step": 760
},
{
"epoch": 0.33852313167259784,
"grad_norm": 0.35201942920684814,
"learning_rate": 4.8040201005025125e-05,
"loss": 0.8622,
"step": 761
},
{
"epoch": 0.3389679715302491,
"grad_norm": 0.3402314782142639,
"learning_rate": 4.78391959798995e-05,
"loss": 1.0578,
"step": 762
},
{
"epoch": 0.33941281138790036,
"grad_norm": 0.43474265933036804,
"learning_rate": 4.763819095477387e-05,
"loss": 1.0589,
"step": 763
},
{
"epoch": 0.3398576512455516,
"grad_norm": 0.4756290912628174,
"learning_rate": 4.7437185929648244e-05,
"loss": 1.1027,
"step": 764
},
{
"epoch": 0.3403024911032028,
"grad_norm": 0.3077346682548523,
"learning_rate": 4.723618090452262e-05,
"loss": 0.9635,
"step": 765
},
{
"epoch": 0.3407473309608541,
"grad_norm": 0.36826518177986145,
"learning_rate": 4.703517587939698e-05,
"loss": 0.9687,
"step": 766
},
{
"epoch": 0.34119217081850534,
"grad_norm": 0.4034232199192047,
"learning_rate": 4.6834170854271356e-05,
"loss": 0.9209,
"step": 767
},
{
"epoch": 0.3416370106761566,
"grad_norm": 0.41342729330062866,
"learning_rate": 4.663316582914573e-05,
"loss": 0.9213,
"step": 768
},
{
"epoch": 0.34208185053380785,
"grad_norm": 0.3749755620956421,
"learning_rate": 4.64321608040201e-05,
"loss": 0.9386,
"step": 769
},
{
"epoch": 0.34252669039145905,
"grad_norm": 0.3411119878292084,
"learning_rate": 4.6231155778894475e-05,
"loss": 0.8805,
"step": 770
},
{
"epoch": 0.3429715302491103,
"grad_norm": 0.3668997883796692,
"learning_rate": 4.603015075376885e-05,
"loss": 0.9887,
"step": 771
},
{
"epoch": 0.34341637010676157,
"grad_norm": 0.45816823840141296,
"learning_rate": 4.582914572864322e-05,
"loss": 0.9031,
"step": 772
},
{
"epoch": 0.34386120996441283,
"grad_norm": 0.3586982786655426,
"learning_rate": 4.5628140703517594e-05,
"loss": 0.9774,
"step": 773
},
{
"epoch": 0.34430604982206403,
"grad_norm": 0.42974361777305603,
"learning_rate": 4.542713567839196e-05,
"loss": 1.0421,
"step": 774
},
{
"epoch": 0.3447508896797153,
"grad_norm": 0.36137211322784424,
"learning_rate": 4.522613065326633e-05,
"loss": 1.0171,
"step": 775
},
{
"epoch": 0.34519572953736655,
"grad_norm": 0.413507342338562,
"learning_rate": 4.5025125628140706e-05,
"loss": 0.9786,
"step": 776
},
{
"epoch": 0.3456405693950178,
"grad_norm": 0.42441946268081665,
"learning_rate": 4.482412060301508e-05,
"loss": 1.01,
"step": 777
},
{
"epoch": 0.34608540925266906,
"grad_norm": 0.35072627663612366,
"learning_rate": 4.462311557788945e-05,
"loss": 0.9709,
"step": 778
},
{
"epoch": 0.34653024911032027,
"grad_norm": 0.37332162261009216,
"learning_rate": 4.4422110552763825e-05,
"loss": 0.9844,
"step": 779
},
{
"epoch": 0.3469750889679715,
"grad_norm": 0.388919860124588,
"learning_rate": 4.42211055276382e-05,
"loss": 0.8525,
"step": 780
},
{
"epoch": 0.3474199288256228,
"grad_norm": 0.3552108407020569,
"learning_rate": 4.4020100502512564e-05,
"loss": 0.9713,
"step": 781
},
{
"epoch": 0.34786476868327404,
"grad_norm": 0.3655258119106293,
"learning_rate": 4.381909547738694e-05,
"loss": 0.9532,
"step": 782
},
{
"epoch": 0.34830960854092524,
"grad_norm": 0.38755300641059875,
"learning_rate": 4.3618090452261303e-05,
"loss": 0.9346,
"step": 783
},
{
"epoch": 0.3487544483985765,
"grad_norm": 0.4547751843929291,
"learning_rate": 4.3417085427135676e-05,
"loss": 0.8872,
"step": 784
},
{
"epoch": 0.34919928825622776,
"grad_norm": 0.2990303039550781,
"learning_rate": 4.321608040201005e-05,
"loss": 0.8979,
"step": 785
},
{
"epoch": 0.349644128113879,
"grad_norm": 0.3581852614879608,
"learning_rate": 4.301507537688442e-05,
"loss": 0.9466,
"step": 786
},
{
"epoch": 0.3500889679715303,
"grad_norm": 0.33662378787994385,
"learning_rate": 4.2814070351758795e-05,
"loss": 0.9274,
"step": 787
},
{
"epoch": 0.3505338078291815,
"grad_norm": 0.34652361273765564,
"learning_rate": 4.261306532663317e-05,
"loss": 0.9791,
"step": 788
},
{
"epoch": 0.35097864768683273,
"grad_norm": 0.39401039481163025,
"learning_rate": 4.241206030150754e-05,
"loss": 0.9485,
"step": 789
},
{
"epoch": 0.351423487544484,
"grad_norm": 0.36272600293159485,
"learning_rate": 4.2211055276381914e-05,
"loss": 0.9341,
"step": 790
},
{
"epoch": 0.35186832740213525,
"grad_norm": 0.3438786566257477,
"learning_rate": 4.201005025125628e-05,
"loss": 0.8061,
"step": 791
},
{
"epoch": 0.35231316725978645,
"grad_norm": 0.396484911441803,
"learning_rate": 4.180904522613065e-05,
"loss": 0.9506,
"step": 792
},
{
"epoch": 0.3527580071174377,
"grad_norm": 0.31982484459877014,
"learning_rate": 4.1608040201005026e-05,
"loss": 0.9372,
"step": 793
},
{
"epoch": 0.35320284697508897,
"grad_norm": 0.323798805475235,
"learning_rate": 4.14070351758794e-05,
"loss": 0.9342,
"step": 794
},
{
"epoch": 0.3536476868327402,
"grad_norm": 0.3632016181945801,
"learning_rate": 4.120603015075377e-05,
"loss": 0.9682,
"step": 795
},
{
"epoch": 0.3540925266903915,
"grad_norm": 0.3542345464229584,
"learning_rate": 4.1005025125628145e-05,
"loss": 0.9129,
"step": 796
},
{
"epoch": 0.3545373665480427,
"grad_norm": 0.48540955781936646,
"learning_rate": 4.080402010050252e-05,
"loss": 1.0319,
"step": 797
},
{
"epoch": 0.35498220640569395,
"grad_norm": 0.37020549178123474,
"learning_rate": 4.060301507537689e-05,
"loss": 0.8719,
"step": 798
},
{
"epoch": 0.3554270462633452,
"grad_norm": 0.3740653097629547,
"learning_rate": 4.040201005025126e-05,
"loss": 0.9464,
"step": 799
},
{
"epoch": 0.35587188612099646,
"grad_norm": 0.3594771921634674,
"learning_rate": 4.020100502512563e-05,
"loss": 0.9592,
"step": 800
},
{
"epoch": 0.35631672597864766,
"grad_norm": 0.4490291476249695,
"learning_rate": 4e-05,
"loss": 0.9624,
"step": 801
},
{
"epoch": 0.3567615658362989,
"grad_norm": 0.3087805509567261,
"learning_rate": 3.9798994974874376e-05,
"loss": 0.9905,
"step": 802
},
{
"epoch": 0.3572064056939502,
"grad_norm": 0.39388784766197205,
"learning_rate": 3.959798994974875e-05,
"loss": 0.9644,
"step": 803
},
{
"epoch": 0.35765124555160144,
"grad_norm": 0.411502867937088,
"learning_rate": 3.9396984924623115e-05,
"loss": 0.9547,
"step": 804
},
{
"epoch": 0.3580960854092527,
"grad_norm": 0.3525221049785614,
"learning_rate": 3.919597989949749e-05,
"loss": 0.8875,
"step": 805
},
{
"epoch": 0.3585409252669039,
"grad_norm": 0.3365210294723511,
"learning_rate": 3.899497487437186e-05,
"loss": 0.913,
"step": 806
},
{
"epoch": 0.35898576512455516,
"grad_norm": 0.3405802249908447,
"learning_rate": 3.8793969849246234e-05,
"loss": 0.8679,
"step": 807
},
{
"epoch": 0.3594306049822064,
"grad_norm": 0.33175504207611084,
"learning_rate": 3.85929648241206e-05,
"loss": 0.929,
"step": 808
},
{
"epoch": 0.35987544483985767,
"grad_norm": 0.3878396451473236,
"learning_rate": 3.8391959798994973e-05,
"loss": 0.9765,
"step": 809
},
{
"epoch": 0.3603202846975089,
"grad_norm": 0.3880787491798401,
"learning_rate": 3.8190954773869346e-05,
"loss": 0.9282,
"step": 810
},
{
"epoch": 0.36076512455516013,
"grad_norm": 0.3783409297466278,
"learning_rate": 3.798994974874372e-05,
"loss": 0.9704,
"step": 811
},
{
"epoch": 0.3612099644128114,
"grad_norm": 0.37962606549263,
"learning_rate": 3.778894472361809e-05,
"loss": 1.0464,
"step": 812
},
{
"epoch": 0.36165480427046265,
"grad_norm": 0.4055241644382477,
"learning_rate": 3.7587939698492465e-05,
"loss": 0.9668,
"step": 813
},
{
"epoch": 0.3620996441281139,
"grad_norm": 0.3240121304988861,
"learning_rate": 3.738693467336684e-05,
"loss": 0.9415,
"step": 814
},
{
"epoch": 0.3625444839857651,
"grad_norm": 0.29463499784469604,
"learning_rate": 3.7185929648241204e-05,
"loss": 0.9489,
"step": 815
},
{
"epoch": 0.36298932384341637,
"grad_norm": 0.4620193839073181,
"learning_rate": 3.698492462311558e-05,
"loss": 0.8513,
"step": 816
},
{
"epoch": 0.3634341637010676,
"grad_norm": 0.3686508536338806,
"learning_rate": 3.678391959798995e-05,
"loss": 1.016,
"step": 817
},
{
"epoch": 0.3638790035587189,
"grad_norm": 0.366397500038147,
"learning_rate": 3.658291457286432e-05,
"loss": 0.8772,
"step": 818
},
{
"epoch": 0.3643238434163701,
"grad_norm": 0.3382261097431183,
"learning_rate": 3.6381909547738696e-05,
"loss": 0.9158,
"step": 819
},
{
"epoch": 0.36476868327402134,
"grad_norm": 0.3870478570461273,
"learning_rate": 3.618090452261307e-05,
"loss": 0.9776,
"step": 820
},
{
"epoch": 0.3652135231316726,
"grad_norm": 0.40006503462791443,
"learning_rate": 3.597989949748744e-05,
"loss": 0.9127,
"step": 821
},
{
"epoch": 0.36565836298932386,
"grad_norm": 0.41942328214645386,
"learning_rate": 3.5778894472361815e-05,
"loss": 0.8786,
"step": 822
},
{
"epoch": 0.36610320284697506,
"grad_norm": 0.34577417373657227,
"learning_rate": 3.557788944723618e-05,
"loss": 0.9029,
"step": 823
},
{
"epoch": 0.3665480427046263,
"grad_norm": 0.3666742146015167,
"learning_rate": 3.5376884422110554e-05,
"loss": 0.9616,
"step": 824
},
{
"epoch": 0.3669928825622776,
"grad_norm": 0.41306397318840027,
"learning_rate": 3.517587939698493e-05,
"loss": 0.8994,
"step": 825
},
{
"epoch": 0.36743772241992884,
"grad_norm": 0.354464054107666,
"learning_rate": 3.49748743718593e-05,
"loss": 0.8398,
"step": 826
},
{
"epoch": 0.3678825622775801,
"grad_norm": 0.40960076451301575,
"learning_rate": 3.4773869346733667e-05,
"loss": 1.0062,
"step": 827
},
{
"epoch": 0.3683274021352313,
"grad_norm": 0.35255295038223267,
"learning_rate": 3.457286432160804e-05,
"loss": 1.013,
"step": 828
},
{
"epoch": 0.36877224199288255,
"grad_norm": 0.320544570684433,
"learning_rate": 3.437185929648241e-05,
"loss": 0.8456,
"step": 829
},
{
"epoch": 0.3692170818505338,
"grad_norm": 0.3942032754421234,
"learning_rate": 3.4170854271356785e-05,
"loss": 0.9627,
"step": 830
},
{
"epoch": 0.36966192170818507,
"grad_norm": 0.34646615386009216,
"learning_rate": 3.396984924623116e-05,
"loss": 0.9397,
"step": 831
},
{
"epoch": 0.3701067615658363,
"grad_norm": 0.3462013006210327,
"learning_rate": 3.3768844221105525e-05,
"loss": 0.9722,
"step": 832
},
{
"epoch": 0.37055160142348753,
"grad_norm": 0.44638511538505554,
"learning_rate": 3.35678391959799e-05,
"loss": 1.0433,
"step": 833
},
{
"epoch": 0.3709964412811388,
"grad_norm": 0.3865186870098114,
"learning_rate": 3.336683417085427e-05,
"loss": 0.9243,
"step": 834
},
{
"epoch": 0.37144128113879005,
"grad_norm": 0.3224859833717346,
"learning_rate": 3.3165829145728643e-05,
"loss": 0.9197,
"step": 835
},
{
"epoch": 0.3718861209964413,
"grad_norm": 0.3511156439781189,
"learning_rate": 3.2964824120603016e-05,
"loss": 0.951,
"step": 836
},
{
"epoch": 0.3723309608540925,
"grad_norm": 0.3146522343158722,
"learning_rate": 3.276381909547739e-05,
"loss": 0.9514,
"step": 837
},
{
"epoch": 0.37277580071174377,
"grad_norm": 0.3094431757926941,
"learning_rate": 3.256281407035176e-05,
"loss": 0.8729,
"step": 838
},
{
"epoch": 0.373220640569395,
"grad_norm": 0.3724329471588135,
"learning_rate": 3.2361809045226135e-05,
"loss": 0.9897,
"step": 839
},
{
"epoch": 0.3736654804270463,
"grad_norm": 0.3881984353065491,
"learning_rate": 3.21608040201005e-05,
"loss": 0.9755,
"step": 840
},
{
"epoch": 0.3741103202846975,
"grad_norm": 0.30279484391212463,
"learning_rate": 3.1959798994974875e-05,
"loss": 0.8903,
"step": 841
},
{
"epoch": 0.37455516014234874,
"grad_norm": 0.42050573229789734,
"learning_rate": 3.175879396984925e-05,
"loss": 0.9163,
"step": 842
},
{
"epoch": 0.375,
"grad_norm": 0.3830379545688629,
"learning_rate": 3.155778894472362e-05,
"loss": 0.9519,
"step": 843
},
{
"epoch": 0.37544483985765126,
"grad_norm": 0.32881635427474976,
"learning_rate": 3.1356783919597993e-05,
"loss": 0.9225,
"step": 844
},
{
"epoch": 0.3758896797153025,
"grad_norm": 0.30189692974090576,
"learning_rate": 3.1155778894472366e-05,
"loss": 0.916,
"step": 845
},
{
"epoch": 0.3763345195729537,
"grad_norm": 0.37163209915161133,
"learning_rate": 3.095477386934674e-05,
"loss": 0.9399,
"step": 846
},
{
"epoch": 0.376779359430605,
"grad_norm": 0.43418970704078674,
"learning_rate": 3.075376884422111e-05,
"loss": 1.0015,
"step": 847
},
{
"epoch": 0.37722419928825623,
"grad_norm": 0.3178066313266754,
"learning_rate": 3.055276381909548e-05,
"loss": 0.9131,
"step": 848
},
{
"epoch": 0.3776690391459075,
"grad_norm": 0.3209547996520996,
"learning_rate": 3.0351758793969855e-05,
"loss": 0.9498,
"step": 849
},
{
"epoch": 0.3781138790035587,
"grad_norm": 0.4426248371601105,
"learning_rate": 3.015075376884422e-05,
"loss": 0.9335,
"step": 850
},
{
"epoch": 0.37855871886120995,
"grad_norm": 0.41413775086402893,
"learning_rate": 2.994974874371859e-05,
"loss": 0.8906,
"step": 851
},
{
"epoch": 0.3790035587188612,
"grad_norm": 0.3665112555027008,
"learning_rate": 2.9748743718592964e-05,
"loss": 0.8849,
"step": 852
},
{
"epoch": 0.37944839857651247,
"grad_norm": 0.36841294169425964,
"learning_rate": 2.9547738693467337e-05,
"loss": 0.8004,
"step": 853
},
{
"epoch": 0.3798932384341637,
"grad_norm": 0.4228864014148712,
"learning_rate": 2.934673366834171e-05,
"loss": 1.0018,
"step": 854
},
{
"epoch": 0.38033807829181493,
"grad_norm": 0.34962666034698486,
"learning_rate": 2.914572864321608e-05,
"loss": 0.9105,
"step": 855
},
{
"epoch": 0.3807829181494662,
"grad_norm": 0.4448448121547699,
"learning_rate": 2.8944723618090452e-05,
"loss": 0.9391,
"step": 856
},
{
"epoch": 0.38122775800711745,
"grad_norm": 0.36843836307525635,
"learning_rate": 2.8743718592964825e-05,
"loss": 0.9911,
"step": 857
},
{
"epoch": 0.3816725978647687,
"grad_norm": 0.35289087891578674,
"learning_rate": 2.8542713567839198e-05,
"loss": 0.9575,
"step": 858
},
{
"epoch": 0.3821174377224199,
"grad_norm": 0.39545342326164246,
"learning_rate": 2.8341708542713568e-05,
"loss": 0.9074,
"step": 859
},
{
"epoch": 0.38256227758007116,
"grad_norm": 0.3101719319820404,
"learning_rate": 2.814070351758794e-05,
"loss": 0.8479,
"step": 860
},
{
"epoch": 0.3830071174377224,
"grad_norm": 0.38031789660453796,
"learning_rate": 2.7939698492462314e-05,
"loss": 1.0281,
"step": 861
},
{
"epoch": 0.3834519572953737,
"grad_norm": 0.38144952058792114,
"learning_rate": 2.7738693467336686e-05,
"loss": 0.9668,
"step": 862
},
{
"epoch": 0.38389679715302494,
"grad_norm": 0.41572901606559753,
"learning_rate": 2.7537688442211056e-05,
"loss": 0.9258,
"step": 863
},
{
"epoch": 0.38434163701067614,
"grad_norm": 0.2982839345932007,
"learning_rate": 2.733668341708543e-05,
"loss": 0.8807,
"step": 864
},
{
"epoch": 0.3847864768683274,
"grad_norm": 0.3221079111099243,
"learning_rate": 2.7135678391959802e-05,
"loss": 0.9178,
"step": 865
},
{
"epoch": 0.38523131672597866,
"grad_norm": 0.38755419850349426,
"learning_rate": 2.6934673366834175e-05,
"loss": 0.9183,
"step": 866
},
{
"epoch": 0.3856761565836299,
"grad_norm": 0.36158278584480286,
"learning_rate": 2.6733668341708545e-05,
"loss": 0.9274,
"step": 867
},
{
"epoch": 0.3861209964412811,
"grad_norm": 0.308855801820755,
"learning_rate": 2.6532663316582917e-05,
"loss": 0.9158,
"step": 868
},
{
"epoch": 0.3865658362989324,
"grad_norm": 0.42090025544166565,
"learning_rate": 2.633165829145729e-05,
"loss": 0.9392,
"step": 869
},
{
"epoch": 0.38701067615658363,
"grad_norm": 0.3310891389846802,
"learning_rate": 2.613065326633166e-05,
"loss": 0.9232,
"step": 870
},
{
"epoch": 0.3874555160142349,
"grad_norm": 0.3915763199329376,
"learning_rate": 2.5929648241206033e-05,
"loss": 0.8819,
"step": 871
},
{
"epoch": 0.3879003558718861,
"grad_norm": 0.3351011872291565,
"learning_rate": 2.5728643216080406e-05,
"loss": 0.9757,
"step": 872
},
{
"epoch": 0.38834519572953735,
"grad_norm": 0.32709652185440063,
"learning_rate": 2.5527638190954772e-05,
"loss": 0.8823,
"step": 873
},
{
"epoch": 0.3887900355871886,
"grad_norm": 0.34310227632522583,
"learning_rate": 2.5326633165829145e-05,
"loss": 0.8623,
"step": 874
},
{
"epoch": 0.38923487544483987,
"grad_norm": 0.3940928876399994,
"learning_rate": 2.5125628140703518e-05,
"loss": 0.9409,
"step": 875
},
{
"epoch": 0.3896797153024911,
"grad_norm": 0.34565237164497375,
"learning_rate": 2.492462311557789e-05,
"loss": 0.9369,
"step": 876
},
{
"epoch": 0.39012455516014233,
"grad_norm": 0.37526604533195496,
"learning_rate": 2.4723618090452264e-05,
"loss": 0.8391,
"step": 877
},
{
"epoch": 0.3905693950177936,
"grad_norm": 0.3760671019554138,
"learning_rate": 2.4522613065326637e-05,
"loss": 0.9549,
"step": 878
},
{
"epoch": 0.39101423487544484,
"grad_norm": 0.35706883668899536,
"learning_rate": 2.4321608040201007e-05,
"loss": 0.7821,
"step": 879
},
{
"epoch": 0.3914590747330961,
"grad_norm": 0.37825968861579895,
"learning_rate": 2.4120603015075376e-05,
"loss": 1.0344,
"step": 880
},
{
"epoch": 0.3919039145907473,
"grad_norm": 0.3388988673686981,
"learning_rate": 2.391959798994975e-05,
"loss": 0.9347,
"step": 881
},
{
"epoch": 0.39234875444839856,
"grad_norm": 0.35239583253860474,
"learning_rate": 2.3718592964824122e-05,
"loss": 0.8896,
"step": 882
},
{
"epoch": 0.3927935943060498,
"grad_norm": 0.34962180256843567,
"learning_rate": 2.351758793969849e-05,
"loss": 0.9186,
"step": 883
},
{
"epoch": 0.3932384341637011,
"grad_norm": 0.3848302960395813,
"learning_rate": 2.3316582914572865e-05,
"loss": 1.0796,
"step": 884
},
{
"epoch": 0.39368327402135234,
"grad_norm": 0.4643072783946991,
"learning_rate": 2.3115577889447238e-05,
"loss": 0.9133,
"step": 885
},
{
"epoch": 0.39412811387900354,
"grad_norm": 0.3217661678791046,
"learning_rate": 2.291457286432161e-05,
"loss": 0.9077,
"step": 886
},
{
"epoch": 0.3945729537366548,
"grad_norm": 0.307915061712265,
"learning_rate": 2.271356783919598e-05,
"loss": 0.9465,
"step": 887
},
{
"epoch": 0.39501779359430605,
"grad_norm": 0.41546520590782166,
"learning_rate": 2.2512562814070353e-05,
"loss": 0.9453,
"step": 888
},
{
"epoch": 0.3954626334519573,
"grad_norm": 0.44572389125823975,
"learning_rate": 2.2311557788944726e-05,
"loss": 1.1163,
"step": 889
},
{
"epoch": 0.3959074733096085,
"grad_norm": 0.3532145619392395,
"learning_rate": 2.21105527638191e-05,
"loss": 0.9142,
"step": 890
},
{
"epoch": 0.3963523131672598,
"grad_norm": 0.3812106251716614,
"learning_rate": 2.190954773869347e-05,
"loss": 0.8557,
"step": 891
},
{
"epoch": 0.39679715302491103,
"grad_norm": 0.40992099046707153,
"learning_rate": 2.1708542713567838e-05,
"loss": 0.9685,
"step": 892
},
{
"epoch": 0.3972419928825623,
"grad_norm": 0.35445836186408997,
"learning_rate": 2.150753768844221e-05,
"loss": 0.9416,
"step": 893
},
{
"epoch": 0.39768683274021355,
"grad_norm": 0.34018823504447937,
"learning_rate": 2.1306532663316584e-05,
"loss": 0.9203,
"step": 894
},
{
"epoch": 0.39813167259786475,
"grad_norm": 0.37122175097465515,
"learning_rate": 2.1105527638190957e-05,
"loss": 1.1101,
"step": 895
},
{
"epoch": 0.398576512455516,
"grad_norm": 0.3946356177330017,
"learning_rate": 2.0904522613065327e-05,
"loss": 0.932,
"step": 896
},
{
"epoch": 0.39902135231316727,
"grad_norm": 0.3920304775238037,
"learning_rate": 2.07035175879397e-05,
"loss": 0.9307,
"step": 897
},
{
"epoch": 0.3994661921708185,
"grad_norm": 0.35796669125556946,
"learning_rate": 2.0502512562814073e-05,
"loss": 0.9004,
"step": 898
},
{
"epoch": 0.3999110320284697,
"grad_norm": 0.37431228160858154,
"learning_rate": 2.0301507537688446e-05,
"loss": 0.9462,
"step": 899
},
{
"epoch": 0.400355871886121,
"grad_norm": 0.43452969193458557,
"learning_rate": 2.0100502512562815e-05,
"loss": 0.9755,
"step": 900
},
{
"epoch": 0.40080071174377224,
"grad_norm": 0.308403342962265,
"learning_rate": 1.9899497487437188e-05,
"loss": 0.8736,
"step": 901
},
{
"epoch": 0.4012455516014235,
"grad_norm": 0.425761342048645,
"learning_rate": 1.9698492462311558e-05,
"loss": 1.0335,
"step": 902
},
{
"epoch": 0.40169039145907476,
"grad_norm": 0.4178042709827423,
"learning_rate": 1.949748743718593e-05,
"loss": 0.8853,
"step": 903
},
{
"epoch": 0.40213523131672596,
"grad_norm": 0.31192678213119507,
"learning_rate": 1.92964824120603e-05,
"loss": 0.8916,
"step": 904
},
{
"epoch": 0.4025800711743772,
"grad_norm": 0.4430267810821533,
"learning_rate": 1.9095477386934673e-05,
"loss": 0.962,
"step": 905
},
{
"epoch": 0.4030249110320285,
"grad_norm": 0.34475943446159363,
"learning_rate": 1.8894472361809046e-05,
"loss": 0.8855,
"step": 906
},
{
"epoch": 0.40346975088967973,
"grad_norm": 0.36281871795654297,
"learning_rate": 1.869346733668342e-05,
"loss": 0.8925,
"step": 907
},
{
"epoch": 0.40391459074733094,
"grad_norm": 0.39103028178215027,
"learning_rate": 1.849246231155779e-05,
"loss": 0.86,
"step": 908
},
{
"epoch": 0.4043594306049822,
"grad_norm": 0.3842105567455292,
"learning_rate": 1.829145728643216e-05,
"loss": 1.0245,
"step": 909
},
{
"epoch": 0.40480427046263345,
"grad_norm": 0.4096086025238037,
"learning_rate": 1.8090452261306535e-05,
"loss": 0.8985,
"step": 910
},
{
"epoch": 0.4052491103202847,
"grad_norm": 0.3068806827068329,
"learning_rate": 1.7889447236180908e-05,
"loss": 0.804,
"step": 911
},
{
"epoch": 0.40569395017793597,
"grad_norm": 0.28686726093292236,
"learning_rate": 1.7688442211055277e-05,
"loss": 1.0117,
"step": 912
},
{
"epoch": 0.40613879003558717,
"grad_norm": 0.2880958616733551,
"learning_rate": 1.748743718592965e-05,
"loss": 0.8703,
"step": 913
},
{
"epoch": 0.40658362989323843,
"grad_norm": 0.3917515277862549,
"learning_rate": 1.728643216080402e-05,
"loss": 0.968,
"step": 914
},
{
"epoch": 0.4070284697508897,
"grad_norm": 0.3814576268196106,
"learning_rate": 1.7085427135678393e-05,
"loss": 0.9305,
"step": 915
},
{
"epoch": 0.40747330960854095,
"grad_norm": 0.3277512192726135,
"learning_rate": 1.6884422110552762e-05,
"loss": 0.8769,
"step": 916
},
{
"epoch": 0.40791814946619215,
"grad_norm": 0.3101690709590912,
"learning_rate": 1.6683417085427135e-05,
"loss": 0.8974,
"step": 917
},
{
"epoch": 0.4083629893238434,
"grad_norm": 0.34977224469184875,
"learning_rate": 1.6482412060301508e-05,
"loss": 0.9463,
"step": 918
},
{
"epoch": 0.40880782918149466,
"grad_norm": 0.35917431116104126,
"learning_rate": 1.628140703517588e-05,
"loss": 0.9637,
"step": 919
},
{
"epoch": 0.4092526690391459,
"grad_norm": 0.36873766779899597,
"learning_rate": 1.608040201005025e-05,
"loss": 0.7935,
"step": 920
},
{
"epoch": 0.4096975088967972,
"grad_norm": 0.348622590303421,
"learning_rate": 1.5879396984924624e-05,
"loss": 0.9165,
"step": 921
},
{
"epoch": 0.4101423487544484,
"grad_norm": 0.3866550028324127,
"learning_rate": 1.5678391959798997e-05,
"loss": 0.9679,
"step": 922
},
{
"epoch": 0.41058718861209964,
"grad_norm": 0.3473438620567322,
"learning_rate": 1.547738693467337e-05,
"loss": 0.9228,
"step": 923
},
{
"epoch": 0.4110320284697509,
"grad_norm": 0.43389132618904114,
"learning_rate": 1.527638190954774e-05,
"loss": 0.9541,
"step": 924
},
{
"epoch": 0.41147686832740216,
"grad_norm": 0.40540170669555664,
"learning_rate": 1.507537688442211e-05,
"loss": 0.9814,
"step": 925
},
{
"epoch": 0.41192170818505336,
"grad_norm": 0.3193083107471466,
"learning_rate": 1.4874371859296482e-05,
"loss": 0.9914,
"step": 926
},
{
"epoch": 0.4123665480427046,
"grad_norm": 0.3414134383201599,
"learning_rate": 1.4673366834170855e-05,
"loss": 1.0247,
"step": 927
},
{
"epoch": 0.4128113879003559,
"grad_norm": 0.3683622181415558,
"learning_rate": 1.4472361809045226e-05,
"loss": 0.997,
"step": 928
},
{
"epoch": 0.41325622775800713,
"grad_norm": 0.37435421347618103,
"learning_rate": 1.4271356783919599e-05,
"loss": 0.902,
"step": 929
},
{
"epoch": 0.41370106761565834,
"grad_norm": 0.31382328271865845,
"learning_rate": 1.407035175879397e-05,
"loss": 0.9035,
"step": 930
},
{
"epoch": 0.4141459074733096,
"grad_norm": 0.35533666610717773,
"learning_rate": 1.3869346733668343e-05,
"loss": 1.0198,
"step": 931
},
{
"epoch": 0.41459074733096085,
"grad_norm": 0.3287508189678192,
"learning_rate": 1.3668341708542715e-05,
"loss": 0.8756,
"step": 932
},
{
"epoch": 0.4150355871886121,
"grad_norm": 0.3902789056301117,
"learning_rate": 1.3467336683417087e-05,
"loss": 0.8743,
"step": 933
},
{
"epoch": 0.41548042704626337,
"grad_norm": 0.3326834738254547,
"learning_rate": 1.3266331658291459e-05,
"loss": 0.8924,
"step": 934
},
{
"epoch": 0.41592526690391457,
"grad_norm": 0.3254244923591614,
"learning_rate": 1.306532663316583e-05,
"loss": 0.9863,
"step": 935
},
{
"epoch": 0.41637010676156583,
"grad_norm": 0.41529661417007446,
"learning_rate": 1.2864321608040203e-05,
"loss": 0.8844,
"step": 936
},
{
"epoch": 0.4168149466192171,
"grad_norm": 0.31171804666519165,
"learning_rate": 1.2663316582914573e-05,
"loss": 0.9245,
"step": 937
},
{
"epoch": 0.41725978647686834,
"grad_norm": 0.33847880363464355,
"learning_rate": 1.2462311557788946e-05,
"loss": 0.9414,
"step": 938
},
{
"epoch": 0.41770462633451955,
"grad_norm": 0.35839855670928955,
"learning_rate": 1.2261306532663318e-05,
"loss": 0.924,
"step": 939
},
{
"epoch": 0.4181494661921708,
"grad_norm": 0.3934418857097626,
"learning_rate": 1.2060301507537688e-05,
"loss": 0.9718,
"step": 940
},
{
"epoch": 0.41859430604982206,
"grad_norm": 0.35172393918037415,
"learning_rate": 1.1859296482412061e-05,
"loss": 0.8878,
"step": 941
},
{
"epoch": 0.4190391459074733,
"grad_norm": 0.4257389307022095,
"learning_rate": 1.1658291457286432e-05,
"loss": 0.9914,
"step": 942
},
{
"epoch": 0.4194839857651246,
"grad_norm": 0.3889011740684509,
"learning_rate": 1.1457286432160805e-05,
"loss": 0.8339,
"step": 943
},
{
"epoch": 0.4199288256227758,
"grad_norm": 0.35240477323532104,
"learning_rate": 1.1256281407035177e-05,
"loss": 0.9792,
"step": 944
},
{
"epoch": 0.42037366548042704,
"grad_norm": 0.3817644715309143,
"learning_rate": 1.105527638190955e-05,
"loss": 0.992,
"step": 945
},
{
"epoch": 0.4208185053380783,
"grad_norm": 0.36161094903945923,
"learning_rate": 1.0854271356783919e-05,
"loss": 0.8468,
"step": 946
},
{
"epoch": 0.42126334519572955,
"grad_norm": 0.3254898488521576,
"learning_rate": 1.0653266331658292e-05,
"loss": 0.9998,
"step": 947
},
{
"epoch": 0.42170818505338076,
"grad_norm": 0.3513511121273041,
"learning_rate": 1.0452261306532663e-05,
"loss": 0.8928,
"step": 948
},
{
"epoch": 0.422153024911032,
"grad_norm": 0.3499341309070587,
"learning_rate": 1.0251256281407036e-05,
"loss": 0.9137,
"step": 949
},
{
"epoch": 0.4225978647686833,
"grad_norm": 0.3495025932788849,
"learning_rate": 1.0050251256281408e-05,
"loss": 0.8818,
"step": 950
},
{
"epoch": 0.42304270462633453,
"grad_norm": 0.3569670021533966,
"learning_rate": 9.849246231155779e-06,
"loss": 0.9912,
"step": 951
},
{
"epoch": 0.4234875444839858,
"grad_norm": 0.394522100687027,
"learning_rate": 9.64824120603015e-06,
"loss": 0.863,
"step": 952
},
{
"epoch": 0.423932384341637,
"grad_norm": 0.35868117213249207,
"learning_rate": 9.447236180904523e-06,
"loss": 0.9022,
"step": 953
},
{
"epoch": 0.42437722419928825,
"grad_norm": 0.33881279826164246,
"learning_rate": 9.246231155778894e-06,
"loss": 0.9634,
"step": 954
},
{
"epoch": 0.4248220640569395,
"grad_norm": 0.36202844977378845,
"learning_rate": 9.045226130653267e-06,
"loss": 0.9441,
"step": 955
},
{
"epoch": 0.42526690391459077,
"grad_norm": 0.39875316619873047,
"learning_rate": 8.844221105527639e-06,
"loss": 0.9702,
"step": 956
},
{
"epoch": 0.42571174377224197,
"grad_norm": 0.3653241693973541,
"learning_rate": 8.64321608040201e-06,
"loss": 0.9436,
"step": 957
},
{
"epoch": 0.4261565836298932,
"grad_norm": 0.31130754947662354,
"learning_rate": 8.442211055276381e-06,
"loss": 0.8579,
"step": 958
},
{
"epoch": 0.4266014234875445,
"grad_norm": 0.3955715000629425,
"learning_rate": 8.241206030150754e-06,
"loss": 0.9889,
"step": 959
},
{
"epoch": 0.42704626334519574,
"grad_norm": 0.35064584016799927,
"learning_rate": 8.040201005025125e-06,
"loss": 0.8877,
"step": 960
},
{
"epoch": 0.427491103202847,
"grad_norm": 0.38837116956710815,
"learning_rate": 7.839195979899498e-06,
"loss": 0.9255,
"step": 961
},
{
"epoch": 0.4279359430604982,
"grad_norm": 0.3994256556034088,
"learning_rate": 7.63819095477387e-06,
"loss": 0.9613,
"step": 962
},
{
"epoch": 0.42838078291814946,
"grad_norm": 0.5106455087661743,
"learning_rate": 7.437185929648241e-06,
"loss": 0.9656,
"step": 963
},
{
"epoch": 0.4288256227758007,
"grad_norm": 0.46163278818130493,
"learning_rate": 7.236180904522613e-06,
"loss": 1.0239,
"step": 964
},
{
"epoch": 0.429270462633452,
"grad_norm": 0.37351593375205994,
"learning_rate": 7.035175879396985e-06,
"loss": 0.9637,
"step": 965
},
{
"epoch": 0.4297153024911032,
"grad_norm": 0.34434255957603455,
"learning_rate": 6.834170854271357e-06,
"loss": 0.9929,
"step": 966
},
{
"epoch": 0.43016014234875444,
"grad_norm": 0.3309576213359833,
"learning_rate": 6.633165829145729e-06,
"loss": 0.9139,
"step": 967
},
{
"epoch": 0.4306049822064057,
"grad_norm": 0.5205715298652649,
"learning_rate": 6.4321608040201015e-06,
"loss": 1.0866,
"step": 968
},
{
"epoch": 0.43104982206405695,
"grad_norm": 0.417214035987854,
"learning_rate": 6.231155778894473e-06,
"loss": 0.9765,
"step": 969
},
{
"epoch": 0.4314946619217082,
"grad_norm": 0.3578605651855469,
"learning_rate": 6.030150753768844e-06,
"loss": 0.9261,
"step": 970
},
{
"epoch": 0.4319395017793594,
"grad_norm": 0.35047677159309387,
"learning_rate": 5.829145728643216e-06,
"loss": 0.8318,
"step": 971
},
{
"epoch": 0.43238434163701067,
"grad_norm": 0.30359870195388794,
"learning_rate": 5.628140703517588e-06,
"loss": 0.9497,
"step": 972
},
{
"epoch": 0.43282918149466193,
"grad_norm": 0.32898378372192383,
"learning_rate": 5.4271356783919595e-06,
"loss": 0.9873,
"step": 973
},
{
"epoch": 0.4332740213523132,
"grad_norm": 0.33104610443115234,
"learning_rate": 5.226130653266332e-06,
"loss": 0.9082,
"step": 974
},
{
"epoch": 0.4337188612099644,
"grad_norm": 0.3245652914047241,
"learning_rate": 5.025125628140704e-06,
"loss": 0.8933,
"step": 975
},
{
"epoch": 0.43416370106761565,
"grad_norm": 0.3264075517654419,
"learning_rate": 4.824120603015075e-06,
"loss": 0.9778,
"step": 976
},
{
"epoch": 0.4346085409252669,
"grad_norm": 0.44732666015625,
"learning_rate": 4.623115577889447e-06,
"loss": 0.9509,
"step": 977
},
{
"epoch": 0.43505338078291816,
"grad_norm": 0.35666903853416443,
"learning_rate": 4.422110552763819e-06,
"loss": 0.9392,
"step": 978
},
{
"epoch": 0.4354982206405694,
"grad_norm": 0.3573732078075409,
"learning_rate": 4.2211055276381906e-06,
"loss": 0.9323,
"step": 979
},
{
"epoch": 0.4359430604982206,
"grad_norm": 0.3136303126811981,
"learning_rate": 4.020100502512563e-06,
"loss": 0.959,
"step": 980
},
{
"epoch": 0.4363879003558719,
"grad_norm": 0.36791837215423584,
"learning_rate": 3.819095477386935e-06,
"loss": 0.9151,
"step": 981
},
{
"epoch": 0.43683274021352314,
"grad_norm": 0.3033190965652466,
"learning_rate": 3.6180904522613065e-06,
"loss": 0.9223,
"step": 982
},
{
"epoch": 0.4372775800711744,
"grad_norm": 0.35878488421440125,
"learning_rate": 3.4170854271356786e-06,
"loss": 0.9418,
"step": 983
},
{
"epoch": 0.4377224199288256,
"grad_norm": 0.3010809123516083,
"learning_rate": 3.2160804020100507e-06,
"loss": 0.8418,
"step": 984
},
{
"epoch": 0.43816725978647686,
"grad_norm": 0.4400476813316345,
"learning_rate": 3.015075376884422e-06,
"loss": 0.9641,
"step": 985
},
{
"epoch": 0.4386120996441281,
"grad_norm": 0.34438541531562805,
"learning_rate": 2.814070351758794e-06,
"loss": 0.8904,
"step": 986
},
{
"epoch": 0.4390569395017794,
"grad_norm": 0.39025887846946716,
"learning_rate": 2.613065326633166e-06,
"loss": 0.9406,
"step": 987
},
{
"epoch": 0.4395017793594306,
"grad_norm": 0.29612797498703003,
"learning_rate": 2.4120603015075375e-06,
"loss": 0.9153,
"step": 988
},
{
"epoch": 0.43994661921708184,
"grad_norm": 0.3811575174331665,
"learning_rate": 2.2110552763819096e-06,
"loss": 0.8314,
"step": 989
},
{
"epoch": 0.4403914590747331,
"grad_norm": 0.3829675614833832,
"learning_rate": 2.0100502512562813e-06,
"loss": 0.8798,
"step": 990
},
{
"epoch": 0.44083629893238435,
"grad_norm": 0.33976423740386963,
"learning_rate": 1.8090452261306533e-06,
"loss": 0.8793,
"step": 991
},
{
"epoch": 0.4412811387900356,
"grad_norm": 0.3907322883605957,
"learning_rate": 1.6080402010050254e-06,
"loss": 0.9893,
"step": 992
},
{
"epoch": 0.4417259786476868,
"grad_norm": 0.33631375432014465,
"learning_rate": 1.407035175879397e-06,
"loss": 0.8798,
"step": 993
},
{
"epoch": 0.44217081850533807,
"grad_norm": 0.41036009788513184,
"learning_rate": 1.2060301507537688e-06,
"loss": 0.843,
"step": 994
},
{
"epoch": 0.44261565836298933,
"grad_norm": 0.3805226683616638,
"learning_rate": 1.0050251256281407e-06,
"loss": 1.0452,
"step": 995
},
{
"epoch": 0.4430604982206406,
"grad_norm": 0.40261736512184143,
"learning_rate": 8.040201005025127e-07,
"loss": 0.8972,
"step": 996
},
{
"epoch": 0.4435053380782918,
"grad_norm": 0.3762563467025757,
"learning_rate": 6.030150753768844e-07,
"loss": 0.8555,
"step": 997
},
{
"epoch": 0.44395017793594305,
"grad_norm": 0.3261839747428894,
"learning_rate": 4.0201005025125634e-07,
"loss": 0.8795,
"step": 998
},
{
"epoch": 0.4443950177935943,
"grad_norm": 0.309962660074234,
"learning_rate": 2.0100502512562817e-07,
"loss": 0.9488,
"step": 999
},
{
"epoch": 0.44483985765124556,
"grad_norm": 0.3620010018348694,
"learning_rate": 0.0,
"loss": 0.953,
"step": 1000
}
],
"logging_steps": 1,
"max_steps": 1000,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.7939011197427712e+17,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}