|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"eval_steps": 500, |
|
"global_step": 12603, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.00238038562247084, |
|
"grad_norm": 29.197416305541992, |
|
"learning_rate": 4.996032690629215e-05, |
|
"loss": 5.8295, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.00476077124494168, |
|
"grad_norm": 2.8866491317749023, |
|
"learning_rate": 4.99206538125843e-05, |
|
"loss": 0.9476, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.007141156867412521, |
|
"grad_norm": 2.2606563568115234, |
|
"learning_rate": 4.988098071887646e-05, |
|
"loss": 0.1466, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.00952154248988336, |
|
"grad_norm": 2.5246834754943848, |
|
"learning_rate": 4.984130762516862e-05, |
|
"loss": 0.0596, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.011901928112354201, |
|
"grad_norm": 1.10219144821167, |
|
"learning_rate": 4.980163453146077e-05, |
|
"loss": 0.0351, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.014282313734825042, |
|
"grad_norm": 1.7988760471343994, |
|
"learning_rate": 4.976196143775292e-05, |
|
"loss": 0.0293, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.016662699357295883, |
|
"grad_norm": 0.2419203370809555, |
|
"learning_rate": 4.972228834404507e-05, |
|
"loss": 0.024, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.01904308497976672, |
|
"grad_norm": 0.992480993270874, |
|
"learning_rate": 4.9682615250337225e-05, |
|
"loss": 0.0191, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.021423470602237562, |
|
"grad_norm": 1.2107903957366943, |
|
"learning_rate": 4.9642942156629376e-05, |
|
"loss": 0.0147, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.023803856224708403, |
|
"grad_norm": 1.5667377710342407, |
|
"learning_rate": 4.960326906292153e-05, |
|
"loss": 0.0144, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.026184241847179244, |
|
"grad_norm": 1.7987982034683228, |
|
"learning_rate": 4.956359596921368e-05, |
|
"loss": 0.0121, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.028564627469650085, |
|
"grad_norm": 0.7142848968505859, |
|
"learning_rate": 4.952392287550583e-05, |
|
"loss": 0.0109, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.030945013092120922, |
|
"grad_norm": 0.9309341311454773, |
|
"learning_rate": 4.9484249781797984e-05, |
|
"loss": 0.0087, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.03332539871459177, |
|
"grad_norm": 0.2679256498813629, |
|
"learning_rate": 4.944457668809014e-05, |
|
"loss": 0.0065, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.035705784337062604, |
|
"grad_norm": 0.36588725447654724, |
|
"learning_rate": 4.940490359438229e-05, |
|
"loss": 0.0075, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.03808616995953344, |
|
"grad_norm": 0.6737563610076904, |
|
"learning_rate": 4.936523050067445e-05, |
|
"loss": 0.0092, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.040466555582004286, |
|
"grad_norm": 0.3371886610984802, |
|
"learning_rate": 4.93255574069666e-05, |
|
"loss": 0.0067, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.042846941204475124, |
|
"grad_norm": 1.0238951444625854, |
|
"learning_rate": 4.928588431325875e-05, |
|
"loss": 0.0084, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.04522732682694597, |
|
"grad_norm": 1.0350103378295898, |
|
"learning_rate": 4.9246211219550906e-05, |
|
"loss": 0.0073, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.047607712449416806, |
|
"grad_norm": 0.33256474137306213, |
|
"learning_rate": 4.9206538125843056e-05, |
|
"loss": 0.0082, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.04998809807188764, |
|
"grad_norm": 0.0693468451499939, |
|
"learning_rate": 4.9166865032135206e-05, |
|
"loss": 0.0044, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.05236848369435849, |
|
"grad_norm": 0.8809625506401062, |
|
"learning_rate": 4.912719193842736e-05, |
|
"loss": 0.0064, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.054748869316829325, |
|
"grad_norm": 0.36927270889282227, |
|
"learning_rate": 4.9087518844719514e-05, |
|
"loss": 0.0066, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.05712925493930017, |
|
"grad_norm": 0.8885632753372192, |
|
"learning_rate": 4.9047845751011664e-05, |
|
"loss": 0.0063, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.05950964056177101, |
|
"grad_norm": 0.5330325365066528, |
|
"learning_rate": 4.900817265730382e-05, |
|
"loss": 0.0059, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.061890026184241845, |
|
"grad_norm": 0.5747584700584412, |
|
"learning_rate": 4.896849956359597e-05, |
|
"loss": 0.0056, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.06427041180671268, |
|
"grad_norm": 0.10936570912599564, |
|
"learning_rate": 4.892882646988812e-05, |
|
"loss": 0.0038, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.06665079742918353, |
|
"grad_norm": 0.136638343334198, |
|
"learning_rate": 4.888915337618027e-05, |
|
"loss": 0.006, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.06903118305165437, |
|
"grad_norm": 0.25448599457740784, |
|
"learning_rate": 4.884948028247243e-05, |
|
"loss": 0.0052, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.07141156867412521, |
|
"grad_norm": 0.19224955141544342, |
|
"learning_rate": 4.8809807188764586e-05, |
|
"loss": 0.0041, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.07379195429659605, |
|
"grad_norm": 0.9061737060546875, |
|
"learning_rate": 4.8770134095056736e-05, |
|
"loss": 0.0051, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.07617233991906688, |
|
"grad_norm": 0.31071603298187256, |
|
"learning_rate": 4.873046100134889e-05, |
|
"loss": 0.0043, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.07855272554153774, |
|
"grad_norm": 0.054100409150123596, |
|
"learning_rate": 4.869078790764104e-05, |
|
"loss": 0.004, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.08093311116400857, |
|
"grad_norm": 0.11965326964855194, |
|
"learning_rate": 4.865111481393319e-05, |
|
"loss": 0.0039, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.08331349678647941, |
|
"grad_norm": 0.16056092083454132, |
|
"learning_rate": 4.861144172022535e-05, |
|
"loss": 0.0036, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.08569388240895025, |
|
"grad_norm": 0.08699148148298264, |
|
"learning_rate": 4.85717686265175e-05, |
|
"loss": 0.0032, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.08807426803142109, |
|
"grad_norm": 0.16824030876159668, |
|
"learning_rate": 4.853209553280965e-05, |
|
"loss": 0.0033, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.09045465365389194, |
|
"grad_norm": 0.07728957384824753, |
|
"learning_rate": 4.84924224391018e-05, |
|
"loss": 0.0023, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.09283503927636277, |
|
"grad_norm": 0.2950897514820099, |
|
"learning_rate": 4.845274934539395e-05, |
|
"loss": 0.0039, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.09521542489883361, |
|
"grad_norm": 0.6249143481254578, |
|
"learning_rate": 4.841307625168611e-05, |
|
"loss": 0.012, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.09759581052130445, |
|
"grad_norm": 0.06545058637857437, |
|
"learning_rate": 4.837340315797826e-05, |
|
"loss": 0.0022, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.09997619614377529, |
|
"grad_norm": 0.40417027473449707, |
|
"learning_rate": 4.833373006427042e-05, |
|
"loss": 0.003, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.10235658176624614, |
|
"grad_norm": 0.38520482182502747, |
|
"learning_rate": 4.829405697056257e-05, |
|
"loss": 0.0037, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.10473696738871698, |
|
"grad_norm": 0.9367744326591492, |
|
"learning_rate": 4.825438387685472e-05, |
|
"loss": 0.0029, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.10711735301118781, |
|
"grad_norm": 0.09369224309921265, |
|
"learning_rate": 4.8214710783146875e-05, |
|
"loss": 0.0021, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.10949773863365865, |
|
"grad_norm": 1.1114966869354248, |
|
"learning_rate": 4.8175037689439025e-05, |
|
"loss": 0.0024, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.11187812425612949, |
|
"grad_norm": 0.15539304912090302, |
|
"learning_rate": 4.8135364595731175e-05, |
|
"loss": 0.0026, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.11425850987860034, |
|
"grad_norm": 0.05451425537467003, |
|
"learning_rate": 4.809569150202333e-05, |
|
"loss": 0.0024, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.11663889550107118, |
|
"grad_norm": 0.08954957127571106, |
|
"learning_rate": 4.805601840831548e-05, |
|
"loss": 0.0032, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.11901928112354201, |
|
"grad_norm": 0.24188756942749023, |
|
"learning_rate": 4.801634531460763e-05, |
|
"loss": 0.0023, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.12139966674601285, |
|
"grad_norm": 0.062233567237854004, |
|
"learning_rate": 4.797667222089979e-05, |
|
"loss": 0.002, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.12378005236848369, |
|
"grad_norm": 0.605993926525116, |
|
"learning_rate": 4.793699912719194e-05, |
|
"loss": 0.0021, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.12616043799095453, |
|
"grad_norm": 1.5091257095336914, |
|
"learning_rate": 4.789732603348409e-05, |
|
"loss": 0.0026, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.12854082361342536, |
|
"grad_norm": 0.07300706952810287, |
|
"learning_rate": 4.785765293977625e-05, |
|
"loss": 0.0018, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.1309212092358962, |
|
"grad_norm": 0.07547351717948914, |
|
"learning_rate": 4.78179798460684e-05, |
|
"loss": 0.0022, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.13330159485836707, |
|
"grad_norm": 0.017345329746603966, |
|
"learning_rate": 4.7778306752360555e-05, |
|
"loss": 0.002, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.1356819804808379, |
|
"grad_norm": 0.048248808830976486, |
|
"learning_rate": 4.7738633658652705e-05, |
|
"loss": 0.0018, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.13806236610330874, |
|
"grad_norm": 0.04654766246676445, |
|
"learning_rate": 4.7698960564944856e-05, |
|
"loss": 0.0026, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.14044275172577958, |
|
"grad_norm": 0.7228689193725586, |
|
"learning_rate": 4.7659287471237006e-05, |
|
"loss": 0.0033, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.14282313734825042, |
|
"grad_norm": 0.01947982981801033, |
|
"learning_rate": 4.761961437752916e-05, |
|
"loss": 0.0024, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.14520352297072125, |
|
"grad_norm": 0.03398985415697098, |
|
"learning_rate": 4.7579941283821314e-05, |
|
"loss": 0.0019, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.1475839085931921, |
|
"grad_norm": 0.11993751674890518, |
|
"learning_rate": 4.754026819011347e-05, |
|
"loss": 0.0024, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.14996429421566293, |
|
"grad_norm": 0.02739240974187851, |
|
"learning_rate": 4.750059509640562e-05, |
|
"loss": 0.0019, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.15234467983813377, |
|
"grad_norm": 0.08998490869998932, |
|
"learning_rate": 4.746092200269777e-05, |
|
"loss": 0.0026, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.1547250654606046, |
|
"grad_norm": 0.06008267030119896, |
|
"learning_rate": 4.742124890898992e-05, |
|
"loss": 0.0019, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.15710545108307547, |
|
"grad_norm": 0.2969667911529541, |
|
"learning_rate": 4.738157581528208e-05, |
|
"loss": 0.0016, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.1594858367055463, |
|
"grad_norm": 0.056759823113679886, |
|
"learning_rate": 4.7341902721574236e-05, |
|
"loss": 0.0026, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.16186622232801715, |
|
"grad_norm": 0.36679673194885254, |
|
"learning_rate": 4.7302229627866386e-05, |
|
"loss": 0.0023, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.16424660795048798, |
|
"grad_norm": 0.29111284017562866, |
|
"learning_rate": 4.7262556534158536e-05, |
|
"loss": 0.0028, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.16662699357295882, |
|
"grad_norm": 0.48570939898490906, |
|
"learning_rate": 4.722288344045069e-05, |
|
"loss": 0.0015, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.16900737919542966, |
|
"grad_norm": 0.06863627582788467, |
|
"learning_rate": 4.718321034674284e-05, |
|
"loss": 0.0016, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.1713877648179005, |
|
"grad_norm": 0.18400460481643677, |
|
"learning_rate": 4.7143537253034994e-05, |
|
"loss": 0.0025, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.17376815044037133, |
|
"grad_norm": 0.02043345756828785, |
|
"learning_rate": 4.710386415932715e-05, |
|
"loss": 0.0014, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.17614853606284217, |
|
"grad_norm": 0.22026614844799042, |
|
"learning_rate": 4.70641910656193e-05, |
|
"loss": 0.0016, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.178528921685313, |
|
"grad_norm": 0.033756159245967865, |
|
"learning_rate": 4.702451797191145e-05, |
|
"loss": 0.0015, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.18090930730778387, |
|
"grad_norm": 0.03022690862417221, |
|
"learning_rate": 4.69848448782036e-05, |
|
"loss": 0.0016, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.1832896929302547, |
|
"grad_norm": 0.32997235655784607, |
|
"learning_rate": 4.694517178449576e-05, |
|
"loss": 0.0017, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.18567007855272555, |
|
"grad_norm": 0.6392120718955994, |
|
"learning_rate": 4.690549869078791e-05, |
|
"loss": 0.0015, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.18805046417519639, |
|
"grad_norm": 0.12279071658849716, |
|
"learning_rate": 4.6865825597080066e-05, |
|
"loss": 0.0016, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.19043084979766722, |
|
"grad_norm": 0.1228996068239212, |
|
"learning_rate": 4.682615250337222e-05, |
|
"loss": 0.0012, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.19281123542013806, |
|
"grad_norm": 0.23846402764320374, |
|
"learning_rate": 4.678647940966437e-05, |
|
"loss": 0.0011, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.1951916210426089, |
|
"grad_norm": 0.06786726415157318, |
|
"learning_rate": 4.674680631595652e-05, |
|
"loss": 0.0008, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.19757200666507974, |
|
"grad_norm": 0.062252361327409744, |
|
"learning_rate": 4.6707133222248675e-05, |
|
"loss": 0.0009, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.19995239228755057, |
|
"grad_norm": 0.10420612245798111, |
|
"learning_rate": 4.6667460128540825e-05, |
|
"loss": 0.0007, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.2023327779100214, |
|
"grad_norm": 0.024685313925147057, |
|
"learning_rate": 4.6627787034832975e-05, |
|
"loss": 0.0012, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.20471316353249228, |
|
"grad_norm": 0.07784374058246613, |
|
"learning_rate": 4.658811394112513e-05, |
|
"loss": 0.0011, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.2070935491549631, |
|
"grad_norm": 0.1463196724653244, |
|
"learning_rate": 4.654844084741728e-05, |
|
"loss": 0.0017, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.20947393477743395, |
|
"grad_norm": 0.04599474370479584, |
|
"learning_rate": 4.650876775370944e-05, |
|
"loss": 0.0013, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.2118543203999048, |
|
"grad_norm": 0.44877147674560547, |
|
"learning_rate": 4.646909466000159e-05, |
|
"loss": 0.0012, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.21423470602237563, |
|
"grad_norm": 1.3056105375289917, |
|
"learning_rate": 4.642942156629374e-05, |
|
"loss": 0.0018, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.21661509164484646, |
|
"grad_norm": 0.5220457911491394, |
|
"learning_rate": 4.638974847258589e-05, |
|
"loss": 0.0011, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.2189954772673173, |
|
"grad_norm": 0.5913621783256531, |
|
"learning_rate": 4.635007537887805e-05, |
|
"loss": 0.0013, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.22137586288978814, |
|
"grad_norm": 0.150216206908226, |
|
"learning_rate": 4.63104022851702e-05, |
|
"loss": 0.001, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.22375624851225898, |
|
"grad_norm": 0.022638270631432533, |
|
"learning_rate": 4.6270729191462355e-05, |
|
"loss": 0.0012, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.2261366341347298, |
|
"grad_norm": 0.017948875203728676, |
|
"learning_rate": 4.6231056097754505e-05, |
|
"loss": 0.0008, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.22851701975720068, |
|
"grad_norm": 0.25053608417510986, |
|
"learning_rate": 4.6191383004046656e-05, |
|
"loss": 0.0019, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.23089740537967152, |
|
"grad_norm": 0.12757046520709991, |
|
"learning_rate": 4.6151709910338806e-05, |
|
"loss": 0.0019, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.23327779100214235, |
|
"grad_norm": 0.185049369931221, |
|
"learning_rate": 4.611203681663096e-05, |
|
"loss": 0.0014, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.2356581766246132, |
|
"grad_norm": 0.37812331318855286, |
|
"learning_rate": 4.607236372292312e-05, |
|
"loss": 0.0014, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.23803856224708403, |
|
"grad_norm": 0.7450318336486816, |
|
"learning_rate": 4.603269062921527e-05, |
|
"loss": 0.0016, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.24041894786955487, |
|
"grad_norm": 0.03629771247506142, |
|
"learning_rate": 4.599301753550742e-05, |
|
"loss": 0.0012, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.2427993334920257, |
|
"grad_norm": 0.23223434388637543, |
|
"learning_rate": 4.595334444179957e-05, |
|
"loss": 0.0011, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.24517971911449654, |
|
"grad_norm": 0.08511273562908173, |
|
"learning_rate": 4.591367134809172e-05, |
|
"loss": 0.0006, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.24756010473696738, |
|
"grad_norm": 0.17114369571208954, |
|
"learning_rate": 4.587399825438388e-05, |
|
"loss": 0.001, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.24994049035943822, |
|
"grad_norm": 0.04517650604248047, |
|
"learning_rate": 4.5834325160676035e-05, |
|
"loss": 0.0008, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.25232087598190905, |
|
"grad_norm": 0.20234528183937073, |
|
"learning_rate": 4.5794652066968186e-05, |
|
"loss": 0.0014, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.2547012616043799, |
|
"grad_norm": 0.007534442003816366, |
|
"learning_rate": 4.5754978973260336e-05, |
|
"loss": 0.0008, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.25708164722685073, |
|
"grad_norm": 0.02520332857966423, |
|
"learning_rate": 4.5715305879552486e-05, |
|
"loss": 0.0008, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.25946203284932157, |
|
"grad_norm": 0.02674415148794651, |
|
"learning_rate": 4.5675632785844644e-05, |
|
"loss": 0.005, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.2618424184717924, |
|
"grad_norm": 0.0756726786494255, |
|
"learning_rate": 4.5635959692136794e-05, |
|
"loss": 0.0008, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.2642228040942633, |
|
"grad_norm": 0.18692266941070557, |
|
"learning_rate": 4.559628659842895e-05, |
|
"loss": 0.0021, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.26660318971673413, |
|
"grad_norm": 0.021881476044654846, |
|
"learning_rate": 4.55566135047211e-05, |
|
"loss": 0.0016, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.26898357533920497, |
|
"grad_norm": 0.16764195263385773, |
|
"learning_rate": 4.551694041101325e-05, |
|
"loss": 0.001, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.2713639609616758, |
|
"grad_norm": 0.6519142389297485, |
|
"learning_rate": 4.547726731730541e-05, |
|
"loss": 0.0015, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.27374434658414665, |
|
"grad_norm": 0.07793217897415161, |
|
"learning_rate": 4.543759422359756e-05, |
|
"loss": 0.0005, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.2761247322066175, |
|
"grad_norm": 0.04451458901166916, |
|
"learning_rate": 4.539792112988971e-05, |
|
"loss": 0.0009, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.2785051178290883, |
|
"grad_norm": 0.02606957219541073, |
|
"learning_rate": 4.5358248036181866e-05, |
|
"loss": 0.0013, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.28088550345155916, |
|
"grad_norm": 0.03642681613564491, |
|
"learning_rate": 4.531857494247402e-05, |
|
"loss": 0.0007, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.28326588907403, |
|
"grad_norm": 0.27240046858787537, |
|
"learning_rate": 4.527890184876617e-05, |
|
"loss": 0.0007, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.28564627469650083, |
|
"grad_norm": 0.01732662320137024, |
|
"learning_rate": 4.5239228755058324e-05, |
|
"loss": 0.0011, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.2880266603189717, |
|
"grad_norm": 0.10321195423603058, |
|
"learning_rate": 4.5199555661350474e-05, |
|
"loss": 0.0007, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.2904070459414425, |
|
"grad_norm": 0.060121580958366394, |
|
"learning_rate": 4.5159882567642625e-05, |
|
"loss": 0.0014, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.29278743156391335, |
|
"grad_norm": 0.028955884277820587, |
|
"learning_rate": 4.5120209473934775e-05, |
|
"loss": 0.0007, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.2951678171863842, |
|
"grad_norm": 0.0714436024427414, |
|
"learning_rate": 4.508053638022693e-05, |
|
"loss": 0.0007, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.297548202808855, |
|
"grad_norm": 0.052230022847652435, |
|
"learning_rate": 4.504086328651909e-05, |
|
"loss": 0.0008, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.29992858843132586, |
|
"grad_norm": 0.33476394414901733, |
|
"learning_rate": 4.500119019281124e-05, |
|
"loss": 0.0008, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.3023089740537967, |
|
"grad_norm": 0.07732009142637253, |
|
"learning_rate": 4.496151709910339e-05, |
|
"loss": 0.0012, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.30468935967626753, |
|
"grad_norm": 0.6843579411506653, |
|
"learning_rate": 4.492184400539554e-05, |
|
"loss": 0.0007, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.3070697452987384, |
|
"grad_norm": 0.08292358368635178, |
|
"learning_rate": 4.488217091168769e-05, |
|
"loss": 0.0005, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.3094501309212092, |
|
"grad_norm": 0.02598383277654648, |
|
"learning_rate": 4.484249781797985e-05, |
|
"loss": 0.001, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.3118305165436801, |
|
"grad_norm": 0.7855332493782043, |
|
"learning_rate": 4.4802824724272005e-05, |
|
"loss": 0.0007, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.31421090216615094, |
|
"grad_norm": 0.07066315412521362, |
|
"learning_rate": 4.4763151630564155e-05, |
|
"loss": 0.0005, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.3165912877886218, |
|
"grad_norm": 0.012595695443451405, |
|
"learning_rate": 4.4723478536856305e-05, |
|
"loss": 0.0005, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.3189716734110926, |
|
"grad_norm": 0.015364304184913635, |
|
"learning_rate": 4.4683805443148455e-05, |
|
"loss": 0.0005, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.32135205903356345, |
|
"grad_norm": 0.0556706003844738, |
|
"learning_rate": 4.464413234944061e-05, |
|
"loss": 0.0011, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.3237324446560343, |
|
"grad_norm": 0.22568030655384064, |
|
"learning_rate": 4.460445925573277e-05, |
|
"loss": 0.0023, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.32611283027850513, |
|
"grad_norm": 0.048404548317193985, |
|
"learning_rate": 4.456478616202492e-05, |
|
"loss": 0.0016, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.32849321590097597, |
|
"grad_norm": 0.0693359524011612, |
|
"learning_rate": 4.452511306831707e-05, |
|
"loss": 0.0038, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.3308736015234468, |
|
"grad_norm": 0.16493481397628784, |
|
"learning_rate": 4.448543997460922e-05, |
|
"loss": 0.0006, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.33325398714591764, |
|
"grad_norm": 1.200024962425232, |
|
"learning_rate": 4.444576688090137e-05, |
|
"loss": 0.0011, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.3356343727683885, |
|
"grad_norm": 0.23021258413791656, |
|
"learning_rate": 4.440609378719353e-05, |
|
"loss": 0.0009, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.3380147583908593, |
|
"grad_norm": 0.0196574367582798, |
|
"learning_rate": 4.436642069348568e-05, |
|
"loss": 0.0006, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.34039514401333015, |
|
"grad_norm": 0.3254101574420929, |
|
"learning_rate": 4.4326747599777835e-05, |
|
"loss": 0.0015, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.342775529635801, |
|
"grad_norm": 0.026332201436161995, |
|
"learning_rate": 4.4287074506069986e-05, |
|
"loss": 0.0017, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.34515591525827183, |
|
"grad_norm": 0.2679558992385864, |
|
"learning_rate": 4.4247401412362136e-05, |
|
"loss": 0.0012, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.34753630088074267, |
|
"grad_norm": 0.06991584599018097, |
|
"learning_rate": 4.420772831865429e-05, |
|
"loss": 0.0007, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.3499166865032135, |
|
"grad_norm": 0.036999981850385666, |
|
"learning_rate": 4.416805522494644e-05, |
|
"loss": 0.001, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.35229707212568434, |
|
"grad_norm": 0.042684607207775116, |
|
"learning_rate": 4.4128382131238594e-05, |
|
"loss": 0.0009, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.3546774577481552, |
|
"grad_norm": 0.013829515315592289, |
|
"learning_rate": 4.408870903753075e-05, |
|
"loss": 0.0008, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.357057843370626, |
|
"grad_norm": 0.0129277054220438, |
|
"learning_rate": 4.40490359438229e-05, |
|
"loss": 0.0007, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.3594382289930969, |
|
"grad_norm": 0.03553192317485809, |
|
"learning_rate": 4.400936285011505e-05, |
|
"loss": 0.0008, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.36181861461556775, |
|
"grad_norm": 0.01258548628538847, |
|
"learning_rate": 4.396968975640721e-05, |
|
"loss": 0.001, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.3641990002380386, |
|
"grad_norm": 0.021352197974920273, |
|
"learning_rate": 4.393001666269936e-05, |
|
"loss": 0.001, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.3665793858605094, |
|
"grad_norm": 0.035958483815193176, |
|
"learning_rate": 4.389034356899151e-05, |
|
"loss": 0.0007, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.36895977148298026, |
|
"grad_norm": 0.013187541626393795, |
|
"learning_rate": 4.3850670475283666e-05, |
|
"loss": 0.0009, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.3713401571054511, |
|
"grad_norm": 0.02294233813881874, |
|
"learning_rate": 4.3810997381575816e-05, |
|
"loss": 0.0008, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.37372054272792193, |
|
"grad_norm": 0.14476238191127777, |
|
"learning_rate": 4.3771324287867974e-05, |
|
"loss": 0.0005, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.37610092835039277, |
|
"grad_norm": 0.2275228053331375, |
|
"learning_rate": 4.3731651194160124e-05, |
|
"loss": 0.0006, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.3784813139728636, |
|
"grad_norm": 0.020434999838471413, |
|
"learning_rate": 4.3691978100452274e-05, |
|
"loss": 0.0004, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.38086169959533445, |
|
"grad_norm": 0.01040293462574482, |
|
"learning_rate": 4.3652305006744424e-05, |
|
"loss": 0.0003, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.3832420852178053, |
|
"grad_norm": 0.0240499097853899, |
|
"learning_rate": 4.3612631913036575e-05, |
|
"loss": 0.0008, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.3856224708402761, |
|
"grad_norm": 0.014826517552137375, |
|
"learning_rate": 4.357295881932874e-05, |
|
"loss": 0.0004, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.38800285646274696, |
|
"grad_norm": 0.011841246858239174, |
|
"learning_rate": 4.353328572562089e-05, |
|
"loss": 0.0007, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.3903832420852178, |
|
"grad_norm": 0.0156678706407547, |
|
"learning_rate": 4.349361263191304e-05, |
|
"loss": 0.0006, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.39276362770768863, |
|
"grad_norm": 0.06124578043818474, |
|
"learning_rate": 4.345393953820519e-05, |
|
"loss": 0.0005, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.39514401333015947, |
|
"grad_norm": 0.06753918528556824, |
|
"learning_rate": 4.341426644449734e-05, |
|
"loss": 0.0006, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.3975243989526303, |
|
"grad_norm": 0.08766347169876099, |
|
"learning_rate": 4.33745933507895e-05, |
|
"loss": 0.0003, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.39990478457510115, |
|
"grad_norm": 0.021080242469906807, |
|
"learning_rate": 4.3334920257081654e-05, |
|
"loss": 0.0008, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.402285170197572, |
|
"grad_norm": 0.11970046162605286, |
|
"learning_rate": 4.3295247163373804e-05, |
|
"loss": 0.0005, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.4046655558200428, |
|
"grad_norm": 0.027210582047700882, |
|
"learning_rate": 4.3255574069665955e-05, |
|
"loss": 0.0003, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.4070459414425137, |
|
"grad_norm": 0.021168386563658714, |
|
"learning_rate": 4.3215900975958105e-05, |
|
"loss": 0.0005, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.40942632706498455, |
|
"grad_norm": 0.012768070213496685, |
|
"learning_rate": 4.3176227882250255e-05, |
|
"loss": 0.0005, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.4118067126874554, |
|
"grad_norm": 0.1276211142539978, |
|
"learning_rate": 4.313655478854241e-05, |
|
"loss": 0.0005, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.4141870983099262, |
|
"grad_norm": 0.08978109806776047, |
|
"learning_rate": 4.309688169483457e-05, |
|
"loss": 0.0009, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.41656748393239706, |
|
"grad_norm": 0.3068161606788635, |
|
"learning_rate": 4.305720860112672e-05, |
|
"loss": 0.0007, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.4189478695548679, |
|
"grad_norm": 0.01211560145020485, |
|
"learning_rate": 4.301753550741887e-05, |
|
"loss": 0.0006, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.42132825517733874, |
|
"grad_norm": 0.02517927996814251, |
|
"learning_rate": 4.297786241371102e-05, |
|
"loss": 0.0006, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.4237086407998096, |
|
"grad_norm": 0.017450081184506416, |
|
"learning_rate": 4.293818932000318e-05, |
|
"loss": 0.0003, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.4260890264222804, |
|
"grad_norm": 0.014250938780605793, |
|
"learning_rate": 4.289851622629533e-05, |
|
"loss": 0.0005, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.42846941204475125, |
|
"grad_norm": 0.027526648715138435, |
|
"learning_rate": 4.2858843132587485e-05, |
|
"loss": 0.0005, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.4308497976672221, |
|
"grad_norm": 0.0071271262131631374, |
|
"learning_rate": 4.2819170038879635e-05, |
|
"loss": 0.0007, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.4332301832896929, |
|
"grad_norm": 0.11835234612226486, |
|
"learning_rate": 4.2779496945171785e-05, |
|
"loss": 0.0005, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.43561056891216376, |
|
"grad_norm": 0.016718665137887, |
|
"learning_rate": 4.273982385146394e-05, |
|
"loss": 0.0007, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.4379909545346346, |
|
"grad_norm": 0.04138866439461708, |
|
"learning_rate": 4.270015075775609e-05, |
|
"loss": 0.0005, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.44037134015710544, |
|
"grad_norm": 0.5920994281768799, |
|
"learning_rate": 4.266047766404824e-05, |
|
"loss": 0.0009, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.4427517257795763, |
|
"grad_norm": 0.010394711047410965, |
|
"learning_rate": 4.2620804570340393e-05, |
|
"loss": 0.0004, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.4451321114020471, |
|
"grad_norm": 0.031543031334877014, |
|
"learning_rate": 4.258113147663255e-05, |
|
"loss": 0.0005, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.44751249702451795, |
|
"grad_norm": 0.016665128991007805, |
|
"learning_rate": 4.25414583829247e-05, |
|
"loss": 0.0083, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.4498928826469888, |
|
"grad_norm": 0.03811788931488991, |
|
"learning_rate": 4.250178528921686e-05, |
|
"loss": 0.0014, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.4522732682694596, |
|
"grad_norm": 0.0656796246767044, |
|
"learning_rate": 4.246211219550901e-05, |
|
"loss": 0.0008, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.4546536538919305, |
|
"grad_norm": 0.011904909275472164, |
|
"learning_rate": 4.242243910180116e-05, |
|
"loss": 0.0006, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.45703403951440136, |
|
"grad_norm": 0.01850457303225994, |
|
"learning_rate": 4.238276600809331e-05, |
|
"loss": 0.0004, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.4594144251368722, |
|
"grad_norm": 0.10309766978025436, |
|
"learning_rate": 4.2343092914385466e-05, |
|
"loss": 0.0005, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.46179481075934303, |
|
"grad_norm": 0.13206863403320312, |
|
"learning_rate": 4.230341982067762e-05, |
|
"loss": 0.0004, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.46417519638181387, |
|
"grad_norm": 0.010924161411821842, |
|
"learning_rate": 4.226374672696977e-05, |
|
"loss": 0.0003, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.4665555820042847, |
|
"grad_norm": 0.013266120105981827, |
|
"learning_rate": 4.2224073633261924e-05, |
|
"loss": 0.0004, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.46893596762675555, |
|
"grad_norm": 0.008552256040275097, |
|
"learning_rate": 4.2184400539554074e-05, |
|
"loss": 0.0003, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.4713163532492264, |
|
"grad_norm": 0.0052538709715008736, |
|
"learning_rate": 4.2144727445846224e-05, |
|
"loss": 0.0005, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.4736967388716972, |
|
"grad_norm": 0.0074672214686870575, |
|
"learning_rate": 4.210505435213838e-05, |
|
"loss": 0.0003, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.47607712449416806, |
|
"grad_norm": 0.5743750929832458, |
|
"learning_rate": 4.206538125843054e-05, |
|
"loss": 0.0005, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.4784575101166389, |
|
"grad_norm": 0.0076432847417891026, |
|
"learning_rate": 4.202570816472269e-05, |
|
"loss": 0.0005, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.48083789573910973, |
|
"grad_norm": 0.09265641123056412, |
|
"learning_rate": 4.198603507101484e-05, |
|
"loss": 0.0003, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.48321828136158057, |
|
"grad_norm": 0.01519245095551014, |
|
"learning_rate": 4.194636197730699e-05, |
|
"loss": 0.0002, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.4855986669840514, |
|
"grad_norm": 0.04831220954656601, |
|
"learning_rate": 4.1906688883599146e-05, |
|
"loss": 0.0003, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.48797905260652225, |
|
"grad_norm": 0.024797851219773293, |
|
"learning_rate": 4.18670157898913e-05, |
|
"loss": 0.0004, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.4903594382289931, |
|
"grad_norm": 0.008994129486382008, |
|
"learning_rate": 4.1827342696183454e-05, |
|
"loss": 0.0002, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.4927398238514639, |
|
"grad_norm": 0.00806290004402399, |
|
"learning_rate": 4.1787669602475604e-05, |
|
"loss": 0.0004, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.49512020947393476, |
|
"grad_norm": 0.003900889540091157, |
|
"learning_rate": 4.1747996508767754e-05, |
|
"loss": 0.0002, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.4975005950964056, |
|
"grad_norm": 0.00262014614418149, |
|
"learning_rate": 4.1708323415059905e-05, |
|
"loss": 0.0002, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.49988098071887643, |
|
"grad_norm": 0.30837100744247437, |
|
"learning_rate": 4.166865032135206e-05, |
|
"loss": 0.0004, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.5022613663413473, |
|
"grad_norm": 0.5304675102233887, |
|
"learning_rate": 4.162897722764421e-05, |
|
"loss": 0.0003, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.5046417519638181, |
|
"grad_norm": 0.3627573847770691, |
|
"learning_rate": 4.158930413393637e-05, |
|
"loss": 0.0043, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.507022137586289, |
|
"grad_norm": 0.011327610351145267, |
|
"learning_rate": 4.154963104022852e-05, |
|
"loss": 0.0005, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.5094025232087598, |
|
"grad_norm": 0.055182114243507385, |
|
"learning_rate": 4.150995794652067e-05, |
|
"loss": 0.0005, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.5117829088312307, |
|
"grad_norm": 0.009911212138831615, |
|
"learning_rate": 4.147028485281283e-05, |
|
"loss": 0.0004, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.5141632944537015, |
|
"grad_norm": 0.028569847345352173, |
|
"learning_rate": 4.143061175910498e-05, |
|
"loss": 0.0003, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.5165436800761724, |
|
"grad_norm": 0.0070992144756019115, |
|
"learning_rate": 4.139093866539713e-05, |
|
"loss": 0.0006, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.5189240656986431, |
|
"grad_norm": 0.008213848806917667, |
|
"learning_rate": 4.1351265571689285e-05, |
|
"loss": 0.0002, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.521304451321114, |
|
"grad_norm": 0.018964022397994995, |
|
"learning_rate": 4.1311592477981435e-05, |
|
"loss": 0.0003, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.5236848369435848, |
|
"grad_norm": 0.004533541388809681, |
|
"learning_rate": 4.1271919384273585e-05, |
|
"loss": 0.0003, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.5260652225660557, |
|
"grad_norm": 0.12422726303339005, |
|
"learning_rate": 4.123224629056574e-05, |
|
"loss": 0.0003, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.5284456081885266, |
|
"grad_norm": 0.019521724432706833, |
|
"learning_rate": 4.119257319685789e-05, |
|
"loss": 0.0003, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.5308259938109974, |
|
"grad_norm": 0.03547817841172218, |
|
"learning_rate": 4.115290010315004e-05, |
|
"loss": 0.0004, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.5332063794334683, |
|
"grad_norm": 0.9750944375991821, |
|
"learning_rate": 4.111322700944219e-05, |
|
"loss": 0.0005, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.535586765055939, |
|
"grad_norm": 0.09758254885673523, |
|
"learning_rate": 4.107355391573435e-05, |
|
"loss": 0.0004, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.5379671506784099, |
|
"grad_norm": 0.20201332867145538, |
|
"learning_rate": 4.103388082202651e-05, |
|
"loss": 0.0008, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.5403475363008807, |
|
"grad_norm": 0.2006085067987442, |
|
"learning_rate": 4.099420772831866e-05, |
|
"loss": 0.0008, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.5427279219233516, |
|
"grad_norm": 0.0802696943283081, |
|
"learning_rate": 4.095453463461081e-05, |
|
"loss": 0.0007, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.5451083075458224, |
|
"grad_norm": 0.4039531350135803, |
|
"learning_rate": 4.091486154090296e-05, |
|
"loss": 0.0024, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.5474886931682933, |
|
"grad_norm": 0.006702470127493143, |
|
"learning_rate": 4.087518844719511e-05, |
|
"loss": 0.0007, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.5498690787907641, |
|
"grad_norm": 0.1001976877450943, |
|
"learning_rate": 4.083551535348727e-05, |
|
"loss": 0.0003, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.552249464413235, |
|
"grad_norm": 0.005626179743558168, |
|
"learning_rate": 4.079584225977942e-05, |
|
"loss": 0.0009, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.5546298500357058, |
|
"grad_norm": 0.009593102149665356, |
|
"learning_rate": 4.075616916607157e-05, |
|
"loss": 0.0003, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.5570102356581766, |
|
"grad_norm": 0.014003382995724678, |
|
"learning_rate": 4.0716496072363723e-05, |
|
"loss": 0.0003, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.5593906212806474, |
|
"grad_norm": 0.012953966856002808, |
|
"learning_rate": 4.0676822978655874e-05, |
|
"loss": 0.0004, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.5617710069031183, |
|
"grad_norm": 0.007770949974656105, |
|
"learning_rate": 4.063714988494803e-05, |
|
"loss": 0.0006, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.5641513925255891, |
|
"grad_norm": 0.01227940246462822, |
|
"learning_rate": 4.059747679124019e-05, |
|
"loss": 0.0003, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.56653177814806, |
|
"grad_norm": 0.2204684615135193, |
|
"learning_rate": 4.055780369753234e-05, |
|
"loss": 0.0003, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.5689121637705308, |
|
"grad_norm": 0.03364790603518486, |
|
"learning_rate": 4.051813060382449e-05, |
|
"loss": 0.0003, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.5712925493930017, |
|
"grad_norm": 0.049715492874383926, |
|
"learning_rate": 4.047845751011664e-05, |
|
"loss": 0.0003, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.5736729350154725, |
|
"grad_norm": 0.028070533648133278, |
|
"learning_rate": 4.0438784416408796e-05, |
|
"loss": 0.0007, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.5760533206379433, |
|
"grad_norm": 0.020421486347913742, |
|
"learning_rate": 4.0399111322700946e-05, |
|
"loss": 0.0009, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.5784337062604142, |
|
"grad_norm": 0.010064806789159775, |
|
"learning_rate": 4.0359438228993097e-05, |
|
"loss": 0.0008, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.580814091882885, |
|
"grad_norm": 0.3017018735408783, |
|
"learning_rate": 4.0319765135285254e-05, |
|
"loss": 0.001, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.5831944775053559, |
|
"grad_norm": 0.20759595930576324, |
|
"learning_rate": 4.0280092041577404e-05, |
|
"loss": 0.0003, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.5855748631278267, |
|
"grad_norm": 0.016160350292921066, |
|
"learning_rate": 4.0240418947869554e-05, |
|
"loss": 0.0006, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.5879552487502976, |
|
"grad_norm": 0.5293152332305908, |
|
"learning_rate": 4.020074585416171e-05, |
|
"loss": 0.0011, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.5903356343727684, |
|
"grad_norm": 0.007493559271097183, |
|
"learning_rate": 4.016107276045386e-05, |
|
"loss": 0.0004, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.5927160199952393, |
|
"grad_norm": 0.018649157136678696, |
|
"learning_rate": 4.012139966674601e-05, |
|
"loss": 0.0005, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.59509640561771, |
|
"grad_norm": 0.01135182660073042, |
|
"learning_rate": 4.008172657303817e-05, |
|
"loss": 0.0004, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.5974767912401809, |
|
"grad_norm": 0.0733335018157959, |
|
"learning_rate": 4.004205347933032e-05, |
|
"loss": 0.0005, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.5998571768626517, |
|
"grad_norm": 0.02785026654601097, |
|
"learning_rate": 4.0002380385622476e-05, |
|
"loss": 0.0003, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.6022375624851226, |
|
"grad_norm": 0.005258665420114994, |
|
"learning_rate": 3.996270729191463e-05, |
|
"loss": 0.0002, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.6046179481075934, |
|
"grad_norm": 0.006735061760991812, |
|
"learning_rate": 3.992303419820678e-05, |
|
"loss": 0.0003, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.6069983337300643, |
|
"grad_norm": 0.008341578766703606, |
|
"learning_rate": 3.988336110449893e-05, |
|
"loss": 0.0003, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.6093787193525351, |
|
"grad_norm": 0.0027205003425478935, |
|
"learning_rate": 3.9843688010791084e-05, |
|
"loss": 0.0003, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.611759104975006, |
|
"grad_norm": 0.01718416064977646, |
|
"learning_rate": 3.9804014917083235e-05, |
|
"loss": 0.0005, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.6141394905974767, |
|
"grad_norm": 0.06104213371872902, |
|
"learning_rate": 3.976434182337539e-05, |
|
"loss": 0.0002, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.6165198762199476, |
|
"grad_norm": 0.008454731665551662, |
|
"learning_rate": 3.972466872966754e-05, |
|
"loss": 0.0001, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.6189002618424184, |
|
"grad_norm": 0.006591182202100754, |
|
"learning_rate": 3.968499563595969e-05, |
|
"loss": 0.0002, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.6212806474648893, |
|
"grad_norm": 0.009718428365886211, |
|
"learning_rate": 3.964532254225184e-05, |
|
"loss": 0.0019, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.6236610330873602, |
|
"grad_norm": 0.0156183410435915, |
|
"learning_rate": 3.9605649448544e-05, |
|
"loss": 0.0002, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.626041418709831, |
|
"grad_norm": 0.012816215865314007, |
|
"learning_rate": 3.956597635483616e-05, |
|
"loss": 0.0008, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.6284218043323019, |
|
"grad_norm": 0.0211672130972147, |
|
"learning_rate": 3.952630326112831e-05, |
|
"loss": 0.0002, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.6308021899547727, |
|
"grad_norm": 0.012701870873570442, |
|
"learning_rate": 3.948663016742046e-05, |
|
"loss": 0.0003, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.6331825755772436, |
|
"grad_norm": 0.008668744005262852, |
|
"learning_rate": 3.944695707371261e-05, |
|
"loss": 0.0002, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.6355629611997143, |
|
"grad_norm": 0.020911380648612976, |
|
"learning_rate": 3.940728398000476e-05, |
|
"loss": 0.0004, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.6379433468221852, |
|
"grad_norm": 0.0015960232121869922, |
|
"learning_rate": 3.9367610886296915e-05, |
|
"loss": 0.0002, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.640323732444656, |
|
"grad_norm": 0.01783674582839012, |
|
"learning_rate": 3.932793779258907e-05, |
|
"loss": 0.0001, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.6427041180671269, |
|
"grad_norm": 0.006887937895953655, |
|
"learning_rate": 3.928826469888122e-05, |
|
"loss": 0.0002, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.6450845036895977, |
|
"grad_norm": 0.004555295687168837, |
|
"learning_rate": 3.924859160517337e-05, |
|
"loss": 0.0002, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.6474648893120686, |
|
"grad_norm": 0.00994735024869442, |
|
"learning_rate": 3.920891851146552e-05, |
|
"loss": 0.0003, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.6498452749345394, |
|
"grad_norm": 0.03482622653245926, |
|
"learning_rate": 3.916924541775768e-05, |
|
"loss": 0.0002, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.6522256605570103, |
|
"grad_norm": 0.06792888045310974, |
|
"learning_rate": 3.912957232404983e-05, |
|
"loss": 0.0002, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.654606046179481, |
|
"grad_norm": 0.02015574462711811, |
|
"learning_rate": 3.908989923034199e-05, |
|
"loss": 0.0008, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.6569864318019519, |
|
"grad_norm": 0.07359887659549713, |
|
"learning_rate": 3.905022613663414e-05, |
|
"loss": 0.0003, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.6593668174244227, |
|
"grad_norm": 0.006248469930142164, |
|
"learning_rate": 3.901055304292629e-05, |
|
"loss": 0.0002, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.6617472030468936, |
|
"grad_norm": 0.01739078015089035, |
|
"learning_rate": 3.897087994921844e-05, |
|
"loss": 0.0002, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.6641275886693644, |
|
"grad_norm": 0.008228071965277195, |
|
"learning_rate": 3.8931206855510596e-05, |
|
"loss": 0.0005, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.6665079742918353, |
|
"grad_norm": 0.012569721788167953, |
|
"learning_rate": 3.8891533761802746e-05, |
|
"loss": 0.0002, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.6688883599143061, |
|
"grad_norm": 0.003245885483920574, |
|
"learning_rate": 3.88518606680949e-05, |
|
"loss": 0.0001, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.671268745536777, |
|
"grad_norm": 0.010106906294822693, |
|
"learning_rate": 3.8812187574387053e-05, |
|
"loss": 0.0002, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.6736491311592478, |
|
"grad_norm": 0.0013821216998621821, |
|
"learning_rate": 3.8772514480679204e-05, |
|
"loss": 0.0002, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.6760295167817186, |
|
"grad_norm": 0.008525123819708824, |
|
"learning_rate": 3.873284138697136e-05, |
|
"loss": 0.0001, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.6784099024041895, |
|
"grad_norm": 0.0045269266702234745, |
|
"learning_rate": 3.869316829326351e-05, |
|
"loss": 0.0001, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.6807902880266603, |
|
"grad_norm": 0.005178367253392935, |
|
"learning_rate": 3.865349519955566e-05, |
|
"loss": 0.0002, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.6831706736491312, |
|
"grad_norm": 0.015604405663907528, |
|
"learning_rate": 3.861382210584781e-05, |
|
"loss": 0.0001, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.685551059271602, |
|
"grad_norm": 0.7911249399185181, |
|
"learning_rate": 3.857414901213997e-05, |
|
"loss": 0.0002, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.6879314448940729, |
|
"grad_norm": 0.005056778434664011, |
|
"learning_rate": 3.853447591843212e-05, |
|
"loss": 0.0003, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.6903118305165437, |
|
"grad_norm": 0.007354553788900375, |
|
"learning_rate": 3.8494802824724276e-05, |
|
"loss": 0.0002, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.6926922161390145, |
|
"grad_norm": 0.10069092363119125, |
|
"learning_rate": 3.8455129731016427e-05, |
|
"loss": 0.0003, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.6950726017614853, |
|
"grad_norm": 0.007913509383797646, |
|
"learning_rate": 3.841545663730858e-05, |
|
"loss": 0.0005, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.6974529873839562, |
|
"grad_norm": 0.04653599485754967, |
|
"learning_rate": 3.837578354360073e-05, |
|
"loss": 0.0005, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.699833373006427, |
|
"grad_norm": 0.007795447017997503, |
|
"learning_rate": 3.8336110449892884e-05, |
|
"loss": 0.0002, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.7022137586288979, |
|
"grad_norm": 0.0843840092420578, |
|
"learning_rate": 3.829643735618504e-05, |
|
"loss": 0.0008, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.7045941442513687, |
|
"grad_norm": 0.019790470600128174, |
|
"learning_rate": 3.825676426247719e-05, |
|
"loss": 0.0004, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.7069745298738396, |
|
"grad_norm": 0.04970049858093262, |
|
"learning_rate": 3.821709116876934e-05, |
|
"loss": 0.0008, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.7093549154963104, |
|
"grad_norm": 0.011334414593875408, |
|
"learning_rate": 3.817741807506149e-05, |
|
"loss": 0.0003, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.7117353011187812, |
|
"grad_norm": 0.12627428770065308, |
|
"learning_rate": 3.813774498135364e-05, |
|
"loss": 0.0006, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.714115686741252, |
|
"grad_norm": 0.03299270570278168, |
|
"learning_rate": 3.8098071887645806e-05, |
|
"loss": 0.0006, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.7164960723637229, |
|
"grad_norm": 0.014470428228378296, |
|
"learning_rate": 3.805839879393796e-05, |
|
"loss": 0.0002, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.7188764579861938, |
|
"grad_norm": 0.010081595741212368, |
|
"learning_rate": 3.801872570023011e-05, |
|
"loss": 0.0002, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.7212568436086646, |
|
"grad_norm": 0.006527799181640148, |
|
"learning_rate": 3.797905260652226e-05, |
|
"loss": 0.0005, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.7236372292311355, |
|
"grad_norm": 0.025967439636588097, |
|
"learning_rate": 3.793937951281441e-05, |
|
"loss": 0.0003, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.7260176148536063, |
|
"grad_norm": 0.012788872234523296, |
|
"learning_rate": 3.7899706419106565e-05, |
|
"loss": 0.0006, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.7283980004760772, |
|
"grad_norm": 0.05159073323011398, |
|
"learning_rate": 3.7860033325398715e-05, |
|
"loss": 0.0002, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.730778386098548, |
|
"grad_norm": 0.09669562429189682, |
|
"learning_rate": 3.782036023169087e-05, |
|
"loss": 0.0003, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.7331587717210188, |
|
"grad_norm": 0.0008232035324908793, |
|
"learning_rate": 3.778068713798302e-05, |
|
"loss": 0.0002, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.7355391573434896, |
|
"grad_norm": 0.0026904919650405645, |
|
"learning_rate": 3.774101404427517e-05, |
|
"loss": 0.0008, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.7379195429659605, |
|
"grad_norm": 0.22064454853534698, |
|
"learning_rate": 3.770134095056733e-05, |
|
"loss": 0.0001, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.7402999285884313, |
|
"grad_norm": 0.0037417325656861067, |
|
"learning_rate": 3.766166785685948e-05, |
|
"loss": 0.0002, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.7426803142109022, |
|
"grad_norm": 0.008903945796191692, |
|
"learning_rate": 3.762199476315163e-05, |
|
"loss": 0.0004, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.745060699833373, |
|
"grad_norm": 0.01190115325152874, |
|
"learning_rate": 3.758232166944379e-05, |
|
"loss": 0.0003, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 0.7474410854558439, |
|
"grad_norm": 0.005016674287617207, |
|
"learning_rate": 3.754264857573594e-05, |
|
"loss": 0.0002, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.7498214710783146, |
|
"grad_norm": 0.009286819957196712, |
|
"learning_rate": 3.750297548202809e-05, |
|
"loss": 0.0002, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.7522018567007855, |
|
"grad_norm": 0.06282204389572144, |
|
"learning_rate": 3.7463302388320245e-05, |
|
"loss": 0.0003, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.7545822423232563, |
|
"grad_norm": 0.008628441952168941, |
|
"learning_rate": 3.7423629294612396e-05, |
|
"loss": 0.0003, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 0.7569626279457272, |
|
"grad_norm": 0.03511732071638107, |
|
"learning_rate": 3.7383956200904546e-05, |
|
"loss": 0.0001, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.759343013568198, |
|
"grad_norm": 0.003294560592621565, |
|
"learning_rate": 3.73442831071967e-05, |
|
"loss": 0.0003, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 0.7617233991906689, |
|
"grad_norm": 0.032009340822696686, |
|
"learning_rate": 3.730461001348885e-05, |
|
"loss": 0.0002, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.7641037848131397, |
|
"grad_norm": 0.022615088149905205, |
|
"learning_rate": 3.726493691978101e-05, |
|
"loss": 0.0002, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 0.7664841704356106, |
|
"grad_norm": 0.0026582872960716486, |
|
"learning_rate": 3.722526382607316e-05, |
|
"loss": 0.0001, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.7688645560580815, |
|
"grad_norm": 0.3148833215236664, |
|
"learning_rate": 3.718559073236531e-05, |
|
"loss": 0.0002, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 0.7712449416805522, |
|
"grad_norm": 0.03451314941048622, |
|
"learning_rate": 3.714591763865746e-05, |
|
"loss": 0.0002, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.7736253273030231, |
|
"grad_norm": 0.008008177392184734, |
|
"learning_rate": 3.710624454494961e-05, |
|
"loss": 0.0001, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.7760057129254939, |
|
"grad_norm": 0.07701031118631363, |
|
"learning_rate": 3.706657145124177e-05, |
|
"loss": 0.0005, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.7783860985479648, |
|
"grad_norm": 0.010465078055858612, |
|
"learning_rate": 3.7026898357533926e-05, |
|
"loss": 0.0002, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 0.7807664841704356, |
|
"grad_norm": 0.00499736238270998, |
|
"learning_rate": 3.6987225263826076e-05, |
|
"loss": 0.0007, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.7831468697929065, |
|
"grad_norm": 0.6453936696052551, |
|
"learning_rate": 3.6947552170118226e-05, |
|
"loss": 0.0003, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 0.7855272554153773, |
|
"grad_norm": 0.016864465549588203, |
|
"learning_rate": 3.690787907641038e-05, |
|
"loss": 0.0003, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.7879076410378482, |
|
"grad_norm": 0.05074018985033035, |
|
"learning_rate": 3.6868205982702534e-05, |
|
"loss": 0.0002, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 0.7902880266603189, |
|
"grad_norm": 0.006529835984110832, |
|
"learning_rate": 3.682853288899469e-05, |
|
"loss": 0.0005, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.7926684122827898, |
|
"grad_norm": 0.041339557617902756, |
|
"learning_rate": 3.678885979528684e-05, |
|
"loss": 0.0004, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 0.7950487979052606, |
|
"grad_norm": 0.006891661789268255, |
|
"learning_rate": 3.674918670157899e-05, |
|
"loss": 0.0004, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.7974291835277315, |
|
"grad_norm": 0.01043302658945322, |
|
"learning_rate": 3.670951360787114e-05, |
|
"loss": 0.0003, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.7998095691502023, |
|
"grad_norm": 0.01914358325302601, |
|
"learning_rate": 3.666984051416329e-05, |
|
"loss": 0.0008, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.8021899547726732, |
|
"grad_norm": 0.016266925260424614, |
|
"learning_rate": 3.663016742045545e-05, |
|
"loss": 0.0004, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 0.804570340395144, |
|
"grad_norm": 0.005765034817159176, |
|
"learning_rate": 3.6590494326747606e-05, |
|
"loss": 0.0002, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.8069507260176149, |
|
"grad_norm": 0.007664472330361605, |
|
"learning_rate": 3.6550821233039757e-05, |
|
"loss": 0.0002, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 0.8093311116400856, |
|
"grad_norm": 0.00499699218198657, |
|
"learning_rate": 3.651114813933191e-05, |
|
"loss": 0.0001, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.8117114972625565, |
|
"grad_norm": 0.012575655244290829, |
|
"learning_rate": 3.647147504562406e-05, |
|
"loss": 0.0002, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 0.8140918828850274, |
|
"grad_norm": 0.010001065209507942, |
|
"learning_rate": 3.6431801951916214e-05, |
|
"loss": 0.0005, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.8164722685074982, |
|
"grad_norm": 0.06131220981478691, |
|
"learning_rate": 3.6392128858208365e-05, |
|
"loss": 0.0002, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 0.8188526541299691, |
|
"grad_norm": 0.037141721695661545, |
|
"learning_rate": 3.6352455764500515e-05, |
|
"loss": 0.0001, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.8212330397524399, |
|
"grad_norm": 0.05955801159143448, |
|
"learning_rate": 3.631278267079267e-05, |
|
"loss": 0.0005, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.8236134253749108, |
|
"grad_norm": 0.012499036267399788, |
|
"learning_rate": 3.627310957708482e-05, |
|
"loss": 0.0002, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.8259938109973816, |
|
"grad_norm": 0.007782169617712498, |
|
"learning_rate": 3.623343648337697e-05, |
|
"loss": 0.0004, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 0.8283741966198525, |
|
"grad_norm": 0.016740377992391586, |
|
"learning_rate": 3.619376338966913e-05, |
|
"loss": 0.0004, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.8307545822423232, |
|
"grad_norm": 0.05157579854130745, |
|
"learning_rate": 3.615409029596128e-05, |
|
"loss": 0.0003, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 0.8331349678647941, |
|
"grad_norm": 0.00816064327955246, |
|
"learning_rate": 3.611441720225343e-05, |
|
"loss": 0.0003, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.8355153534872649, |
|
"grad_norm": 0.02470710128545761, |
|
"learning_rate": 3.607474410854559e-05, |
|
"loss": 0.0002, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 0.8378957391097358, |
|
"grad_norm": 0.004836896900087595, |
|
"learning_rate": 3.603507101483774e-05, |
|
"loss": 0.0001, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.8402761247322066, |
|
"grad_norm": 0.003796802368015051, |
|
"learning_rate": 3.5995397921129895e-05, |
|
"loss": 0.0002, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 0.8426565103546775, |
|
"grad_norm": 0.006737705785781145, |
|
"learning_rate": 3.5955724827422045e-05, |
|
"loss": 0.0003, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 0.8450368959771483, |
|
"grad_norm": 0.0021388079039752483, |
|
"learning_rate": 3.5916051733714195e-05, |
|
"loss": 0.0001, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.8474172815996192, |
|
"grad_norm": 0.047663912177085876, |
|
"learning_rate": 3.5876378640006346e-05, |
|
"loss": 0.0001, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.8497976672220899, |
|
"grad_norm": 0.015320863574743271, |
|
"learning_rate": 3.58367055462985e-05, |
|
"loss": 0.0002, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 0.8521780528445608, |
|
"grad_norm": 0.008627827279269695, |
|
"learning_rate": 3.579703245259065e-05, |
|
"loss": 0.0003, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.8545584384670316, |
|
"grad_norm": 0.0034904240164905787, |
|
"learning_rate": 3.575735935888281e-05, |
|
"loss": 0.0001, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 0.8569388240895025, |
|
"grad_norm": 0.01078026182949543, |
|
"learning_rate": 3.571768626517496e-05, |
|
"loss": 0.0002, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.8593192097119733, |
|
"grad_norm": 0.011285877786576748, |
|
"learning_rate": 3.567801317146711e-05, |
|
"loss": 0.0007, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 0.8616995953344442, |
|
"grad_norm": 0.005885743070393801, |
|
"learning_rate": 3.563834007775926e-05, |
|
"loss": 0.0003, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.8640799809569151, |
|
"grad_norm": 0.1011798158288002, |
|
"learning_rate": 3.559866698405142e-05, |
|
"loss": 0.0002, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 0.8664603665793859, |
|
"grad_norm": 0.012861615046858788, |
|
"learning_rate": 3.5558993890343575e-05, |
|
"loss": 0.0002, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.8688407522018567, |
|
"grad_norm": 0.009324765764176846, |
|
"learning_rate": 3.5519320796635726e-05, |
|
"loss": 0.0007, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.8712211378243275, |
|
"grad_norm": 0.0035065708216279745, |
|
"learning_rate": 3.5479647702927876e-05, |
|
"loss": 0.0004, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.8736015234467984, |
|
"grad_norm": 0.010472165420651436, |
|
"learning_rate": 3.5439974609220026e-05, |
|
"loss": 0.0001, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 0.8759819090692692, |
|
"grad_norm": 0.009073158726096153, |
|
"learning_rate": 3.5400301515512176e-05, |
|
"loss": 0.0001, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.8783622946917401, |
|
"grad_norm": 0.0028665116988122463, |
|
"learning_rate": 3.5360628421804334e-05, |
|
"loss": 0.0001, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 0.8807426803142109, |
|
"grad_norm": 0.009178753942251205, |
|
"learning_rate": 3.532095532809649e-05, |
|
"loss": 0.0003, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.8831230659366818, |
|
"grad_norm": 0.007954353466629982, |
|
"learning_rate": 3.528128223438864e-05, |
|
"loss": 0.0006, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 0.8855034515591526, |
|
"grad_norm": 0.009399271570146084, |
|
"learning_rate": 3.524160914068079e-05, |
|
"loss": 0.0002, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.8878838371816234, |
|
"grad_norm": 0.0035749957896769047, |
|
"learning_rate": 3.520193604697294e-05, |
|
"loss": 0.0001, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 0.8902642228040942, |
|
"grad_norm": 0.007753758691251278, |
|
"learning_rate": 3.51622629532651e-05, |
|
"loss": 0.0001, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 0.8926446084265651, |
|
"grad_norm": 0.007471214048564434, |
|
"learning_rate": 3.512258985955725e-05, |
|
"loss": 0.0003, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.8950249940490359, |
|
"grad_norm": 0.016612932085990906, |
|
"learning_rate": 3.5082916765849406e-05, |
|
"loss": 0.0001, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 0.8974053796715068, |
|
"grad_norm": 0.008320000022649765, |
|
"learning_rate": 3.5043243672141556e-05, |
|
"loss": 0.0001, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 0.8997857652939776, |
|
"grad_norm": 0.010242090560495853, |
|
"learning_rate": 3.500357057843371e-05, |
|
"loss": 0.0001, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 0.9021661509164485, |
|
"grad_norm": 0.0036350861191749573, |
|
"learning_rate": 3.4963897484725864e-05, |
|
"loss": 0.0001, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 0.9045465365389193, |
|
"grad_norm": 0.002153201960027218, |
|
"learning_rate": 3.4924224391018014e-05, |
|
"loss": 0.0002, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.9069269221613901, |
|
"grad_norm": 0.003587006125599146, |
|
"learning_rate": 3.4884551297310164e-05, |
|
"loss": 0.0002, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 0.909307307783861, |
|
"grad_norm": 0.006511629093438387, |
|
"learning_rate": 3.4844878203602315e-05, |
|
"loss": 0.0002, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 0.9116876934063318, |
|
"grad_norm": 0.008945467881858349, |
|
"learning_rate": 3.480520510989447e-05, |
|
"loss": 0.0001, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 0.9140680790288027, |
|
"grad_norm": 0.006604051683098078, |
|
"learning_rate": 3.476553201618662e-05, |
|
"loss": 0.0001, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.9164484646512735, |
|
"grad_norm": 0.0031156474724411964, |
|
"learning_rate": 3.472585892247878e-05, |
|
"loss": 0.0003, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 0.9188288502737444, |
|
"grad_norm": 0.005195919424295425, |
|
"learning_rate": 3.468618582877093e-05, |
|
"loss": 0.0001, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 0.9212092358962152, |
|
"grad_norm": 0.008878687396645546, |
|
"learning_rate": 3.464651273506308e-05, |
|
"loss": 0.0001, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 0.9235896215186861, |
|
"grad_norm": 0.0020940713584423065, |
|
"learning_rate": 3.460683964135523e-05, |
|
"loss": 0.0001, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 0.9259700071411568, |
|
"grad_norm": 0.0066345930099487305, |
|
"learning_rate": 3.456716654764739e-05, |
|
"loss": 0.0001, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 0.9283503927636277, |
|
"grad_norm": 0.0018133444245904684, |
|
"learning_rate": 3.4527493453939544e-05, |
|
"loss": 0.0001, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.9307307783860985, |
|
"grad_norm": 0.000830967677757144, |
|
"learning_rate": 3.4487820360231695e-05, |
|
"loss": 0.0001, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 0.9331111640085694, |
|
"grad_norm": 0.0037288174498826265, |
|
"learning_rate": 3.4448147266523845e-05, |
|
"loss": 0.0001, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 0.9354915496310402, |
|
"grad_norm": 0.005838675890117884, |
|
"learning_rate": 3.4408474172815995e-05, |
|
"loss": 0.0003, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 0.9378719352535111, |
|
"grad_norm": 0.008044001646339893, |
|
"learning_rate": 3.4368801079108145e-05, |
|
"loss": 0.0002, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 0.9402523208759819, |
|
"grad_norm": 0.07016938179731369, |
|
"learning_rate": 3.43291279854003e-05, |
|
"loss": 0.0005, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 0.9426327064984528, |
|
"grad_norm": 0.11337173730134964, |
|
"learning_rate": 3.428945489169246e-05, |
|
"loss": 0.0002, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 0.9450130921209235, |
|
"grad_norm": 0.0017598132835701108, |
|
"learning_rate": 3.424978179798461e-05, |
|
"loss": 0.0003, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 0.9473934777433944, |
|
"grad_norm": 0.030149806290864944, |
|
"learning_rate": 3.421010870427676e-05, |
|
"loss": 0.0003, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 0.9497738633658652, |
|
"grad_norm": 0.11280670762062073, |
|
"learning_rate": 3.417043561056891e-05, |
|
"loss": 0.0003, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 0.9521542489883361, |
|
"grad_norm": 0.02797405980527401, |
|
"learning_rate": 3.413076251686107e-05, |
|
"loss": 0.0003, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.9545346346108069, |
|
"grad_norm": 0.009325963445007801, |
|
"learning_rate": 3.4091089423153225e-05, |
|
"loss": 0.0002, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 0.9569150202332778, |
|
"grad_norm": 0.015098505653440952, |
|
"learning_rate": 3.4051416329445375e-05, |
|
"loss": 0.0002, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 0.9592954058557487, |
|
"grad_norm": 0.0010631170589476824, |
|
"learning_rate": 3.4011743235737525e-05, |
|
"loss": 0.0002, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 0.9616757914782195, |
|
"grad_norm": 0.11537562310695648, |
|
"learning_rate": 3.3972070142029676e-05, |
|
"loss": 0.0004, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 0.9640561771006904, |
|
"grad_norm": 0.055657465010881424, |
|
"learning_rate": 3.3932397048321826e-05, |
|
"loss": 0.0002, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 0.9664365627231611, |
|
"grad_norm": 0.004681292921304703, |
|
"learning_rate": 3.389272395461398e-05, |
|
"loss": 0.0001, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 0.968816948345632, |
|
"grad_norm": 0.0036875929217785597, |
|
"learning_rate": 3.385305086090613e-05, |
|
"loss": 0.0003, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 0.9711973339681028, |
|
"grad_norm": 0.3181780278682709, |
|
"learning_rate": 3.381337776719829e-05, |
|
"loss": 0.0002, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 0.9735777195905737, |
|
"grad_norm": 0.008175074122846127, |
|
"learning_rate": 3.377370467349044e-05, |
|
"loss": 0.0001, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 0.9759581052130445, |
|
"grad_norm": 0.008897043764591217, |
|
"learning_rate": 3.373403157978259e-05, |
|
"loss": 0.0002, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.9783384908355154, |
|
"grad_norm": 0.005149902775883675, |
|
"learning_rate": 3.369435848607475e-05, |
|
"loss": 0.0001, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 0.9807188764579862, |
|
"grad_norm": 0.005102005321532488, |
|
"learning_rate": 3.36546853923669e-05, |
|
"loss": 0.0001, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 0.9830992620804571, |
|
"grad_norm": 0.003907215781509876, |
|
"learning_rate": 3.361501229865905e-05, |
|
"loss": 0.0001, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 0.9854796477029278, |
|
"grad_norm": 0.006176768336445093, |
|
"learning_rate": 3.3575339204951206e-05, |
|
"loss": 0.0001, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 0.9878600333253987, |
|
"grad_norm": 0.007574237417429686, |
|
"learning_rate": 3.3535666111243356e-05, |
|
"loss": 0.0001, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 0.9902404189478695, |
|
"grad_norm": 0.0036479670088738203, |
|
"learning_rate": 3.3495993017535506e-05, |
|
"loss": 0.0001, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 0.9926208045703404, |
|
"grad_norm": 0.0031234126072376966, |
|
"learning_rate": 3.3456319923827664e-05, |
|
"loss": 0.0001, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 0.9950011901928112, |
|
"grad_norm": 0.015276722609996796, |
|
"learning_rate": 3.3416646830119814e-05, |
|
"loss": 0.0004, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 0.9973815758152821, |
|
"grad_norm": 0.015308289788663387, |
|
"learning_rate": 3.3376973736411964e-05, |
|
"loss": 0.0002, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 0.9997619614377529, |
|
"grad_norm": 1.1039026975631714, |
|
"learning_rate": 3.333730064270412e-05, |
|
"loss": 0.0034, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 6.8751428443647455e-06, |
|
"eval_runtime": 52.0551, |
|
"eval_samples_per_second": 35.885, |
|
"eval_steps_per_second": 8.971, |
|
"step": 4201 |
|
}, |
|
{ |
|
"epoch": 1.0021423470602238, |
|
"grad_norm": 0.4035731852054596, |
|
"learning_rate": 3.329762754899627e-05, |
|
"loss": 0.001, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 1.0045227326826947, |
|
"grad_norm": 0.058116745203733444, |
|
"learning_rate": 3.325795445528843e-05, |
|
"loss": 0.0006, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 1.0069031183051655, |
|
"grad_norm": 0.030079133808612823, |
|
"learning_rate": 3.321828136158058e-05, |
|
"loss": 0.0005, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 1.0092835039276362, |
|
"grad_norm": 0.03710814565420151, |
|
"learning_rate": 3.317860826787273e-05, |
|
"loss": 0.0008, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 1.011663889550107, |
|
"grad_norm": 0.25699111819267273, |
|
"learning_rate": 3.313893517416488e-05, |
|
"loss": 0.0003, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 1.014044275172578, |
|
"grad_norm": 0.01729218102991581, |
|
"learning_rate": 3.309926208045703e-05, |
|
"loss": 0.0004, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 1.016424660795049, |
|
"grad_norm": 0.004348506219685078, |
|
"learning_rate": 3.3059588986749194e-05, |
|
"loss": 0.0003, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 1.0188050464175196, |
|
"grad_norm": 0.008898822590708733, |
|
"learning_rate": 3.3019915893041344e-05, |
|
"loss": 0.0002, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 1.0211854320399905, |
|
"grad_norm": 0.021421125158667564, |
|
"learning_rate": 3.2980242799333494e-05, |
|
"loss": 0.0002, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 1.0235658176624614, |
|
"grad_norm": 0.09812607616186142, |
|
"learning_rate": 3.2940569705625645e-05, |
|
"loss": 0.0003, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 1.0259462032849322, |
|
"grad_norm": 0.00921029131859541, |
|
"learning_rate": 3.2900896611917795e-05, |
|
"loss": 0.0003, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 1.028326588907403, |
|
"grad_norm": 0.18005193769931793, |
|
"learning_rate": 3.286122351820995e-05, |
|
"loss": 0.0009, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 1.0307069745298738, |
|
"grad_norm": 0.022728268057107925, |
|
"learning_rate": 3.282155042450211e-05, |
|
"loss": 0.001, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 1.0330873601523447, |
|
"grad_norm": 0.002307797549292445, |
|
"learning_rate": 3.278187733079426e-05, |
|
"loss": 0.0001, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 1.0354677457748156, |
|
"grad_norm": 0.09381233900785446, |
|
"learning_rate": 3.274220423708641e-05, |
|
"loss": 0.0001, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 1.0378481313972863, |
|
"grad_norm": 0.30725282430648804, |
|
"learning_rate": 3.270253114337856e-05, |
|
"loss": 0.0006, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 1.0402285170197572, |
|
"grad_norm": 0.0028942192438989878, |
|
"learning_rate": 3.266285804967071e-05, |
|
"loss": 0.0002, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 1.042608902642228, |
|
"grad_norm": 0.07420436292886734, |
|
"learning_rate": 3.262318495596287e-05, |
|
"loss": 0.0001, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 1.044989288264699, |
|
"grad_norm": 0.0038873206358402967, |
|
"learning_rate": 3.2583511862255025e-05, |
|
"loss": 0.0004, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 1.0473696738871696, |
|
"grad_norm": 0.00487096281722188, |
|
"learning_rate": 3.2543838768547175e-05, |
|
"loss": 0.0001, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 1.0497500595096405, |
|
"grad_norm": 0.00458755437284708, |
|
"learning_rate": 3.2504165674839325e-05, |
|
"loss": 0.0001, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 1.0521304451321114, |
|
"grad_norm": 0.003127770032733679, |
|
"learning_rate": 3.2464492581131475e-05, |
|
"loss": 0.0001, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 1.0545108307545823, |
|
"grad_norm": 0.0036109236534684896, |
|
"learning_rate": 3.242481948742363e-05, |
|
"loss": 0.0003, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 1.0568912163770532, |
|
"grad_norm": 0.01696913130581379, |
|
"learning_rate": 3.238514639371578e-05, |
|
"loss": 0.0001, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 1.0592716019995239, |
|
"grad_norm": 0.0007638796814717352, |
|
"learning_rate": 3.234547330000793e-05, |
|
"loss": 0.0003, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 1.0616519876219948, |
|
"grad_norm": 0.005359685514122248, |
|
"learning_rate": 3.230580020630009e-05, |
|
"loss": 0.0001, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 1.0640323732444656, |
|
"grad_norm": 0.008990432135760784, |
|
"learning_rate": 3.226612711259224e-05, |
|
"loss": 0.0002, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 1.0664127588669365, |
|
"grad_norm": 0.004698805510997772, |
|
"learning_rate": 3.22264540188844e-05, |
|
"loss": 0.0001, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 1.0687931444894072, |
|
"grad_norm": 0.07380379736423492, |
|
"learning_rate": 3.218678092517655e-05, |
|
"loss": 0.0005, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 1.071173530111878, |
|
"grad_norm": 0.0072670914232730865, |
|
"learning_rate": 3.21471078314687e-05, |
|
"loss": 0.0001, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.073553915734349, |
|
"grad_norm": 0.003431397257372737, |
|
"learning_rate": 3.210743473776085e-05, |
|
"loss": 0.0001, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 1.0759343013568199, |
|
"grad_norm": 0.012710604816675186, |
|
"learning_rate": 3.2067761644053006e-05, |
|
"loss": 0.0001, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 1.0783146869792906, |
|
"grad_norm": 0.0029263871256262064, |
|
"learning_rate": 3.2028088550345156e-05, |
|
"loss": 0.0001, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 1.0806950726017615, |
|
"grad_norm": 0.0013361535966396332, |
|
"learning_rate": 3.198841545663731e-05, |
|
"loss": 0.0001, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 1.0830754582242323, |
|
"grad_norm": 0.0027455012314021587, |
|
"learning_rate": 3.194874236292946e-05, |
|
"loss": 0.0001, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 1.0854558438467032, |
|
"grad_norm": 0.0015189964324235916, |
|
"learning_rate": 3.1909069269221614e-05, |
|
"loss": 0.0002, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 1.087836229469174, |
|
"grad_norm": 0.019486431032419205, |
|
"learning_rate": 3.1869396175513764e-05, |
|
"loss": 0.0001, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 1.0902166150916448, |
|
"grad_norm": 0.009100046940147877, |
|
"learning_rate": 3.182972308180592e-05, |
|
"loss": 0.0002, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 1.0925970007141157, |
|
"grad_norm": 0.6804227828979492, |
|
"learning_rate": 3.179004998809808e-05, |
|
"loss": 0.0008, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 1.0949773863365866, |
|
"grad_norm": 0.004166084341704845, |
|
"learning_rate": 3.175037689439023e-05, |
|
"loss": 0.0004, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 1.0973577719590573, |
|
"grad_norm": 0.0014277161099016666, |
|
"learning_rate": 3.171070380068238e-05, |
|
"loss": 0.0005, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 1.0997381575815282, |
|
"grad_norm": 0.02292274497449398, |
|
"learning_rate": 3.167103070697453e-05, |
|
"loss": 0.0001, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 1.102118543203999, |
|
"grad_norm": 0.006580695044249296, |
|
"learning_rate": 3.163135761326668e-05, |
|
"loss": 0.0002, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 1.10449892882647, |
|
"grad_norm": 0.005075294058769941, |
|
"learning_rate": 3.1591684519558836e-05, |
|
"loss": 0.0002, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 1.1068793144489408, |
|
"grad_norm": 0.0034661772660911083, |
|
"learning_rate": 3.1552011425850994e-05, |
|
"loss": 0.0003, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 1.1092597000714115, |
|
"grad_norm": 0.0035978129599243402, |
|
"learning_rate": 3.1512338332143144e-05, |
|
"loss": 0.0002, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 1.1116400856938824, |
|
"grad_norm": 0.01178679708391428, |
|
"learning_rate": 3.1472665238435294e-05, |
|
"loss": 0.0001, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 1.1140204713163533, |
|
"grad_norm": 0.0021494280081242323, |
|
"learning_rate": 3.1432992144727444e-05, |
|
"loss": 0.0003, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 1.1164008569388242, |
|
"grad_norm": 0.00325006153434515, |
|
"learning_rate": 3.13933190510196e-05, |
|
"loss": 0.0001, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 1.1187812425612949, |
|
"grad_norm": 0.006323399022221565, |
|
"learning_rate": 3.135364595731175e-05, |
|
"loss": 0.0002, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 1.1211616281837657, |
|
"grad_norm": 0.006911338306963444, |
|
"learning_rate": 3.131397286360391e-05, |
|
"loss": 0.0001, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 1.1235420138062366, |
|
"grad_norm": 0.0032435038592666388, |
|
"learning_rate": 3.127429976989606e-05, |
|
"loss": 0.0001, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 1.1259223994287075, |
|
"grad_norm": 0.18325313925743103, |
|
"learning_rate": 3.123462667618821e-05, |
|
"loss": 0.0002, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 1.1283027850511782, |
|
"grad_norm": 0.12742838263511658, |
|
"learning_rate": 3.119495358248036e-05, |
|
"loss": 0.0004, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 1.130683170673649, |
|
"grad_norm": 0.001981141045689583, |
|
"learning_rate": 3.115528048877252e-05, |
|
"loss": 0.0002, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 1.13306355629612, |
|
"grad_norm": 0.0030578586738556623, |
|
"learning_rate": 3.111560739506467e-05, |
|
"loss": 0.0001, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 1.1354439419185909, |
|
"grad_norm": 0.00284597952850163, |
|
"learning_rate": 3.1075934301356824e-05, |
|
"loss": 0.0001, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 1.1378243275410616, |
|
"grad_norm": 0.023655202239751816, |
|
"learning_rate": 3.1036261207648975e-05, |
|
"loss": 0.0002, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 1.1402047131635324, |
|
"grad_norm": 0.008493321016430855, |
|
"learning_rate": 3.0996588113941125e-05, |
|
"loss": 0.0002, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 1.1425850987860033, |
|
"grad_norm": 0.0038551143370568752, |
|
"learning_rate": 3.095691502023328e-05, |
|
"loss": 0.0001, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 1.1449654844084742, |
|
"grad_norm": 0.0014539804542437196, |
|
"learning_rate": 3.091724192652543e-05, |
|
"loss": 0.0001, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 1.1473458700309451, |
|
"grad_norm": 0.0026364317163825035, |
|
"learning_rate": 3.087756883281758e-05, |
|
"loss": 0.0006, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 1.1497262556534158, |
|
"grad_norm": 0.0010660483967512846, |
|
"learning_rate": 3.083789573910973e-05, |
|
"loss": 0.0001, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 1.1521066412758867, |
|
"grad_norm": 0.005250291433185339, |
|
"learning_rate": 3.079822264540189e-05, |
|
"loss": 0.0013, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 1.1544870268983576, |
|
"grad_norm": 0.0824214443564415, |
|
"learning_rate": 3.075854955169404e-05, |
|
"loss": 0.0002, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 1.1568674125208283, |
|
"grad_norm": 0.003175609977915883, |
|
"learning_rate": 3.07188764579862e-05, |
|
"loss": 0.0006, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 1.1592477981432991, |
|
"grad_norm": 0.0015882077859714627, |
|
"learning_rate": 3.067920336427835e-05, |
|
"loss": 0.0001, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 1.16162818376577, |
|
"grad_norm": 0.003802343737334013, |
|
"learning_rate": 3.06395302705705e-05, |
|
"loss": 0.0001, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 1.164008569388241, |
|
"grad_norm": 0.002745629521086812, |
|
"learning_rate": 3.059985717686265e-05, |
|
"loss": 0.0004, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 1.1663889550107118, |
|
"grad_norm": 0.006173206493258476, |
|
"learning_rate": 3.0560184083154805e-05, |
|
"loss": 0.0001, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 1.1687693406331825, |
|
"grad_norm": 0.006407946813851595, |
|
"learning_rate": 3.052051098944696e-05, |
|
"loss": 0.0001, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 1.1711497262556534, |
|
"grad_norm": 0.017478201538324356, |
|
"learning_rate": 3.0480837895739113e-05, |
|
"loss": 0.0003, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 1.1735301118781243, |
|
"grad_norm": 0.0035310271196067333, |
|
"learning_rate": 3.0441164802031263e-05, |
|
"loss": 0.0001, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 1.1759104975005952, |
|
"grad_norm": 0.0057274349965155125, |
|
"learning_rate": 3.0401491708323417e-05, |
|
"loss": 0.0001, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 1.1782908831230658, |
|
"grad_norm": 0.013580716215074062, |
|
"learning_rate": 3.0361818614615567e-05, |
|
"loss": 0.0001, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 1.1806712687455367, |
|
"grad_norm": 0.005545695312321186, |
|
"learning_rate": 3.0322145520907724e-05, |
|
"loss": 0.0001, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 1.1830516543680076, |
|
"grad_norm": 0.001243342412635684, |
|
"learning_rate": 3.0282472427199875e-05, |
|
"loss": 0.0001, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 1.1854320399904785, |
|
"grad_norm": 0.004315751604735851, |
|
"learning_rate": 3.0242799333492028e-05, |
|
"loss": 0.0001, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 1.1878124256129494, |
|
"grad_norm": 0.0020015877671539783, |
|
"learning_rate": 3.020312623978418e-05, |
|
"loss": 0.0001, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 1.19019281123542, |
|
"grad_norm": 0.0013068486005067825, |
|
"learning_rate": 3.0163453146076332e-05, |
|
"loss": 0.0001, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.192573196857891, |
|
"grad_norm": 0.0020259215962141752, |
|
"learning_rate": 3.0123780052368483e-05, |
|
"loss": 0.0001, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 1.1949535824803619, |
|
"grad_norm": 0.00229440163820982, |
|
"learning_rate": 3.008410695866064e-05, |
|
"loss": 0.0001, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 1.1973339681028325, |
|
"grad_norm": 0.006487131118774414, |
|
"learning_rate": 3.0044433864952793e-05, |
|
"loss": 0.0, |
|
"step": 5030 |
|
}, |
|
{ |
|
"epoch": 1.1997143537253034, |
|
"grad_norm": 0.0029580420814454556, |
|
"learning_rate": 3.0004760771244944e-05, |
|
"loss": 0.0001, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 1.2020947393477743, |
|
"grad_norm": 0.004215626046061516, |
|
"learning_rate": 2.9965087677537097e-05, |
|
"loss": 0.0002, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 1.2044751249702452, |
|
"grad_norm": 0.0045689307153224945, |
|
"learning_rate": 2.9925414583829248e-05, |
|
"loss": 0.0001, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 1.2068555105927161, |
|
"grad_norm": 0.0018343930132687092, |
|
"learning_rate": 2.9885741490121398e-05, |
|
"loss": 0.0001, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 1.2092358962151868, |
|
"grad_norm": 0.21103504300117493, |
|
"learning_rate": 2.984606839641355e-05, |
|
"loss": 0.0003, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 1.2116162818376577, |
|
"grad_norm": 0.04271009564399719, |
|
"learning_rate": 2.980639530270571e-05, |
|
"loss": 0.0003, |
|
"step": 5090 |
|
}, |
|
{ |
|
"epoch": 1.2139966674601286, |
|
"grad_norm": 0.008761608973145485, |
|
"learning_rate": 2.976672220899786e-05, |
|
"loss": 0.0002, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 1.2163770530825995, |
|
"grad_norm": 0.002944928128272295, |
|
"learning_rate": 2.9727049115290013e-05, |
|
"loss": 0.0004, |
|
"step": 5110 |
|
}, |
|
{ |
|
"epoch": 1.2187574387050701, |
|
"grad_norm": 0.0039098006673157215, |
|
"learning_rate": 2.9687376021582163e-05, |
|
"loss": 0.0001, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 1.221137824327541, |
|
"grad_norm": 0.007188912481069565, |
|
"learning_rate": 2.9647702927874317e-05, |
|
"loss": 0.0003, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 1.223518209950012, |
|
"grad_norm": 0.0020366155076771975, |
|
"learning_rate": 2.9608029834166467e-05, |
|
"loss": 0.0001, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 1.2258985955724828, |
|
"grad_norm": 0.0052825105376541615, |
|
"learning_rate": 2.9568356740458624e-05, |
|
"loss": 0.0001, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 1.2282789811949535, |
|
"grad_norm": 0.0322733074426651, |
|
"learning_rate": 2.9528683646750778e-05, |
|
"loss": 0.0002, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 1.2306593668174244, |
|
"grad_norm": 0.0030191782861948013, |
|
"learning_rate": 2.9489010553042928e-05, |
|
"loss": 0.0001, |
|
"step": 5170 |
|
}, |
|
{ |
|
"epoch": 1.2330397524398953, |
|
"grad_norm": 0.0158090703189373, |
|
"learning_rate": 2.944933745933508e-05, |
|
"loss": 0.0001, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 1.2354201380623662, |
|
"grad_norm": 0.0023131452035158873, |
|
"learning_rate": 2.9409664365627232e-05, |
|
"loss": 0.0002, |
|
"step": 5190 |
|
}, |
|
{ |
|
"epoch": 1.2378005236848368, |
|
"grad_norm": 0.0010957660852000117, |
|
"learning_rate": 2.9369991271919382e-05, |
|
"loss": 0.0001, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 1.2401809093073077, |
|
"grad_norm": 0.006251092534512281, |
|
"learning_rate": 2.933031817821154e-05, |
|
"loss": 0.0001, |
|
"step": 5210 |
|
}, |
|
{ |
|
"epoch": 1.2425612949297786, |
|
"grad_norm": 0.002981637604534626, |
|
"learning_rate": 2.9290645084503693e-05, |
|
"loss": 0.0001, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 1.2449416805522495, |
|
"grad_norm": 0.0044878036715090275, |
|
"learning_rate": 2.9250971990795844e-05, |
|
"loss": 0.0009, |
|
"step": 5230 |
|
}, |
|
{ |
|
"epoch": 1.2473220661747204, |
|
"grad_norm": 0.0026534402277320623, |
|
"learning_rate": 2.9211298897087997e-05, |
|
"loss": 0.0001, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 1.249702451797191, |
|
"grad_norm": 0.0017549542244523764, |
|
"learning_rate": 2.9171625803380148e-05, |
|
"loss": 0.0001, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 1.252082837419662, |
|
"grad_norm": 0.0030411062762141228, |
|
"learning_rate": 2.91319527096723e-05, |
|
"loss": 0.0001, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 1.2544632230421329, |
|
"grad_norm": 0.006810466758906841, |
|
"learning_rate": 2.909227961596445e-05, |
|
"loss": 0.0001, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 1.2568436086646035, |
|
"grad_norm": 0.008998183533549309, |
|
"learning_rate": 2.905260652225661e-05, |
|
"loss": 0.0001, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 1.2592239942870744, |
|
"grad_norm": 0.0006000595167279243, |
|
"learning_rate": 2.9012933428548762e-05, |
|
"loss": 0.0001, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 1.2616043799095453, |
|
"grad_norm": 0.0037659297231584787, |
|
"learning_rate": 2.8973260334840913e-05, |
|
"loss": 0.0001, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 1.2639847655320162, |
|
"grad_norm": 0.003123963950201869, |
|
"learning_rate": 2.8933587241133063e-05, |
|
"loss": 0.0001, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 1.2663651511544871, |
|
"grad_norm": 0.0024721056688576937, |
|
"learning_rate": 2.8893914147425217e-05, |
|
"loss": 0.0001, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 1.268745536776958, |
|
"grad_norm": 0.04851701855659485, |
|
"learning_rate": 2.8854241053717367e-05, |
|
"loss": 0.0002, |
|
"step": 5330 |
|
}, |
|
{ |
|
"epoch": 1.2711259223994287, |
|
"grad_norm": 0.0003437872801441699, |
|
"learning_rate": 2.8814567960009524e-05, |
|
"loss": 0.0, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 1.2735063080218996, |
|
"grad_norm": 0.36953097581863403, |
|
"learning_rate": 2.8774894866301678e-05, |
|
"loss": 0.0002, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 1.2758866936443705, |
|
"grad_norm": 0.004762616939842701, |
|
"learning_rate": 2.8735221772593828e-05, |
|
"loss": 0.0, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 1.2782670792668411, |
|
"grad_norm": 0.0032022674567997456, |
|
"learning_rate": 2.8695548678885982e-05, |
|
"loss": 0.0001, |
|
"step": 5370 |
|
}, |
|
{ |
|
"epoch": 1.280647464889312, |
|
"grad_norm": 0.112340047955513, |
|
"learning_rate": 2.8655875585178132e-05, |
|
"loss": 0.0001, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 1.283027850511783, |
|
"grad_norm": 0.0022161102388054132, |
|
"learning_rate": 2.8616202491470286e-05, |
|
"loss": 0.0001, |
|
"step": 5390 |
|
}, |
|
{ |
|
"epoch": 1.2854082361342538, |
|
"grad_norm": 0.0012134364806115627, |
|
"learning_rate": 2.8576529397762443e-05, |
|
"loss": 0.0001, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 1.2877886217567247, |
|
"grad_norm": 0.003832167712971568, |
|
"learning_rate": 2.8536856304054593e-05, |
|
"loss": 0.0001, |
|
"step": 5410 |
|
}, |
|
{ |
|
"epoch": 1.2901690073791954, |
|
"grad_norm": 0.001739076804369688, |
|
"learning_rate": 2.8497183210346743e-05, |
|
"loss": 0.0, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 1.2925493930016663, |
|
"grad_norm": 0.000749527825973928, |
|
"learning_rate": 2.8457510116638897e-05, |
|
"loss": 0.0001, |
|
"step": 5430 |
|
}, |
|
{ |
|
"epoch": 1.2949297786241372, |
|
"grad_norm": 0.006486440543085337, |
|
"learning_rate": 2.8417837022931047e-05, |
|
"loss": 0.0001, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 1.2973101642466078, |
|
"grad_norm": 0.002875624457374215, |
|
"learning_rate": 2.83781639292232e-05, |
|
"loss": 0.0003, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 1.2996905498690787, |
|
"grad_norm": 0.011916677467525005, |
|
"learning_rate": 2.833849083551535e-05, |
|
"loss": 0.0002, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 1.3020709354915496, |
|
"grad_norm": 0.014456122182309628, |
|
"learning_rate": 2.829881774180751e-05, |
|
"loss": 0.0001, |
|
"step": 5470 |
|
}, |
|
{ |
|
"epoch": 1.3044513211140205, |
|
"grad_norm": 0.00652431882917881, |
|
"learning_rate": 2.8259144648099662e-05, |
|
"loss": 0.0003, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 1.3068317067364914, |
|
"grad_norm": 0.004612395539879799, |
|
"learning_rate": 2.8219471554391813e-05, |
|
"loss": 0.0001, |
|
"step": 5490 |
|
}, |
|
{ |
|
"epoch": 1.309212092358962, |
|
"grad_norm": 0.0016554853646084666, |
|
"learning_rate": 2.8179798460683966e-05, |
|
"loss": 0.0001, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.311592477981433, |
|
"grad_norm": 0.00955954473465681, |
|
"learning_rate": 2.8140125366976117e-05, |
|
"loss": 0.0003, |
|
"step": 5510 |
|
}, |
|
{ |
|
"epoch": 1.3139728636039039, |
|
"grad_norm": 0.0014887260040268302, |
|
"learning_rate": 2.8100452273268267e-05, |
|
"loss": 0.0, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 1.3163532492263748, |
|
"grad_norm": 0.004022569395601749, |
|
"learning_rate": 2.8060779179560427e-05, |
|
"loss": 0.0001, |
|
"step": 5530 |
|
}, |
|
{ |
|
"epoch": 1.3187336348488454, |
|
"grad_norm": 0.01300437469035387, |
|
"learning_rate": 2.8021106085852578e-05, |
|
"loss": 0.0001, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 1.3211140204713163, |
|
"grad_norm": 0.0033303312957286835, |
|
"learning_rate": 2.7981432992144728e-05, |
|
"loss": 0.0001, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 1.3234944060937872, |
|
"grad_norm": 0.00033377157524228096, |
|
"learning_rate": 2.794175989843688e-05, |
|
"loss": 0.0001, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 1.325874791716258, |
|
"grad_norm": 0.001646155840717256, |
|
"learning_rate": 2.7902086804729032e-05, |
|
"loss": 0.0, |
|
"step": 5570 |
|
}, |
|
{ |
|
"epoch": 1.328255177338729, |
|
"grad_norm": 0.009458147920668125, |
|
"learning_rate": 2.7862413711021186e-05, |
|
"loss": 0.0, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 1.3306355629611997, |
|
"grad_norm": 0.044097207486629486, |
|
"learning_rate": 2.7822740617313343e-05, |
|
"loss": 0.0001, |
|
"step": 5590 |
|
}, |
|
{ |
|
"epoch": 1.3330159485836706, |
|
"grad_norm": 0.3018762469291687, |
|
"learning_rate": 2.7783067523605493e-05, |
|
"loss": 0.0003, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 1.3353963342061415, |
|
"grad_norm": 0.00142444740049541, |
|
"learning_rate": 2.7743394429897647e-05, |
|
"loss": 0.0, |
|
"step": 5610 |
|
}, |
|
{ |
|
"epoch": 1.3377767198286121, |
|
"grad_norm": 0.026065746322274208, |
|
"learning_rate": 2.7703721336189797e-05, |
|
"loss": 0.0001, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 1.340157105451083, |
|
"grad_norm": 0.002285444876179099, |
|
"learning_rate": 2.766404824248195e-05, |
|
"loss": 0.0004, |
|
"step": 5630 |
|
}, |
|
{ |
|
"epoch": 1.342537491073554, |
|
"grad_norm": 0.0023544467985630035, |
|
"learning_rate": 2.76243751487741e-05, |
|
"loss": 0.0001, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 1.3449178766960248, |
|
"grad_norm": 0.005093382205814123, |
|
"learning_rate": 2.758470205506625e-05, |
|
"loss": 0.0008, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 1.3472982623184957, |
|
"grad_norm": 0.01395428366959095, |
|
"learning_rate": 2.754502896135841e-05, |
|
"loss": 0.0001, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 1.3496786479409664, |
|
"grad_norm": 0.0021814145147800446, |
|
"learning_rate": 2.7505355867650562e-05, |
|
"loss": 0.0001, |
|
"step": 5670 |
|
}, |
|
{ |
|
"epoch": 1.3520590335634373, |
|
"grad_norm": 0.0020568270701915026, |
|
"learning_rate": 2.7465682773942712e-05, |
|
"loss": 0.0001, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 1.3544394191859082, |
|
"grad_norm": 0.001564579550176859, |
|
"learning_rate": 2.7426009680234866e-05, |
|
"loss": 0.0002, |
|
"step": 5690 |
|
}, |
|
{ |
|
"epoch": 1.3568198048083788, |
|
"grad_norm": 0.0009057559072971344, |
|
"learning_rate": 2.7386336586527016e-05, |
|
"loss": 0.0001, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 1.3592001904308497, |
|
"grad_norm": 0.005018309690058231, |
|
"learning_rate": 2.734666349281917e-05, |
|
"loss": 0.0001, |
|
"step": 5710 |
|
}, |
|
{ |
|
"epoch": 1.3615805760533206, |
|
"grad_norm": 0.0018629188416525722, |
|
"learning_rate": 2.7306990399111327e-05, |
|
"loss": 0.0003, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 1.3639609616757915, |
|
"grad_norm": 0.001482214662246406, |
|
"learning_rate": 2.7267317305403478e-05, |
|
"loss": 0.0001, |
|
"step": 5730 |
|
}, |
|
{ |
|
"epoch": 1.3663413472982624, |
|
"grad_norm": 0.012405039742588997, |
|
"learning_rate": 2.722764421169563e-05, |
|
"loss": 0.0005, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 1.3687217329207333, |
|
"grad_norm": 0.0018485913751646876, |
|
"learning_rate": 2.718797111798778e-05, |
|
"loss": 0.0, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 1.371102118543204, |
|
"grad_norm": 0.0015681314980611205, |
|
"learning_rate": 2.7148298024279932e-05, |
|
"loss": 0.0001, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 1.3734825041656749, |
|
"grad_norm": 0.017725007608532906, |
|
"learning_rate": 2.7108624930572086e-05, |
|
"loss": 0.0001, |
|
"step": 5770 |
|
}, |
|
{ |
|
"epoch": 1.3758628897881457, |
|
"grad_norm": 0.011187481693923473, |
|
"learning_rate": 2.7068951836864243e-05, |
|
"loss": 0.0001, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 1.3782432754106164, |
|
"grad_norm": 0.003125675953924656, |
|
"learning_rate": 2.7029278743156393e-05, |
|
"loss": 0.0001, |
|
"step": 5790 |
|
}, |
|
{ |
|
"epoch": 1.3806236610330873, |
|
"grad_norm": 0.004620529245585203, |
|
"learning_rate": 2.6989605649448547e-05, |
|
"loss": 0.0001, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 1.3830040466555582, |
|
"grad_norm": 0.004881042055785656, |
|
"learning_rate": 2.6949932555740697e-05, |
|
"loss": 0.0001, |
|
"step": 5810 |
|
}, |
|
{ |
|
"epoch": 1.385384432278029, |
|
"grad_norm": 0.015351341105997562, |
|
"learning_rate": 2.691025946203285e-05, |
|
"loss": 0.0001, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 1.3877648179005, |
|
"grad_norm": 0.06165415793657303, |
|
"learning_rate": 2.6870586368325e-05, |
|
"loss": 0.0001, |
|
"step": 5830 |
|
}, |
|
{ |
|
"epoch": 1.3901452035229707, |
|
"grad_norm": 0.000691259338054806, |
|
"learning_rate": 2.6830913274617155e-05, |
|
"loss": 0.0001, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 1.3925255891454416, |
|
"grad_norm": 0.006264138966798782, |
|
"learning_rate": 2.6791240180909312e-05, |
|
"loss": 0.0, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 1.3949059747679124, |
|
"grad_norm": 0.0016265185549855232, |
|
"learning_rate": 2.6751567087201462e-05, |
|
"loss": 0.0001, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 1.3972863603903831, |
|
"grad_norm": 0.0036318551283329725, |
|
"learning_rate": 2.6711893993493616e-05, |
|
"loss": 0.0, |
|
"step": 5870 |
|
}, |
|
{ |
|
"epoch": 1.399666746012854, |
|
"grad_norm": 0.0011168549535796046, |
|
"learning_rate": 2.6672220899785766e-05, |
|
"loss": 0.0001, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 1.402047131635325, |
|
"grad_norm": 0.011570369824767113, |
|
"learning_rate": 2.6632547806077916e-05, |
|
"loss": 0.0001, |
|
"step": 5890 |
|
}, |
|
{ |
|
"epoch": 1.4044275172577958, |
|
"grad_norm": 0.004564432427287102, |
|
"learning_rate": 2.659287471237007e-05, |
|
"loss": 0.0001, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 1.4068079028802667, |
|
"grad_norm": 0.003310930449515581, |
|
"learning_rate": 2.6553201618662227e-05, |
|
"loss": 0.0001, |
|
"step": 5910 |
|
}, |
|
{ |
|
"epoch": 1.4091882885027374, |
|
"grad_norm": 0.005474664270877838, |
|
"learning_rate": 2.6513528524954377e-05, |
|
"loss": 0.0, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 1.4115686741252083, |
|
"grad_norm": 0.003840883495286107, |
|
"learning_rate": 2.647385543124653e-05, |
|
"loss": 0.0, |
|
"step": 5930 |
|
}, |
|
{ |
|
"epoch": 1.4139490597476791, |
|
"grad_norm": 0.0011354766320437193, |
|
"learning_rate": 2.643418233753868e-05, |
|
"loss": 0.0001, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 1.41632944537015, |
|
"grad_norm": 0.0011250395327806473, |
|
"learning_rate": 2.6394509243830835e-05, |
|
"loss": 0.0001, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 1.4187098309926207, |
|
"grad_norm": 0.0025986500550061464, |
|
"learning_rate": 2.6354836150122985e-05, |
|
"loss": 0.0, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 1.4210902166150916, |
|
"grad_norm": 0.0018986169015988708, |
|
"learning_rate": 2.6315163056415143e-05, |
|
"loss": 0.0001, |
|
"step": 5970 |
|
}, |
|
{ |
|
"epoch": 1.4234706022375625, |
|
"grad_norm": 0.006072606425732374, |
|
"learning_rate": 2.6275489962707296e-05, |
|
"loss": 0.0001, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 1.4258509878600334, |
|
"grad_norm": 0.005382834933698177, |
|
"learning_rate": 2.6235816868999447e-05, |
|
"loss": 0.0001, |
|
"step": 5990 |
|
}, |
|
{ |
|
"epoch": 1.4282313734825043, |
|
"grad_norm": 0.0069602313451468945, |
|
"learning_rate": 2.6196143775291597e-05, |
|
"loss": 0.0001, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.430611759104975, |
|
"grad_norm": 0.00503483647480607, |
|
"learning_rate": 2.615647068158375e-05, |
|
"loss": 0.0001, |
|
"step": 6010 |
|
}, |
|
{ |
|
"epoch": 1.4329921447274458, |
|
"grad_norm": 0.009482208639383316, |
|
"learning_rate": 2.61167975878759e-05, |
|
"loss": 0.0001, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 1.4353725303499167, |
|
"grad_norm": 0.003071409650146961, |
|
"learning_rate": 2.6077124494168058e-05, |
|
"loss": 0.0003, |
|
"step": 6030 |
|
}, |
|
{ |
|
"epoch": 1.4377529159723874, |
|
"grad_norm": 0.025201931595802307, |
|
"learning_rate": 2.603745140046021e-05, |
|
"loss": 0.0002, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 1.4401333015948583, |
|
"grad_norm": 0.029845217242836952, |
|
"learning_rate": 2.5997778306752362e-05, |
|
"loss": 0.0001, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 1.4425136872173292, |
|
"grad_norm": 0.002946893684566021, |
|
"learning_rate": 2.5958105213044516e-05, |
|
"loss": 0.0001, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 1.4448940728398, |
|
"grad_norm": 0.002334748860448599, |
|
"learning_rate": 2.5918432119336666e-05, |
|
"loss": 0.0001, |
|
"step": 6070 |
|
}, |
|
{ |
|
"epoch": 1.447274458462271, |
|
"grad_norm": 0.0038676797412335873, |
|
"learning_rate": 2.587875902562882e-05, |
|
"loss": 0.0001, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 1.4496548440847417, |
|
"grad_norm": 0.39916858077049255, |
|
"learning_rate": 2.583908593192097e-05, |
|
"loss": 0.0005, |
|
"step": 6090 |
|
}, |
|
{ |
|
"epoch": 1.4520352297072125, |
|
"grad_norm": 0.005464503075927496, |
|
"learning_rate": 2.5799412838213127e-05, |
|
"loss": 0.0, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 1.4544156153296834, |
|
"grad_norm": 0.002350292168557644, |
|
"learning_rate": 2.5759739744505277e-05, |
|
"loss": 0.0001, |
|
"step": 6110 |
|
}, |
|
{ |
|
"epoch": 1.4567960009521541, |
|
"grad_norm": 0.02950800396502018, |
|
"learning_rate": 2.572006665079743e-05, |
|
"loss": 0.0001, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 1.459176386574625, |
|
"grad_norm": 0.0020270231179893017, |
|
"learning_rate": 2.568039355708958e-05, |
|
"loss": 0.0001, |
|
"step": 6130 |
|
}, |
|
{ |
|
"epoch": 1.461556772197096, |
|
"grad_norm": 0.29163315892219543, |
|
"learning_rate": 2.5640720463381735e-05, |
|
"loss": 0.0004, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 1.4639371578195668, |
|
"grad_norm": 0.0028463418129831553, |
|
"learning_rate": 2.5601047369673885e-05, |
|
"loss": 0.0001, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 1.4663175434420377, |
|
"grad_norm": 0.007839919067919254, |
|
"learning_rate": 2.5561374275966042e-05, |
|
"loss": 0.0, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 1.4686979290645086, |
|
"grad_norm": 0.0009790142066776752, |
|
"learning_rate": 2.5521701182258196e-05, |
|
"loss": 0.0001, |
|
"step": 6170 |
|
}, |
|
{ |
|
"epoch": 1.4710783146869792, |
|
"grad_norm": 0.019366919994354248, |
|
"learning_rate": 2.5482028088550346e-05, |
|
"loss": 0.0001, |
|
"step": 6180 |
|
}, |
|
{ |
|
"epoch": 1.4734587003094501, |
|
"grad_norm": 0.002335514174774289, |
|
"learning_rate": 2.54423549948425e-05, |
|
"loss": 0.0001, |
|
"step": 6190 |
|
}, |
|
{ |
|
"epoch": 1.475839085931921, |
|
"grad_norm": 0.004448035266250372, |
|
"learning_rate": 2.540268190113465e-05, |
|
"loss": 0.0, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 1.4782194715543917, |
|
"grad_norm": 0.0020590273197740316, |
|
"learning_rate": 2.53630088074268e-05, |
|
"loss": 0.0, |
|
"step": 6210 |
|
}, |
|
{ |
|
"epoch": 1.4805998571768626, |
|
"grad_norm": 0.0015115641290321946, |
|
"learning_rate": 2.532333571371896e-05, |
|
"loss": 0.0001, |
|
"step": 6220 |
|
}, |
|
{ |
|
"epoch": 1.4829802427993335, |
|
"grad_norm": 0.0024076756089925766, |
|
"learning_rate": 2.528366262001111e-05, |
|
"loss": 0.0003, |
|
"step": 6230 |
|
}, |
|
{ |
|
"epoch": 1.4853606284218044, |
|
"grad_norm": 0.0048133935779333115, |
|
"learning_rate": 2.5243989526303262e-05, |
|
"loss": 0.0001, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 1.4877410140442753, |
|
"grad_norm": 0.015479459427297115, |
|
"learning_rate": 2.5204316432595416e-05, |
|
"loss": 0.0001, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 1.490121399666746, |
|
"grad_norm": 0.1010046973824501, |
|
"learning_rate": 2.5164643338887566e-05, |
|
"loss": 0.0001, |
|
"step": 6260 |
|
}, |
|
{ |
|
"epoch": 1.4925017852892168, |
|
"grad_norm": 0.0011843384709209204, |
|
"learning_rate": 2.512497024517972e-05, |
|
"loss": 0.0002, |
|
"step": 6270 |
|
}, |
|
{ |
|
"epoch": 1.4948821709116877, |
|
"grad_norm": 0.002041852567344904, |
|
"learning_rate": 2.508529715147187e-05, |
|
"loss": 0.0001, |
|
"step": 6280 |
|
}, |
|
{ |
|
"epoch": 1.4972625565341584, |
|
"grad_norm": 0.002975156530737877, |
|
"learning_rate": 2.5045624057764027e-05, |
|
"loss": 0.0001, |
|
"step": 6290 |
|
}, |
|
{ |
|
"epoch": 1.4996429421566293, |
|
"grad_norm": 0.005752989556640387, |
|
"learning_rate": 2.500595096405618e-05, |
|
"loss": 0.0001, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 1.5020233277791002, |
|
"grad_norm": 0.002325852634385228, |
|
"learning_rate": 2.496627787034833e-05, |
|
"loss": 0.0, |
|
"step": 6310 |
|
}, |
|
{ |
|
"epoch": 1.504403713401571, |
|
"grad_norm": 0.006379146594554186, |
|
"learning_rate": 2.4926604776640485e-05, |
|
"loss": 0.0001, |
|
"step": 6320 |
|
}, |
|
{ |
|
"epoch": 1.506784099024042, |
|
"grad_norm": 0.0011644313344731927, |
|
"learning_rate": 2.488693168293264e-05, |
|
"loss": 0.0, |
|
"step": 6330 |
|
}, |
|
{ |
|
"epoch": 1.5091644846465129, |
|
"grad_norm": 0.06679144501686096, |
|
"learning_rate": 2.484725858922479e-05, |
|
"loss": 0.0001, |
|
"step": 6340 |
|
}, |
|
{ |
|
"epoch": 1.5115448702689835, |
|
"grad_norm": 0.010065040551126003, |
|
"learning_rate": 2.4807585495516942e-05, |
|
"loss": 0.0003, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 1.5139252558914544, |
|
"grad_norm": 0.00404448714107275, |
|
"learning_rate": 2.4767912401809093e-05, |
|
"loss": 0.0001, |
|
"step": 6360 |
|
}, |
|
{ |
|
"epoch": 1.516305641513925, |
|
"grad_norm": 0.005027102772146463, |
|
"learning_rate": 2.4728239308101246e-05, |
|
"loss": 0.0001, |
|
"step": 6370 |
|
}, |
|
{ |
|
"epoch": 1.518686027136396, |
|
"grad_norm": 0.0007329948712140322, |
|
"learning_rate": 2.46885662143934e-05, |
|
"loss": 0.0001, |
|
"step": 6380 |
|
}, |
|
{ |
|
"epoch": 1.521066412758867, |
|
"grad_norm": 0.008010495454072952, |
|
"learning_rate": 2.464889312068555e-05, |
|
"loss": 0.0001, |
|
"step": 6390 |
|
}, |
|
{ |
|
"epoch": 1.5234467983813378, |
|
"grad_norm": 0.0004263845912646502, |
|
"learning_rate": 2.4609220026977704e-05, |
|
"loss": 0.0, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 1.5258271840038087, |
|
"grad_norm": 0.0008505060104653239, |
|
"learning_rate": 2.4569546933269858e-05, |
|
"loss": 0.0001, |
|
"step": 6410 |
|
}, |
|
{ |
|
"epoch": 1.5282075696262796, |
|
"grad_norm": 0.005009577609598637, |
|
"learning_rate": 2.4529873839562008e-05, |
|
"loss": 0.0001, |
|
"step": 6420 |
|
}, |
|
{ |
|
"epoch": 1.5305879552487502, |
|
"grad_norm": 0.0055831428617239, |
|
"learning_rate": 2.4490200745854165e-05, |
|
"loss": 0.0, |
|
"step": 6430 |
|
}, |
|
{ |
|
"epoch": 1.5329683408712211, |
|
"grad_norm": 0.0025661292020231485, |
|
"learning_rate": 2.4450527652146315e-05, |
|
"loss": 0.0002, |
|
"step": 6440 |
|
}, |
|
{ |
|
"epoch": 1.535348726493692, |
|
"grad_norm": 0.002652715193107724, |
|
"learning_rate": 2.4410854558438466e-05, |
|
"loss": 0.0, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 1.5377291121161627, |
|
"grad_norm": 0.0017773109721019864, |
|
"learning_rate": 2.4371181464730623e-05, |
|
"loss": 0.0001, |
|
"step": 6460 |
|
}, |
|
{ |
|
"epoch": 1.5401094977386336, |
|
"grad_norm": 0.023734472692012787, |
|
"learning_rate": 2.4331508371022773e-05, |
|
"loss": 0.0001, |
|
"step": 6470 |
|
}, |
|
{ |
|
"epoch": 1.5424898833611045, |
|
"grad_norm": 0.0018312609754502773, |
|
"learning_rate": 2.4291835277314927e-05, |
|
"loss": 0.0001, |
|
"step": 6480 |
|
}, |
|
{ |
|
"epoch": 1.5448702689835754, |
|
"grad_norm": 0.004327055066823959, |
|
"learning_rate": 2.425216218360708e-05, |
|
"loss": 0.0001, |
|
"step": 6490 |
|
}, |
|
{ |
|
"epoch": 1.5472506546060463, |
|
"grad_norm": 0.0021172019187361, |
|
"learning_rate": 2.421248908989923e-05, |
|
"loss": 0.0001, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.5496310402285172, |
|
"grad_norm": 0.001905101933516562, |
|
"learning_rate": 2.4172815996191385e-05, |
|
"loss": 0.0, |
|
"step": 6510 |
|
}, |
|
{ |
|
"epoch": 1.5520114258509878, |
|
"grad_norm": 0.0016990803414955735, |
|
"learning_rate": 2.4133142902483538e-05, |
|
"loss": 0.0001, |
|
"step": 6520 |
|
}, |
|
{ |
|
"epoch": 1.5543918114734587, |
|
"grad_norm": 0.0022508346009999514, |
|
"learning_rate": 2.409346980877569e-05, |
|
"loss": 0.0001, |
|
"step": 6530 |
|
}, |
|
{ |
|
"epoch": 1.5567721970959294, |
|
"grad_norm": 0.0018837592797353864, |
|
"learning_rate": 2.4053796715067842e-05, |
|
"loss": 0.0001, |
|
"step": 6540 |
|
}, |
|
{ |
|
"epoch": 1.5591525827184003, |
|
"grad_norm": 0.001968635246157646, |
|
"learning_rate": 2.4014123621359993e-05, |
|
"loss": 0.0002, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 1.5615329683408712, |
|
"grad_norm": 0.0019730927888303995, |
|
"learning_rate": 2.397445052765215e-05, |
|
"loss": 0.0001, |
|
"step": 6560 |
|
}, |
|
{ |
|
"epoch": 1.563913353963342, |
|
"grad_norm": 0.0006384404841810465, |
|
"learning_rate": 2.39347774339443e-05, |
|
"loss": 0.0, |
|
"step": 6570 |
|
}, |
|
{ |
|
"epoch": 1.566293739585813, |
|
"grad_norm": 0.05303851515054703, |
|
"learning_rate": 2.389510434023645e-05, |
|
"loss": 0.0002, |
|
"step": 6580 |
|
}, |
|
{ |
|
"epoch": 1.5686741252082839, |
|
"grad_norm": 0.009338784962892532, |
|
"learning_rate": 2.3855431246528607e-05, |
|
"loss": 0.0, |
|
"step": 6590 |
|
}, |
|
{ |
|
"epoch": 1.5710545108307545, |
|
"grad_norm": 0.001042340649291873, |
|
"learning_rate": 2.3815758152820758e-05, |
|
"loss": 0.0, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 1.5734348964532254, |
|
"grad_norm": 0.008856063708662987, |
|
"learning_rate": 2.377608505911291e-05, |
|
"loss": 0.0001, |
|
"step": 6610 |
|
}, |
|
{ |
|
"epoch": 1.575815282075696, |
|
"grad_norm": 0.0010636444203555584, |
|
"learning_rate": 2.3736411965405065e-05, |
|
"loss": 0.0001, |
|
"step": 6620 |
|
}, |
|
{ |
|
"epoch": 1.578195667698167, |
|
"grad_norm": 0.044303007423877716, |
|
"learning_rate": 2.3696738871697215e-05, |
|
"loss": 0.0001, |
|
"step": 6630 |
|
}, |
|
{ |
|
"epoch": 1.5805760533206379, |
|
"grad_norm": 0.003368295030668378, |
|
"learning_rate": 2.365706577798937e-05, |
|
"loss": 0.0001, |
|
"step": 6640 |
|
}, |
|
{ |
|
"epoch": 1.5829564389431088, |
|
"grad_norm": 0.0010406200308352709, |
|
"learning_rate": 2.3617392684281523e-05, |
|
"loss": 0.0001, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 1.5853368245655797, |
|
"grad_norm": 0.009850569069385529, |
|
"learning_rate": 2.3577719590573673e-05, |
|
"loss": 0.0, |
|
"step": 6660 |
|
}, |
|
{ |
|
"epoch": 1.5877172101880506, |
|
"grad_norm": 0.00514467665925622, |
|
"learning_rate": 2.3538046496865827e-05, |
|
"loss": 0.0001, |
|
"step": 6670 |
|
}, |
|
{ |
|
"epoch": 1.5900975958105215, |
|
"grad_norm": 0.00200643390417099, |
|
"learning_rate": 2.349837340315798e-05, |
|
"loss": 0.0002, |
|
"step": 6680 |
|
}, |
|
{ |
|
"epoch": 1.5924779814329921, |
|
"grad_norm": 0.01371715497225523, |
|
"learning_rate": 2.345870030945013e-05, |
|
"loss": 0.0002, |
|
"step": 6690 |
|
}, |
|
{ |
|
"epoch": 1.594858367055463, |
|
"grad_norm": 0.0005170275107957423, |
|
"learning_rate": 2.3419027215742284e-05, |
|
"loss": 0.0001, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 1.5972387526779337, |
|
"grad_norm": 0.0018967930227518082, |
|
"learning_rate": 2.3379354122034438e-05, |
|
"loss": 0.0, |
|
"step": 6710 |
|
}, |
|
{ |
|
"epoch": 1.5996191383004046, |
|
"grad_norm": 0.002288557356223464, |
|
"learning_rate": 2.3339681028326592e-05, |
|
"loss": 0.0003, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 1.6019995239228755, |
|
"grad_norm": 0.0017687254585325718, |
|
"learning_rate": 2.3300007934618742e-05, |
|
"loss": 0.0001, |
|
"step": 6730 |
|
}, |
|
{ |
|
"epoch": 1.6043799095453464, |
|
"grad_norm": 0.023880669847130775, |
|
"learning_rate": 2.3260334840910893e-05, |
|
"loss": 0.0001, |
|
"step": 6740 |
|
}, |
|
{ |
|
"epoch": 1.6067602951678173, |
|
"grad_norm": 0.004767647013068199, |
|
"learning_rate": 2.322066174720305e-05, |
|
"loss": 0.0001, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 1.6091406807902882, |
|
"grad_norm": 0.0016061540227383375, |
|
"learning_rate": 2.31809886534952e-05, |
|
"loss": 0.0001, |
|
"step": 6760 |
|
}, |
|
{ |
|
"epoch": 1.6115210664127588, |
|
"grad_norm": 0.009586431086063385, |
|
"learning_rate": 2.3141315559787354e-05, |
|
"loss": 0.0001, |
|
"step": 6770 |
|
}, |
|
{ |
|
"epoch": 1.6139014520352297, |
|
"grad_norm": 0.003596968250349164, |
|
"learning_rate": 2.3101642466079507e-05, |
|
"loss": 0.0001, |
|
"step": 6780 |
|
}, |
|
{ |
|
"epoch": 1.6162818376577004, |
|
"grad_norm": 0.003184641245752573, |
|
"learning_rate": 2.3061969372371658e-05, |
|
"loss": 0.0001, |
|
"step": 6790 |
|
}, |
|
{ |
|
"epoch": 1.6186622232801713, |
|
"grad_norm": 0.02113034948706627, |
|
"learning_rate": 2.302229627866381e-05, |
|
"loss": 0.0, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 1.6210426089026422, |
|
"grad_norm": 0.0022694601211696863, |
|
"learning_rate": 2.2982623184955965e-05, |
|
"loss": 0.0001, |
|
"step": 6810 |
|
}, |
|
{ |
|
"epoch": 1.623422994525113, |
|
"grad_norm": 0.0007104437099769711, |
|
"learning_rate": 2.2942950091248115e-05, |
|
"loss": 0.0, |
|
"step": 6820 |
|
}, |
|
{ |
|
"epoch": 1.625803380147584, |
|
"grad_norm": 0.004562158603221178, |
|
"learning_rate": 2.290327699754027e-05, |
|
"loss": 0.0001, |
|
"step": 6830 |
|
}, |
|
{ |
|
"epoch": 1.6281837657700549, |
|
"grad_norm": 0.0015846043825149536, |
|
"learning_rate": 2.2863603903832423e-05, |
|
"loss": 0.0001, |
|
"step": 6840 |
|
}, |
|
{ |
|
"epoch": 1.6305641513925258, |
|
"grad_norm": 0.012255080044269562, |
|
"learning_rate": 2.2823930810124573e-05, |
|
"loss": 0.0001, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 1.6329445370149964, |
|
"grad_norm": 0.0012517154682427645, |
|
"learning_rate": 2.2784257716416727e-05, |
|
"loss": 0.0001, |
|
"step": 6860 |
|
}, |
|
{ |
|
"epoch": 1.6353249226374673, |
|
"grad_norm": 0.0006557099404744804, |
|
"learning_rate": 2.274458462270888e-05, |
|
"loss": 0.0001, |
|
"step": 6870 |
|
}, |
|
{ |
|
"epoch": 1.637705308259938, |
|
"grad_norm": 0.0007641498814336956, |
|
"learning_rate": 2.2704911529001034e-05, |
|
"loss": 0.0, |
|
"step": 6880 |
|
}, |
|
{ |
|
"epoch": 1.6400856938824089, |
|
"grad_norm": 0.005642781965434551, |
|
"learning_rate": 2.2665238435293184e-05, |
|
"loss": 0.0, |
|
"step": 6890 |
|
}, |
|
{ |
|
"epoch": 1.6424660795048798, |
|
"grad_norm": 0.0022149153519421816, |
|
"learning_rate": 2.2625565341585338e-05, |
|
"loss": 0.0004, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 1.6448464651273507, |
|
"grad_norm": 0.8982350826263428, |
|
"learning_rate": 2.2585892247877492e-05, |
|
"loss": 0.0003, |
|
"step": 6910 |
|
}, |
|
{ |
|
"epoch": 1.6472268507498216, |
|
"grad_norm": 0.002032769611105323, |
|
"learning_rate": 2.2546219154169642e-05, |
|
"loss": 0.0001, |
|
"step": 6920 |
|
}, |
|
{ |
|
"epoch": 1.6496072363722925, |
|
"grad_norm": 0.0021233465522527695, |
|
"learning_rate": 2.2506546060461796e-05, |
|
"loss": 0.0001, |
|
"step": 6930 |
|
}, |
|
{ |
|
"epoch": 1.6519876219947631, |
|
"grad_norm": 0.019824443385004997, |
|
"learning_rate": 2.246687296675395e-05, |
|
"loss": 0.0001, |
|
"step": 6940 |
|
}, |
|
{ |
|
"epoch": 1.654368007617234, |
|
"grad_norm": 0.002160045551136136, |
|
"learning_rate": 2.24271998730461e-05, |
|
"loss": 0.0001, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 1.6567483932397047, |
|
"grad_norm": 0.002742405980825424, |
|
"learning_rate": 2.2387526779338254e-05, |
|
"loss": 0.0003, |
|
"step": 6960 |
|
}, |
|
{ |
|
"epoch": 1.6591287788621756, |
|
"grad_norm": 0.04358428716659546, |
|
"learning_rate": 2.2347853685630407e-05, |
|
"loss": 0.0003, |
|
"step": 6970 |
|
}, |
|
{ |
|
"epoch": 1.6615091644846465, |
|
"grad_norm": 0.0023650035727769136, |
|
"learning_rate": 2.2308180591922558e-05, |
|
"loss": 0.0, |
|
"step": 6980 |
|
}, |
|
{ |
|
"epoch": 1.6638895501071174, |
|
"grad_norm": 0.0027010326739400625, |
|
"learning_rate": 2.226850749821471e-05, |
|
"loss": 0.0001, |
|
"step": 6990 |
|
}, |
|
{ |
|
"epoch": 1.6662699357295883, |
|
"grad_norm": 0.01885942928493023, |
|
"learning_rate": 2.2228834404506865e-05, |
|
"loss": 0.0001, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.6686503213520592, |
|
"grad_norm": 0.013014287687838078, |
|
"learning_rate": 2.218916131079902e-05, |
|
"loss": 0.0004, |
|
"step": 7010 |
|
}, |
|
{ |
|
"epoch": 1.6710307069745298, |
|
"grad_norm": 0.0015542235923931003, |
|
"learning_rate": 2.214948821709117e-05, |
|
"loss": 0.0001, |
|
"step": 7020 |
|
}, |
|
{ |
|
"epoch": 1.6734110925970007, |
|
"grad_norm": 0.011335782706737518, |
|
"learning_rate": 2.2109815123383323e-05, |
|
"loss": 0.0, |
|
"step": 7030 |
|
}, |
|
{ |
|
"epoch": 1.6757914782194716, |
|
"grad_norm": 0.1068568155169487, |
|
"learning_rate": 2.2070142029675476e-05, |
|
"loss": 0.0001, |
|
"step": 7040 |
|
}, |
|
{ |
|
"epoch": 1.6781718638419423, |
|
"grad_norm": 0.004407468251883984, |
|
"learning_rate": 2.2030468935967627e-05, |
|
"loss": 0.0001, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 1.6805522494644132, |
|
"grad_norm": 0.0026373250875622034, |
|
"learning_rate": 2.199079584225978e-05, |
|
"loss": 0.0, |
|
"step": 7060 |
|
}, |
|
{ |
|
"epoch": 1.682932635086884, |
|
"grad_norm": 0.020453903824090958, |
|
"learning_rate": 2.1951122748551934e-05, |
|
"loss": 0.0001, |
|
"step": 7070 |
|
}, |
|
{ |
|
"epoch": 1.685313020709355, |
|
"grad_norm": 0.009605340659618378, |
|
"learning_rate": 2.1911449654844084e-05, |
|
"loss": 0.0003, |
|
"step": 7080 |
|
}, |
|
{ |
|
"epoch": 1.6876934063318259, |
|
"grad_norm": 0.0008563417941331863, |
|
"learning_rate": 2.1871776561136238e-05, |
|
"loss": 0.0, |
|
"step": 7090 |
|
}, |
|
{ |
|
"epoch": 1.6900737919542967, |
|
"grad_norm": 0.0017095934599637985, |
|
"learning_rate": 2.1832103467428392e-05, |
|
"loss": 0.0001, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 1.6924541775767674, |
|
"grad_norm": 0.0017231311649084091, |
|
"learning_rate": 2.1792430373720542e-05, |
|
"loss": 0.0001, |
|
"step": 7110 |
|
}, |
|
{ |
|
"epoch": 1.6948345631992383, |
|
"grad_norm": 0.0004322198801673949, |
|
"learning_rate": 2.17527572800127e-05, |
|
"loss": 0.0001, |
|
"step": 7120 |
|
}, |
|
{ |
|
"epoch": 1.697214948821709, |
|
"grad_norm": 0.06828305870294571, |
|
"learning_rate": 2.171308418630485e-05, |
|
"loss": 0.0003, |
|
"step": 7130 |
|
}, |
|
{ |
|
"epoch": 1.6995953344441799, |
|
"grad_norm": 0.012662236578762531, |
|
"learning_rate": 2.1673411092597e-05, |
|
"loss": 0.0, |
|
"step": 7140 |
|
}, |
|
{ |
|
"epoch": 1.7019757200666508, |
|
"grad_norm": 0.0004414702707435936, |
|
"learning_rate": 2.1633737998889153e-05, |
|
"loss": 0.0, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 1.7043561056891217, |
|
"grad_norm": 0.0018225832609459758, |
|
"learning_rate": 2.1594064905181307e-05, |
|
"loss": 0.0001, |
|
"step": 7160 |
|
}, |
|
{ |
|
"epoch": 1.7067364913115926, |
|
"grad_norm": 0.10008008033037186, |
|
"learning_rate": 2.155439181147346e-05, |
|
"loss": 0.0001, |
|
"step": 7170 |
|
}, |
|
{ |
|
"epoch": 1.7091168769340634, |
|
"grad_norm": 0.0027361391112208366, |
|
"learning_rate": 2.151471871776561e-05, |
|
"loss": 0.0, |
|
"step": 7180 |
|
}, |
|
{ |
|
"epoch": 1.7114972625565341, |
|
"grad_norm": 0.0021505611948668957, |
|
"learning_rate": 2.1475045624057765e-05, |
|
"loss": 0.0, |
|
"step": 7190 |
|
}, |
|
{ |
|
"epoch": 1.713877648179005, |
|
"grad_norm": 0.00697895884513855, |
|
"learning_rate": 2.143537253034992e-05, |
|
"loss": 0.0, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 1.7162580338014757, |
|
"grad_norm": 0.002057724166661501, |
|
"learning_rate": 2.139569943664207e-05, |
|
"loss": 0.0, |
|
"step": 7210 |
|
}, |
|
{ |
|
"epoch": 1.7186384194239466, |
|
"grad_norm": 0.002399923512712121, |
|
"learning_rate": 2.1356026342934223e-05, |
|
"loss": 0.0001, |
|
"step": 7220 |
|
}, |
|
{ |
|
"epoch": 1.7210188050464175, |
|
"grad_norm": 0.3061892092227936, |
|
"learning_rate": 2.1316353249226376e-05, |
|
"loss": 0.0002, |
|
"step": 7230 |
|
}, |
|
{ |
|
"epoch": 1.7233991906688884, |
|
"grad_norm": 0.004888875875622034, |
|
"learning_rate": 2.1276680155518527e-05, |
|
"loss": 0.0, |
|
"step": 7240 |
|
}, |
|
{ |
|
"epoch": 1.7257795762913593, |
|
"grad_norm": 0.04453931376338005, |
|
"learning_rate": 2.1237007061810684e-05, |
|
"loss": 0.0001, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 1.7281599619138301, |
|
"grad_norm": 0.02463744953274727, |
|
"learning_rate": 2.1197333968102834e-05, |
|
"loss": 0.0001, |
|
"step": 7260 |
|
}, |
|
{ |
|
"epoch": 1.730540347536301, |
|
"grad_norm": 0.002113641705363989, |
|
"learning_rate": 2.1157660874394984e-05, |
|
"loss": 0.0001, |
|
"step": 7270 |
|
}, |
|
{ |
|
"epoch": 1.7329207331587717, |
|
"grad_norm": 0.0024889137130230665, |
|
"learning_rate": 2.111798778068714e-05, |
|
"loss": 0.0001, |
|
"step": 7280 |
|
}, |
|
{ |
|
"epoch": 1.7353011187812426, |
|
"grad_norm": 0.10477261245250702, |
|
"learning_rate": 2.107831468697929e-05, |
|
"loss": 0.0001, |
|
"step": 7290 |
|
}, |
|
{ |
|
"epoch": 1.7376815044037133, |
|
"grad_norm": 0.0008585329633206129, |
|
"learning_rate": 2.1038641593271445e-05, |
|
"loss": 0.0001, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 1.7400618900261842, |
|
"grad_norm": 0.17968738079071045, |
|
"learning_rate": 2.09989684995636e-05, |
|
"loss": 0.0001, |
|
"step": 7310 |
|
}, |
|
{ |
|
"epoch": 1.742442275648655, |
|
"grad_norm": 0.0023223140742629766, |
|
"learning_rate": 2.095929540585575e-05, |
|
"loss": 0.0001, |
|
"step": 7320 |
|
}, |
|
{ |
|
"epoch": 1.744822661271126, |
|
"grad_norm": 0.0016741958679631352, |
|
"learning_rate": 2.0919622312147903e-05, |
|
"loss": 0.0002, |
|
"step": 7330 |
|
}, |
|
{ |
|
"epoch": 1.7472030468935968, |
|
"grad_norm": 0.009992700070142746, |
|
"learning_rate": 2.0879949218440053e-05, |
|
"loss": 0.0, |
|
"step": 7340 |
|
}, |
|
{ |
|
"epoch": 1.7495834325160677, |
|
"grad_norm": 0.002163327531889081, |
|
"learning_rate": 2.0840276124732207e-05, |
|
"loss": 0.0, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 1.7519638181385384, |
|
"grad_norm": 0.15539680421352386, |
|
"learning_rate": 2.080060303102436e-05, |
|
"loss": 0.0003, |
|
"step": 7360 |
|
}, |
|
{ |
|
"epoch": 1.7543442037610093, |
|
"grad_norm": 0.002331450814381242, |
|
"learning_rate": 2.076092993731651e-05, |
|
"loss": 0.0, |
|
"step": 7370 |
|
}, |
|
{ |
|
"epoch": 1.75672458938348, |
|
"grad_norm": 0.0014541965210810304, |
|
"learning_rate": 2.0721256843608665e-05, |
|
"loss": 0.0, |
|
"step": 7380 |
|
}, |
|
{ |
|
"epoch": 1.7591049750059509, |
|
"grad_norm": 0.002874292666092515, |
|
"learning_rate": 2.068158374990082e-05, |
|
"loss": 0.0, |
|
"step": 7390 |
|
}, |
|
{ |
|
"epoch": 1.7614853606284218, |
|
"grad_norm": 0.046790674328804016, |
|
"learning_rate": 2.064191065619297e-05, |
|
"loss": 0.0001, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 1.7638657462508927, |
|
"grad_norm": 0.012541896663606167, |
|
"learning_rate": 2.0602237562485126e-05, |
|
"loss": 0.0001, |
|
"step": 7410 |
|
}, |
|
{ |
|
"epoch": 1.7662461318733635, |
|
"grad_norm": 0.0005884987185709178, |
|
"learning_rate": 2.0562564468777276e-05, |
|
"loss": 0.0001, |
|
"step": 7420 |
|
}, |
|
{ |
|
"epoch": 1.7686265174958344, |
|
"grad_norm": 0.0090475520119071, |
|
"learning_rate": 2.0522891375069426e-05, |
|
"loss": 0.0001, |
|
"step": 7430 |
|
}, |
|
{ |
|
"epoch": 1.7710069031183053, |
|
"grad_norm": 0.04852410405874252, |
|
"learning_rate": 2.0483218281361584e-05, |
|
"loss": 0.0001, |
|
"step": 7440 |
|
}, |
|
{ |
|
"epoch": 1.773387288740776, |
|
"grad_norm": 0.003311296459287405, |
|
"learning_rate": 2.0443545187653734e-05, |
|
"loss": 0.0001, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 1.775767674363247, |
|
"grad_norm": 0.03242022171616554, |
|
"learning_rate": 2.0403872093945888e-05, |
|
"loss": 0.0001, |
|
"step": 7460 |
|
}, |
|
{ |
|
"epoch": 1.7781480599857176, |
|
"grad_norm": 0.010833712294697762, |
|
"learning_rate": 2.036419900023804e-05, |
|
"loss": 0.0002, |
|
"step": 7470 |
|
}, |
|
{ |
|
"epoch": 1.7805284456081885, |
|
"grad_norm": 0.0031983698718249798, |
|
"learning_rate": 2.032452590653019e-05, |
|
"loss": 0.0001, |
|
"step": 7480 |
|
}, |
|
{ |
|
"epoch": 1.7829088312306594, |
|
"grad_norm": 0.021590987220406532, |
|
"learning_rate": 2.0284852812822345e-05, |
|
"loss": 0.0001, |
|
"step": 7490 |
|
}, |
|
{ |
|
"epoch": 1.7852892168531302, |
|
"grad_norm": 0.005147872492671013, |
|
"learning_rate": 2.02451797191145e-05, |
|
"loss": 0.0, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.7876696024756011, |
|
"grad_norm": 0.0012411813950166106, |
|
"learning_rate": 2.020550662540665e-05, |
|
"loss": 0.0001, |
|
"step": 7510 |
|
}, |
|
{ |
|
"epoch": 1.790049988098072, |
|
"grad_norm": 0.0009874672396108508, |
|
"learning_rate": 2.0165833531698803e-05, |
|
"loss": 0.0001, |
|
"step": 7520 |
|
}, |
|
{ |
|
"epoch": 1.7924303737205427, |
|
"grad_norm": 0.002135714516043663, |
|
"learning_rate": 2.0126160437990957e-05, |
|
"loss": 0.0003, |
|
"step": 7530 |
|
}, |
|
{ |
|
"epoch": 1.7948107593430136, |
|
"grad_norm": 0.002928838599473238, |
|
"learning_rate": 2.008648734428311e-05, |
|
"loss": 0.0002, |
|
"step": 7540 |
|
}, |
|
{ |
|
"epoch": 1.7971911449654843, |
|
"grad_norm": 0.002418682212010026, |
|
"learning_rate": 2.004681425057526e-05, |
|
"loss": 0.0001, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 1.7995715305879552, |
|
"grad_norm": 0.022359730675816536, |
|
"learning_rate": 2.000714115686741e-05, |
|
"loss": 0.0, |
|
"step": 7560 |
|
}, |
|
{ |
|
"epoch": 1.801951916210426, |
|
"grad_norm": 0.0013171250466257334, |
|
"learning_rate": 1.9967468063159568e-05, |
|
"loss": 0.0001, |
|
"step": 7570 |
|
}, |
|
{ |
|
"epoch": 1.804332301832897, |
|
"grad_norm": 0.005206149537116289, |
|
"learning_rate": 1.992779496945172e-05, |
|
"loss": 0.0001, |
|
"step": 7580 |
|
}, |
|
{ |
|
"epoch": 1.8067126874553678, |
|
"grad_norm": 0.5035125613212585, |
|
"learning_rate": 1.988812187574387e-05, |
|
"loss": 0.0004, |
|
"step": 7590 |
|
}, |
|
{ |
|
"epoch": 1.8090930730778387, |
|
"grad_norm": 0.0018090710509568453, |
|
"learning_rate": 1.9848448782036026e-05, |
|
"loss": 0.0, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 1.8114734587003094, |
|
"grad_norm": 0.0020274862181395292, |
|
"learning_rate": 1.9808775688328176e-05, |
|
"loss": 0.0, |
|
"step": 7610 |
|
}, |
|
{ |
|
"epoch": 1.8138538443227803, |
|
"grad_norm": 0.008559592068195343, |
|
"learning_rate": 1.976910259462033e-05, |
|
"loss": 0.0001, |
|
"step": 7620 |
|
}, |
|
{ |
|
"epoch": 1.816234229945251, |
|
"grad_norm": 0.002766631543636322, |
|
"learning_rate": 1.9729429500912483e-05, |
|
"loss": 0.0, |
|
"step": 7630 |
|
}, |
|
{ |
|
"epoch": 1.8186146155677219, |
|
"grad_norm": 0.003933802247047424, |
|
"learning_rate": 1.9689756407204634e-05, |
|
"loss": 0.0001, |
|
"step": 7640 |
|
}, |
|
{ |
|
"epoch": 1.8209950011901928, |
|
"grad_norm": 0.0502641461789608, |
|
"learning_rate": 1.9650083313496787e-05, |
|
"loss": 0.0001, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 1.8233753868126636, |
|
"grad_norm": 0.002705627353861928, |
|
"learning_rate": 1.961041021978894e-05, |
|
"loss": 0.0, |
|
"step": 7660 |
|
}, |
|
{ |
|
"epoch": 1.8257557724351345, |
|
"grad_norm": 0.015057703480124474, |
|
"learning_rate": 1.957073712608109e-05, |
|
"loss": 0.0001, |
|
"step": 7670 |
|
}, |
|
{ |
|
"epoch": 1.8281361580576054, |
|
"grad_norm": 0.0005775150493718684, |
|
"learning_rate": 1.9531064032373245e-05, |
|
"loss": 0.0001, |
|
"step": 7680 |
|
}, |
|
{ |
|
"epoch": 1.8305165436800763, |
|
"grad_norm": 0.006392305716872215, |
|
"learning_rate": 1.94913909386654e-05, |
|
"loss": 0.0001, |
|
"step": 7690 |
|
}, |
|
{ |
|
"epoch": 1.832896929302547, |
|
"grad_norm": 0.0014930195175111294, |
|
"learning_rate": 1.9451717844957553e-05, |
|
"loss": 0.0002, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 1.8352773149250179, |
|
"grad_norm": 0.0161952693015337, |
|
"learning_rate": 1.9412044751249703e-05, |
|
"loss": 0.0001, |
|
"step": 7710 |
|
}, |
|
{ |
|
"epoch": 1.8376577005474886, |
|
"grad_norm": 0.0019109123386442661, |
|
"learning_rate": 1.9372371657541857e-05, |
|
"loss": 0.0001, |
|
"step": 7720 |
|
}, |
|
{ |
|
"epoch": 1.8400380861699595, |
|
"grad_norm": 0.0026801279745996, |
|
"learning_rate": 1.933269856383401e-05, |
|
"loss": 0.0, |
|
"step": 7730 |
|
}, |
|
{ |
|
"epoch": 1.8424184717924303, |
|
"grad_norm": 0.006187149789184332, |
|
"learning_rate": 1.929302547012616e-05, |
|
"loss": 0.0001, |
|
"step": 7740 |
|
}, |
|
{ |
|
"epoch": 1.8447988574149012, |
|
"grad_norm": 0.002990028355270624, |
|
"learning_rate": 1.9253352376418314e-05, |
|
"loss": 0.0001, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 1.8471792430373721, |
|
"grad_norm": 0.0044268155470490456, |
|
"learning_rate": 1.9213679282710468e-05, |
|
"loss": 0.0, |
|
"step": 7760 |
|
}, |
|
{ |
|
"epoch": 1.849559628659843, |
|
"grad_norm": 0.005206019151955843, |
|
"learning_rate": 1.9174006189002618e-05, |
|
"loss": 0.0001, |
|
"step": 7770 |
|
}, |
|
{ |
|
"epoch": 1.8519400142823137, |
|
"grad_norm": 0.005415783729404211, |
|
"learning_rate": 1.9134333095294772e-05, |
|
"loss": 0.0001, |
|
"step": 7780 |
|
}, |
|
{ |
|
"epoch": 1.8543203999047846, |
|
"grad_norm": 0.0016888550017029047, |
|
"learning_rate": 1.9094660001586926e-05, |
|
"loss": 0.0, |
|
"step": 7790 |
|
}, |
|
{ |
|
"epoch": 1.8567007855272553, |
|
"grad_norm": 0.003122705966234207, |
|
"learning_rate": 1.9054986907879076e-05, |
|
"loss": 0.0, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 1.8590811711497262, |
|
"grad_norm": 0.021525248885154724, |
|
"learning_rate": 1.901531381417123e-05, |
|
"loss": 0.0, |
|
"step": 7810 |
|
}, |
|
{ |
|
"epoch": 1.861461556772197, |
|
"grad_norm": 0.004836782813072205, |
|
"learning_rate": 1.8975640720463383e-05, |
|
"loss": 0.0, |
|
"step": 7820 |
|
}, |
|
{ |
|
"epoch": 1.863841942394668, |
|
"grad_norm": 0.003003711812198162, |
|
"learning_rate": 1.8935967626755534e-05, |
|
"loss": 0.0001, |
|
"step": 7830 |
|
}, |
|
{ |
|
"epoch": 1.8662223280171388, |
|
"grad_norm": 0.0034373151138424873, |
|
"learning_rate": 1.8896294533047687e-05, |
|
"loss": 0.0001, |
|
"step": 7840 |
|
}, |
|
{ |
|
"epoch": 1.8686027136396097, |
|
"grad_norm": 0.061307862401008606, |
|
"learning_rate": 1.885662143933984e-05, |
|
"loss": 0.0001, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 1.8709830992620806, |
|
"grad_norm": 0.001207771128974855, |
|
"learning_rate": 1.8816948345631995e-05, |
|
"loss": 0.0, |
|
"step": 7860 |
|
}, |
|
{ |
|
"epoch": 1.8733634848845513, |
|
"grad_norm": 0.007686016149818897, |
|
"learning_rate": 1.8777275251924145e-05, |
|
"loss": 0.0001, |
|
"step": 7870 |
|
}, |
|
{ |
|
"epoch": 1.8757438705070222, |
|
"grad_norm": 0.0019049645634368062, |
|
"learning_rate": 1.87376021582163e-05, |
|
"loss": 0.0001, |
|
"step": 7880 |
|
}, |
|
{ |
|
"epoch": 1.8781242561294929, |
|
"grad_norm": 0.00202633673325181, |
|
"learning_rate": 1.8697929064508452e-05, |
|
"loss": 0.0, |
|
"step": 7890 |
|
}, |
|
{ |
|
"epoch": 1.8805046417519637, |
|
"grad_norm": 0.0011157892877236009, |
|
"learning_rate": 1.8658255970800603e-05, |
|
"loss": 0.0002, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 1.8828850273744346, |
|
"grad_norm": 0.001622357638552785, |
|
"learning_rate": 1.8618582877092756e-05, |
|
"loss": 0.0006, |
|
"step": 7910 |
|
}, |
|
{ |
|
"epoch": 1.8852654129969055, |
|
"grad_norm": 0.04895901307463646, |
|
"learning_rate": 1.857890978338491e-05, |
|
"loss": 0.0002, |
|
"step": 7920 |
|
}, |
|
{ |
|
"epoch": 1.8876457986193764, |
|
"grad_norm": 0.0012425240129232407, |
|
"learning_rate": 1.853923668967706e-05, |
|
"loss": 0.0001, |
|
"step": 7930 |
|
}, |
|
{ |
|
"epoch": 1.8900261842418473, |
|
"grad_norm": 0.004690519999712706, |
|
"learning_rate": 1.8499563595969214e-05, |
|
"loss": 0.0001, |
|
"step": 7940 |
|
}, |
|
{ |
|
"epoch": 1.892406569864318, |
|
"grad_norm": 0.0015794184291735291, |
|
"learning_rate": 1.8459890502261368e-05, |
|
"loss": 0.0, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 1.8947869554867889, |
|
"grad_norm": 0.01080586388707161, |
|
"learning_rate": 1.8420217408553518e-05, |
|
"loss": 0.0002, |
|
"step": 7960 |
|
}, |
|
{ |
|
"epoch": 1.8971673411092596, |
|
"grad_norm": 0.0018335338681936264, |
|
"learning_rate": 1.8380544314845672e-05, |
|
"loss": 0.0, |
|
"step": 7970 |
|
}, |
|
{ |
|
"epoch": 1.8995477267317304, |
|
"grad_norm": 0.003800921142101288, |
|
"learning_rate": 1.8340871221137826e-05, |
|
"loss": 0.0, |
|
"step": 7980 |
|
}, |
|
{ |
|
"epoch": 1.9019281123542013, |
|
"grad_norm": 0.0035681715235114098, |
|
"learning_rate": 1.830119812742998e-05, |
|
"loss": 0.0001, |
|
"step": 7990 |
|
}, |
|
{ |
|
"epoch": 1.9043084979766722, |
|
"grad_norm": 0.001115818158723414, |
|
"learning_rate": 1.826152503372213e-05, |
|
"loss": 0.0003, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.9066888835991431, |
|
"grad_norm": 0.004726150073111057, |
|
"learning_rate": 1.8221851940014283e-05, |
|
"loss": 0.0001, |
|
"step": 8010 |
|
}, |
|
{ |
|
"epoch": 1.909069269221614, |
|
"grad_norm": 0.025985538959503174, |
|
"learning_rate": 1.8182178846306437e-05, |
|
"loss": 0.0, |
|
"step": 8020 |
|
}, |
|
{ |
|
"epoch": 1.9114496548440847, |
|
"grad_norm": 0.002658289624378085, |
|
"learning_rate": 1.8142505752598587e-05, |
|
"loss": 0.0001, |
|
"step": 8030 |
|
}, |
|
{ |
|
"epoch": 1.9138300404665556, |
|
"grad_norm": 0.010776730254292488, |
|
"learning_rate": 1.810283265889074e-05, |
|
"loss": 0.0001, |
|
"step": 8040 |
|
}, |
|
{ |
|
"epoch": 1.9162104260890265, |
|
"grad_norm": 0.004742765333503485, |
|
"learning_rate": 1.8063159565182895e-05, |
|
"loss": 0.0001, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 1.9185908117114971, |
|
"grad_norm": 0.0017833469901233912, |
|
"learning_rate": 1.8023486471475045e-05, |
|
"loss": 0.0003, |
|
"step": 8060 |
|
}, |
|
{ |
|
"epoch": 1.920971197333968, |
|
"grad_norm": 0.0015226156683638692, |
|
"learning_rate": 1.79838133777672e-05, |
|
"loss": 0.0, |
|
"step": 8070 |
|
}, |
|
{ |
|
"epoch": 1.923351582956439, |
|
"grad_norm": 0.0021416472736746073, |
|
"learning_rate": 1.7944140284059352e-05, |
|
"loss": 0.0001, |
|
"step": 8080 |
|
}, |
|
{ |
|
"epoch": 1.9257319685789098, |
|
"grad_norm": 0.0021594560239464045, |
|
"learning_rate": 1.7904467190351503e-05, |
|
"loss": 0.0, |
|
"step": 8090 |
|
}, |
|
{ |
|
"epoch": 1.9281123542013807, |
|
"grad_norm": 0.0018359982641413808, |
|
"learning_rate": 1.786479409664366e-05, |
|
"loss": 0.0, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 1.9304927398238516, |
|
"grad_norm": 0.0036185849457979202, |
|
"learning_rate": 1.782512100293581e-05, |
|
"loss": 0.0001, |
|
"step": 8110 |
|
}, |
|
{ |
|
"epoch": 1.9328731254463223, |
|
"grad_norm": 0.019637318328022957, |
|
"learning_rate": 1.778544790922796e-05, |
|
"loss": 0.0, |
|
"step": 8120 |
|
}, |
|
{ |
|
"epoch": 1.9352535110687932, |
|
"grad_norm": 0.002496182220056653, |
|
"learning_rate": 1.7745774815520117e-05, |
|
"loss": 0.0001, |
|
"step": 8130 |
|
}, |
|
{ |
|
"epoch": 1.9376338966912638, |
|
"grad_norm": 0.004374451469630003, |
|
"learning_rate": 1.7706101721812268e-05, |
|
"loss": 0.0005, |
|
"step": 8140 |
|
}, |
|
{ |
|
"epoch": 1.9400142823137347, |
|
"grad_norm": 0.0006196928443387151, |
|
"learning_rate": 1.766642862810442e-05, |
|
"loss": 0.0, |
|
"step": 8150 |
|
}, |
|
{ |
|
"epoch": 1.9423946679362056, |
|
"grad_norm": 0.0037022046744823456, |
|
"learning_rate": 1.7626755534396572e-05, |
|
"loss": 0.0, |
|
"step": 8160 |
|
}, |
|
{ |
|
"epoch": 1.9447750535586765, |
|
"grad_norm": 0.004300027620047331, |
|
"learning_rate": 1.7587082440688725e-05, |
|
"loss": 0.0, |
|
"step": 8170 |
|
}, |
|
{ |
|
"epoch": 1.9471554391811474, |
|
"grad_norm": 0.0019766122568398714, |
|
"learning_rate": 1.754740934698088e-05, |
|
"loss": 0.0001, |
|
"step": 8180 |
|
}, |
|
{ |
|
"epoch": 1.9495358248036183, |
|
"grad_norm": 0.0018594982102513313, |
|
"learning_rate": 1.750773625327303e-05, |
|
"loss": 0.0001, |
|
"step": 8190 |
|
}, |
|
{ |
|
"epoch": 1.951916210426089, |
|
"grad_norm": 0.0012102769687771797, |
|
"learning_rate": 1.7468063159565183e-05, |
|
"loss": 0.0001, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 1.9542965960485599, |
|
"grad_norm": 0.0012130772229284048, |
|
"learning_rate": 1.7428390065857337e-05, |
|
"loss": 0.0, |
|
"step": 8210 |
|
}, |
|
{ |
|
"epoch": 1.9566769816710305, |
|
"grad_norm": 0.0006833472289144993, |
|
"learning_rate": 1.7388716972149487e-05, |
|
"loss": 0.0004, |
|
"step": 8220 |
|
}, |
|
{ |
|
"epoch": 1.9590573672935014, |
|
"grad_norm": 0.0017617164412513375, |
|
"learning_rate": 1.7349043878441644e-05, |
|
"loss": 0.0001, |
|
"step": 8230 |
|
}, |
|
{ |
|
"epoch": 1.9614377529159723, |
|
"grad_norm": 0.0013312195660546422, |
|
"learning_rate": 1.7309370784733795e-05, |
|
"loss": 0.0, |
|
"step": 8240 |
|
}, |
|
{ |
|
"epoch": 1.9638181385384432, |
|
"grad_norm": 0.0018878667615354061, |
|
"learning_rate": 1.7269697691025945e-05, |
|
"loss": 0.0, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 1.9661985241609141, |
|
"grad_norm": 0.0019427284132689238, |
|
"learning_rate": 1.7230024597318102e-05, |
|
"loss": 0.0, |
|
"step": 8260 |
|
}, |
|
{ |
|
"epoch": 1.968578909783385, |
|
"grad_norm": 0.004271362908184528, |
|
"learning_rate": 1.7190351503610252e-05, |
|
"loss": 0.0001, |
|
"step": 8270 |
|
}, |
|
{ |
|
"epoch": 1.970959295405856, |
|
"grad_norm": 0.0027857243549078703, |
|
"learning_rate": 1.7150678409902406e-05, |
|
"loss": 0.0, |
|
"step": 8280 |
|
}, |
|
{ |
|
"epoch": 1.9733396810283266, |
|
"grad_norm": 0.0018286170670762658, |
|
"learning_rate": 1.711100531619456e-05, |
|
"loss": 0.0001, |
|
"step": 8290 |
|
}, |
|
{ |
|
"epoch": 1.9757200666507975, |
|
"grad_norm": 0.001666391035541892, |
|
"learning_rate": 1.707133222248671e-05, |
|
"loss": 0.0, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 1.9781004522732681, |
|
"grad_norm": 0.021936526522040367, |
|
"learning_rate": 1.7031659128778864e-05, |
|
"loss": 0.0001, |
|
"step": 8310 |
|
}, |
|
{ |
|
"epoch": 1.980480837895739, |
|
"grad_norm": 0.00029301681206561625, |
|
"learning_rate": 1.6991986035071017e-05, |
|
"loss": 0.0, |
|
"step": 8320 |
|
}, |
|
{ |
|
"epoch": 1.98286122351821, |
|
"grad_norm": 0.0009200606727972627, |
|
"learning_rate": 1.6952312941363168e-05, |
|
"loss": 0.0, |
|
"step": 8330 |
|
}, |
|
{ |
|
"epoch": 1.9852416091406808, |
|
"grad_norm": 0.00579107366502285, |
|
"learning_rate": 1.691263984765532e-05, |
|
"loss": 0.0, |
|
"step": 8340 |
|
}, |
|
{ |
|
"epoch": 1.9876219947631517, |
|
"grad_norm": 0.000620057515334338, |
|
"learning_rate": 1.687296675394747e-05, |
|
"loss": 0.0001, |
|
"step": 8350 |
|
}, |
|
{ |
|
"epoch": 1.9900023803856226, |
|
"grad_norm": 0.0015694822650402784, |
|
"learning_rate": 1.6833293660239625e-05, |
|
"loss": 0.0, |
|
"step": 8360 |
|
}, |
|
{ |
|
"epoch": 1.9923827660080933, |
|
"grad_norm": 0.0013426202349364758, |
|
"learning_rate": 1.679362056653178e-05, |
|
"loss": 0.0, |
|
"step": 8370 |
|
}, |
|
{ |
|
"epoch": 1.9947631516305642, |
|
"grad_norm": 0.06455473601818085, |
|
"learning_rate": 1.675394747282393e-05, |
|
"loss": 0.0001, |
|
"step": 8380 |
|
}, |
|
{ |
|
"epoch": 1.9971435372530348, |
|
"grad_norm": 0.0007938113994896412, |
|
"learning_rate": 1.6714274379116086e-05, |
|
"loss": 0.0001, |
|
"step": 8390 |
|
}, |
|
{ |
|
"epoch": 1.9995239228755057, |
|
"grad_norm": 0.0030489168129861355, |
|
"learning_rate": 1.6674601285408237e-05, |
|
"loss": 0.0001, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 7.416475114041532e-07, |
|
"eval_runtime": 52.1219, |
|
"eval_samples_per_second": 35.839, |
|
"eval_steps_per_second": 8.96, |
|
"step": 8402 |
|
}, |
|
{ |
|
"epoch": 2.0019043084979766, |
|
"grad_norm": 0.00039361350354738533, |
|
"learning_rate": 1.6634928191700387e-05, |
|
"loss": 0.0001, |
|
"step": 8410 |
|
}, |
|
{ |
|
"epoch": 2.0042846941204475, |
|
"grad_norm": 0.007912525907158852, |
|
"learning_rate": 1.6595255097992544e-05, |
|
"loss": 0.0001, |
|
"step": 8420 |
|
}, |
|
{ |
|
"epoch": 2.0066650797429184, |
|
"grad_norm": 0.003857001895084977, |
|
"learning_rate": 1.6555582004284694e-05, |
|
"loss": 0.0001, |
|
"step": 8430 |
|
}, |
|
{ |
|
"epoch": 2.0090454653653893, |
|
"grad_norm": 0.002192788990214467, |
|
"learning_rate": 1.6515908910576848e-05, |
|
"loss": 0.0, |
|
"step": 8440 |
|
}, |
|
{ |
|
"epoch": 2.01142585098786, |
|
"grad_norm": 0.00107199524063617, |
|
"learning_rate": 1.6476235816869002e-05, |
|
"loss": 0.0002, |
|
"step": 8450 |
|
}, |
|
{ |
|
"epoch": 2.013806236610331, |
|
"grad_norm": 0.024036822840571404, |
|
"learning_rate": 1.6436562723161152e-05, |
|
"loss": 0.0001, |
|
"step": 8460 |
|
}, |
|
{ |
|
"epoch": 2.0161866222328015, |
|
"grad_norm": 0.000551603501662612, |
|
"learning_rate": 1.6396889629453306e-05, |
|
"loss": 0.0, |
|
"step": 8470 |
|
}, |
|
{ |
|
"epoch": 2.0185670078552724, |
|
"grad_norm": 0.001782495528459549, |
|
"learning_rate": 1.635721653574546e-05, |
|
"loss": 0.0001, |
|
"step": 8480 |
|
}, |
|
{ |
|
"epoch": 2.0209473934777433, |
|
"grad_norm": 0.030838970094919205, |
|
"learning_rate": 1.631754344203761e-05, |
|
"loss": 0.0001, |
|
"step": 8490 |
|
}, |
|
{ |
|
"epoch": 2.023327779100214, |
|
"grad_norm": 0.0005242625484243035, |
|
"learning_rate": 1.6277870348329764e-05, |
|
"loss": 0.0003, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 2.025708164722685, |
|
"grad_norm": 0.001871236483566463, |
|
"learning_rate": 1.6238197254621917e-05, |
|
"loss": 0.0, |
|
"step": 8510 |
|
}, |
|
{ |
|
"epoch": 2.028088550345156, |
|
"grad_norm": 0.0005813137395307422, |
|
"learning_rate": 1.6198524160914068e-05, |
|
"loss": 0.0, |
|
"step": 8520 |
|
}, |
|
{ |
|
"epoch": 2.030468935967627, |
|
"grad_norm": 0.0007783659384585917, |
|
"learning_rate": 1.615885106720622e-05, |
|
"loss": 0.0, |
|
"step": 8530 |
|
}, |
|
{ |
|
"epoch": 2.032849321590098, |
|
"grad_norm": 0.002862844616174698, |
|
"learning_rate": 1.6119177973498375e-05, |
|
"loss": 0.0001, |
|
"step": 8540 |
|
}, |
|
{ |
|
"epoch": 2.0352297072125682, |
|
"grad_norm": 0.0016766699263826013, |
|
"learning_rate": 1.607950487979053e-05, |
|
"loss": 0.0, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 2.037610092835039, |
|
"grad_norm": 0.06566356122493744, |
|
"learning_rate": 1.603983178608268e-05, |
|
"loss": 0.0002, |
|
"step": 8560 |
|
}, |
|
{ |
|
"epoch": 2.03999047845751, |
|
"grad_norm": 0.0013121259398758411, |
|
"learning_rate": 1.600015869237483e-05, |
|
"loss": 0.0, |
|
"step": 8570 |
|
}, |
|
{ |
|
"epoch": 2.042370864079981, |
|
"grad_norm": 0.0012001970317214727, |
|
"learning_rate": 1.5960485598666986e-05, |
|
"loss": 0.0001, |
|
"step": 8580 |
|
}, |
|
{ |
|
"epoch": 2.044751249702452, |
|
"grad_norm": 0.008261552080512047, |
|
"learning_rate": 1.5920812504959137e-05, |
|
"loss": 0.0, |
|
"step": 8590 |
|
}, |
|
{ |
|
"epoch": 2.0471316353249227, |
|
"grad_norm": 0.0006174147129058838, |
|
"learning_rate": 1.588113941125129e-05, |
|
"loss": 0.0, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 2.0495120209473936, |
|
"grad_norm": 0.005130809266120195, |
|
"learning_rate": 1.5841466317543444e-05, |
|
"loss": 0.0001, |
|
"step": 8610 |
|
}, |
|
{ |
|
"epoch": 2.0518924065698645, |
|
"grad_norm": 0.0034670240711420774, |
|
"learning_rate": 1.5801793223835594e-05, |
|
"loss": 0.0004, |
|
"step": 8620 |
|
}, |
|
{ |
|
"epoch": 2.054272792192335, |
|
"grad_norm": 0.0055514005944132805, |
|
"learning_rate": 1.5762120130127748e-05, |
|
"loss": 0.0001, |
|
"step": 8630 |
|
}, |
|
{ |
|
"epoch": 2.056653177814806, |
|
"grad_norm": 0.0003135903971269727, |
|
"learning_rate": 1.5722447036419902e-05, |
|
"loss": 0.0, |
|
"step": 8640 |
|
}, |
|
{ |
|
"epoch": 2.0590335634372767, |
|
"grad_norm": 0.002474389737471938, |
|
"learning_rate": 1.5682773942712052e-05, |
|
"loss": 0.0001, |
|
"step": 8650 |
|
}, |
|
{ |
|
"epoch": 2.0614139490597476, |
|
"grad_norm": 0.004792024847120047, |
|
"learning_rate": 1.5643100849004206e-05, |
|
"loss": 0.0, |
|
"step": 8660 |
|
}, |
|
{ |
|
"epoch": 2.0637943346822185, |
|
"grad_norm": 0.0030985362827777863, |
|
"learning_rate": 1.560342775529636e-05, |
|
"loss": 0.0001, |
|
"step": 8670 |
|
}, |
|
{ |
|
"epoch": 2.0661747203046894, |
|
"grad_norm": 0.004058391321450472, |
|
"learning_rate": 1.5563754661588513e-05, |
|
"loss": 0.0, |
|
"step": 8680 |
|
}, |
|
{ |
|
"epoch": 2.0685551059271603, |
|
"grad_norm": 0.00150771695189178, |
|
"learning_rate": 1.5524081567880663e-05, |
|
"loss": 0.0, |
|
"step": 8690 |
|
}, |
|
{ |
|
"epoch": 2.070935491549631, |
|
"grad_norm": 0.001020533381961286, |
|
"learning_rate": 1.5484408474172817e-05, |
|
"loss": 0.0, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 2.073315877172102, |
|
"grad_norm": 0.00616106390953064, |
|
"learning_rate": 1.544473538046497e-05, |
|
"loss": 0.0, |
|
"step": 8710 |
|
}, |
|
{ |
|
"epoch": 2.0756962627945725, |
|
"grad_norm": 0.0025589261204004288, |
|
"learning_rate": 1.540506228675712e-05, |
|
"loss": 0.0001, |
|
"step": 8720 |
|
}, |
|
{ |
|
"epoch": 2.0780766484170434, |
|
"grad_norm": 0.0006466865306720138, |
|
"learning_rate": 1.5365389193049275e-05, |
|
"loss": 0.0, |
|
"step": 8730 |
|
}, |
|
{ |
|
"epoch": 2.0804570340395143, |
|
"grad_norm": 0.002343350788578391, |
|
"learning_rate": 1.532571609934143e-05, |
|
"loss": 0.0001, |
|
"step": 8740 |
|
}, |
|
{ |
|
"epoch": 2.082837419661985, |
|
"grad_norm": 0.0006717872456647456, |
|
"learning_rate": 1.528604300563358e-05, |
|
"loss": 0.0001, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 2.085217805284456, |
|
"grad_norm": 0.0009957224829122424, |
|
"learning_rate": 1.524636991192573e-05, |
|
"loss": 0.0002, |
|
"step": 8760 |
|
}, |
|
{ |
|
"epoch": 2.087598190906927, |
|
"grad_norm": 0.0014106009621173143, |
|
"learning_rate": 1.5206696818217886e-05, |
|
"loss": 0.0, |
|
"step": 8770 |
|
}, |
|
{ |
|
"epoch": 2.089978576529398, |
|
"grad_norm": 0.0011065505677834153, |
|
"learning_rate": 1.5167023724510038e-05, |
|
"loss": 0.0001, |
|
"step": 8780 |
|
}, |
|
{ |
|
"epoch": 2.092358962151869, |
|
"grad_norm": 0.0027844165451824665, |
|
"learning_rate": 1.512735063080219e-05, |
|
"loss": 0.0001, |
|
"step": 8790 |
|
}, |
|
{ |
|
"epoch": 2.0947393477743392, |
|
"grad_norm": 0.0006960778846405447, |
|
"learning_rate": 1.5087677537094344e-05, |
|
"loss": 0.0, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 2.09711973339681, |
|
"grad_norm": 0.0003423156449571252, |
|
"learning_rate": 1.5048004443386496e-05, |
|
"loss": 0.0, |
|
"step": 8810 |
|
}, |
|
{ |
|
"epoch": 2.099500119019281, |
|
"grad_norm": 0.0011733579449355602, |
|
"learning_rate": 1.5008331349678648e-05, |
|
"loss": 0.0, |
|
"step": 8820 |
|
}, |
|
{ |
|
"epoch": 2.101880504641752, |
|
"grad_norm": 0.004115458112210035, |
|
"learning_rate": 1.4968658255970802e-05, |
|
"loss": 0.0, |
|
"step": 8830 |
|
}, |
|
{ |
|
"epoch": 2.104260890264223, |
|
"grad_norm": 0.072359099984169, |
|
"learning_rate": 1.4928985162262954e-05, |
|
"loss": 0.0, |
|
"step": 8840 |
|
}, |
|
{ |
|
"epoch": 2.1066412758866937, |
|
"grad_norm": 0.003922273404896259, |
|
"learning_rate": 1.4889312068555106e-05, |
|
"loss": 0.0, |
|
"step": 8850 |
|
}, |
|
{ |
|
"epoch": 2.1090216615091646, |
|
"grad_norm": 0.012736503966152668, |
|
"learning_rate": 1.4849638974847261e-05, |
|
"loss": 0.0, |
|
"step": 8860 |
|
}, |
|
{ |
|
"epoch": 2.1114020471316355, |
|
"grad_norm": 0.0019338323036208749, |
|
"learning_rate": 1.4809965881139411e-05, |
|
"loss": 0.0001, |
|
"step": 8870 |
|
}, |
|
{ |
|
"epoch": 2.1137824327541064, |
|
"grad_norm": 0.0015457593835890293, |
|
"learning_rate": 1.4770292787431563e-05, |
|
"loss": 0.0, |
|
"step": 8880 |
|
}, |
|
{ |
|
"epoch": 2.116162818376577, |
|
"grad_norm": 0.0016716497484594584, |
|
"learning_rate": 1.4730619693723719e-05, |
|
"loss": 0.0, |
|
"step": 8890 |
|
}, |
|
{ |
|
"epoch": 2.1185432039990477, |
|
"grad_norm": 0.001560089411213994, |
|
"learning_rate": 1.469094660001587e-05, |
|
"loss": 0.0, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 2.1209235896215186, |
|
"grad_norm": 0.0031743065919727087, |
|
"learning_rate": 1.4651273506308023e-05, |
|
"loss": 0.0, |
|
"step": 8910 |
|
}, |
|
{ |
|
"epoch": 2.1233039752439895, |
|
"grad_norm": 0.0015614436706528068, |
|
"learning_rate": 1.4611600412600176e-05, |
|
"loss": 0.0, |
|
"step": 8920 |
|
}, |
|
{ |
|
"epoch": 2.1256843608664604, |
|
"grad_norm": 0.0005399516085162759, |
|
"learning_rate": 1.4571927318892328e-05, |
|
"loss": 0.0, |
|
"step": 8930 |
|
}, |
|
{ |
|
"epoch": 2.1280647464889313, |
|
"grad_norm": 0.0014794693561270833, |
|
"learning_rate": 1.453225422518448e-05, |
|
"loss": 0.0, |
|
"step": 8940 |
|
}, |
|
{ |
|
"epoch": 2.130445132111402, |
|
"grad_norm": 0.0024672893341630697, |
|
"learning_rate": 1.4492581131476632e-05, |
|
"loss": 0.0, |
|
"step": 8950 |
|
}, |
|
{ |
|
"epoch": 2.132825517733873, |
|
"grad_norm": 0.0013646584702655673, |
|
"learning_rate": 1.4452908037768786e-05, |
|
"loss": 0.0, |
|
"step": 8960 |
|
}, |
|
{ |
|
"epoch": 2.1352059033563435, |
|
"grad_norm": 0.07290241867303848, |
|
"learning_rate": 1.4413234944060938e-05, |
|
"loss": 0.0001, |
|
"step": 8970 |
|
}, |
|
{ |
|
"epoch": 2.1375862889788144, |
|
"grad_norm": 0.001859787036664784, |
|
"learning_rate": 1.437356185035309e-05, |
|
"loss": 0.0, |
|
"step": 8980 |
|
}, |
|
{ |
|
"epoch": 2.1399666746012853, |
|
"grad_norm": 0.001754750614054501, |
|
"learning_rate": 1.4333888756645244e-05, |
|
"loss": 0.0, |
|
"step": 8990 |
|
}, |
|
{ |
|
"epoch": 2.142347060223756, |
|
"grad_norm": 0.028476126492023468, |
|
"learning_rate": 1.4294215662937396e-05, |
|
"loss": 0.0, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 2.144727445846227, |
|
"grad_norm": 0.0005994876846671104, |
|
"learning_rate": 1.4254542569229548e-05, |
|
"loss": 0.0, |
|
"step": 9010 |
|
}, |
|
{ |
|
"epoch": 2.147107831468698, |
|
"grad_norm": 0.0007879494805820286, |
|
"learning_rate": 1.4214869475521703e-05, |
|
"loss": 0.0, |
|
"step": 9020 |
|
}, |
|
{ |
|
"epoch": 2.149488217091169, |
|
"grad_norm": 0.0012654970632866025, |
|
"learning_rate": 1.4175196381813855e-05, |
|
"loss": 0.0, |
|
"step": 9030 |
|
}, |
|
{ |
|
"epoch": 2.1518686027136398, |
|
"grad_norm": 0.0018679037457332015, |
|
"learning_rate": 1.4135523288106006e-05, |
|
"loss": 0.0, |
|
"step": 9040 |
|
}, |
|
{ |
|
"epoch": 2.1542489883361107, |
|
"grad_norm": 0.0017861429369077086, |
|
"learning_rate": 1.4095850194398161e-05, |
|
"loss": 0.0, |
|
"step": 9050 |
|
}, |
|
{ |
|
"epoch": 2.156629373958581, |
|
"grad_norm": 0.006415149662643671, |
|
"learning_rate": 1.4056177100690313e-05, |
|
"loss": 0.0, |
|
"step": 9060 |
|
}, |
|
{ |
|
"epoch": 2.159009759581052, |
|
"grad_norm": 0.002842891961336136, |
|
"learning_rate": 1.4016504006982465e-05, |
|
"loss": 0.0, |
|
"step": 9070 |
|
}, |
|
{ |
|
"epoch": 2.161390145203523, |
|
"grad_norm": 0.0013869826216250658, |
|
"learning_rate": 1.3976830913274619e-05, |
|
"loss": 0.0, |
|
"step": 9080 |
|
}, |
|
{ |
|
"epoch": 2.163770530825994, |
|
"grad_norm": 0.018388478085398674, |
|
"learning_rate": 1.393715781956677e-05, |
|
"loss": 0.0001, |
|
"step": 9090 |
|
}, |
|
{ |
|
"epoch": 2.1661509164484647, |
|
"grad_norm": 0.0008245584322139621, |
|
"learning_rate": 1.3897484725858923e-05, |
|
"loss": 0.0, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 2.1685313020709356, |
|
"grad_norm": 0.36837905645370483, |
|
"learning_rate": 1.3857811632151076e-05, |
|
"loss": 0.0001, |
|
"step": 9110 |
|
}, |
|
{ |
|
"epoch": 2.1709116876934065, |
|
"grad_norm": 0.002466343343257904, |
|
"learning_rate": 1.3818138538443228e-05, |
|
"loss": 0.0001, |
|
"step": 9120 |
|
}, |
|
{ |
|
"epoch": 2.1732920733158774, |
|
"grad_norm": 0.0035982499830424786, |
|
"learning_rate": 1.377846544473538e-05, |
|
"loss": 0.0, |
|
"step": 9130 |
|
}, |
|
{ |
|
"epoch": 2.175672458938348, |
|
"grad_norm": 0.13738982379436493, |
|
"learning_rate": 1.3738792351027536e-05, |
|
"loss": 0.0001, |
|
"step": 9140 |
|
}, |
|
{ |
|
"epoch": 2.1780528445608187, |
|
"grad_norm": 0.00042806967394426465, |
|
"learning_rate": 1.3699119257319688e-05, |
|
"loss": 0.0, |
|
"step": 9150 |
|
}, |
|
{ |
|
"epoch": 2.1804332301832896, |
|
"grad_norm": 0.002727969316765666, |
|
"learning_rate": 1.3659446163611838e-05, |
|
"loss": 0.0, |
|
"step": 9160 |
|
}, |
|
{ |
|
"epoch": 2.1828136158057605, |
|
"grad_norm": 0.0010691905627027154, |
|
"learning_rate": 1.361977306990399e-05, |
|
"loss": 0.0, |
|
"step": 9170 |
|
}, |
|
{ |
|
"epoch": 2.1851940014282314, |
|
"grad_norm": 0.020881984382867813, |
|
"learning_rate": 1.3580099976196145e-05, |
|
"loss": 0.0, |
|
"step": 9180 |
|
}, |
|
{ |
|
"epoch": 2.1875743870507023, |
|
"grad_norm": 0.0019363940227776766, |
|
"learning_rate": 1.3540426882488297e-05, |
|
"loss": 0.0, |
|
"step": 9190 |
|
}, |
|
{ |
|
"epoch": 2.189954772673173, |
|
"grad_norm": 0.001359110465273261, |
|
"learning_rate": 1.350075378878045e-05, |
|
"loss": 0.0, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 2.192335158295644, |
|
"grad_norm": 0.0024417322129011154, |
|
"learning_rate": 1.3461080695072603e-05, |
|
"loss": 0.0, |
|
"step": 9210 |
|
}, |
|
{ |
|
"epoch": 2.1947155439181145, |
|
"grad_norm": 0.0006399775156751275, |
|
"learning_rate": 1.3421407601364755e-05, |
|
"loss": 0.0, |
|
"step": 9220 |
|
}, |
|
{ |
|
"epoch": 2.1970959295405854, |
|
"grad_norm": 0.001347382552921772, |
|
"learning_rate": 1.3381734507656907e-05, |
|
"loss": 0.0001, |
|
"step": 9230 |
|
}, |
|
{ |
|
"epoch": 2.1994763151630563, |
|
"grad_norm": 0.002276881132274866, |
|
"learning_rate": 1.334206141394906e-05, |
|
"loss": 0.0, |
|
"step": 9240 |
|
}, |
|
{ |
|
"epoch": 2.201856700785527, |
|
"grad_norm": 0.0005205354536883533, |
|
"learning_rate": 1.3302388320241213e-05, |
|
"loss": 0.0, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 2.204237086407998, |
|
"grad_norm": 0.001351204700767994, |
|
"learning_rate": 1.3262715226533365e-05, |
|
"loss": 0.0, |
|
"step": 9260 |
|
}, |
|
{ |
|
"epoch": 2.206617472030469, |
|
"grad_norm": 0.00529600540176034, |
|
"learning_rate": 1.322304213282552e-05, |
|
"loss": 0.0002, |
|
"step": 9270 |
|
}, |
|
{ |
|
"epoch": 2.20899785765294, |
|
"grad_norm": 0.002000352367758751, |
|
"learning_rate": 1.318336903911767e-05, |
|
"loss": 0.0, |
|
"step": 9280 |
|
}, |
|
{ |
|
"epoch": 2.2113782432754108, |
|
"grad_norm": 0.0011036837240681052, |
|
"learning_rate": 1.3143695945409823e-05, |
|
"loss": 0.0, |
|
"step": 9290 |
|
}, |
|
{ |
|
"epoch": 2.2137586288978817, |
|
"grad_norm": 0.0023322845809161663, |
|
"learning_rate": 1.3104022851701978e-05, |
|
"loss": 0.0, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 2.216139014520352, |
|
"grad_norm": 0.0029122158885002136, |
|
"learning_rate": 1.306434975799413e-05, |
|
"loss": 0.0, |
|
"step": 9310 |
|
}, |
|
{ |
|
"epoch": 2.218519400142823, |
|
"grad_norm": 0.00949085596948862, |
|
"learning_rate": 1.302467666428628e-05, |
|
"loss": 0.0, |
|
"step": 9320 |
|
}, |
|
{ |
|
"epoch": 2.220899785765294, |
|
"grad_norm": 0.0013391702668741345, |
|
"learning_rate": 1.2985003570578436e-05, |
|
"loss": 0.0, |
|
"step": 9330 |
|
}, |
|
{ |
|
"epoch": 2.223280171387765, |
|
"grad_norm": 0.00047678747796453536, |
|
"learning_rate": 1.2945330476870588e-05, |
|
"loss": 0.0, |
|
"step": 9340 |
|
}, |
|
{ |
|
"epoch": 2.2256605570102357, |
|
"grad_norm": 0.0031029602978378534, |
|
"learning_rate": 1.290565738316274e-05, |
|
"loss": 0.0, |
|
"step": 9350 |
|
}, |
|
{ |
|
"epoch": 2.2280409426327066, |
|
"grad_norm": 0.00046392931835725904, |
|
"learning_rate": 1.2865984289454892e-05, |
|
"loss": 0.0, |
|
"step": 9360 |
|
}, |
|
{ |
|
"epoch": 2.2304213282551775, |
|
"grad_norm": 0.0008917547529563308, |
|
"learning_rate": 1.2826311195747045e-05, |
|
"loss": 0.0, |
|
"step": 9370 |
|
}, |
|
{ |
|
"epoch": 2.2328017138776484, |
|
"grad_norm": 0.0039760940708220005, |
|
"learning_rate": 1.2786638102039197e-05, |
|
"loss": 0.0, |
|
"step": 9380 |
|
}, |
|
{ |
|
"epoch": 2.235182099500119, |
|
"grad_norm": 0.0009416754473932087, |
|
"learning_rate": 1.274696500833135e-05, |
|
"loss": 0.0, |
|
"step": 9390 |
|
}, |
|
{ |
|
"epoch": 2.2375624851225897, |
|
"grad_norm": 0.0008697324083186686, |
|
"learning_rate": 1.2707291914623503e-05, |
|
"loss": 0.0, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 2.2399428707450606, |
|
"grad_norm": 0.00044792311382479966, |
|
"learning_rate": 1.2667618820915655e-05, |
|
"loss": 0.0, |
|
"step": 9410 |
|
}, |
|
{ |
|
"epoch": 2.2423232563675315, |
|
"grad_norm": 0.0014049585442990065, |
|
"learning_rate": 1.2627945727207807e-05, |
|
"loss": 0.0, |
|
"step": 9420 |
|
}, |
|
{ |
|
"epoch": 2.2447036419900024, |
|
"grad_norm": 0.00259969150647521, |
|
"learning_rate": 1.2588272633499962e-05, |
|
"loss": 0.0, |
|
"step": 9430 |
|
}, |
|
{ |
|
"epoch": 2.2470840276124733, |
|
"grad_norm": 0.0012579966569319367, |
|
"learning_rate": 1.2548599539792113e-05, |
|
"loss": 0.0, |
|
"step": 9440 |
|
}, |
|
{ |
|
"epoch": 2.249464413234944, |
|
"grad_norm": 0.008475791662931442, |
|
"learning_rate": 1.2508926446084265e-05, |
|
"loss": 0.0, |
|
"step": 9450 |
|
}, |
|
{ |
|
"epoch": 2.251844798857415, |
|
"grad_norm": 0.007055677939206362, |
|
"learning_rate": 1.2469253352376418e-05, |
|
"loss": 0.0, |
|
"step": 9460 |
|
}, |
|
{ |
|
"epoch": 2.2542251844798855, |
|
"grad_norm": 0.00043771168566308916, |
|
"learning_rate": 1.2429580258668572e-05, |
|
"loss": 0.0, |
|
"step": 9470 |
|
}, |
|
{ |
|
"epoch": 2.2566055701023564, |
|
"grad_norm": 0.0004315728147048503, |
|
"learning_rate": 1.2389907164960724e-05, |
|
"loss": 0.0, |
|
"step": 9480 |
|
}, |
|
{ |
|
"epoch": 2.2589859557248273, |
|
"grad_norm": 0.0006574731087312102, |
|
"learning_rate": 1.2350234071252876e-05, |
|
"loss": 0.0, |
|
"step": 9490 |
|
}, |
|
{ |
|
"epoch": 2.261366341347298, |
|
"grad_norm": 0.000502898299600929, |
|
"learning_rate": 1.231056097754503e-05, |
|
"loss": 0.0, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 2.263746726969769, |
|
"grad_norm": 0.0014464023988693953, |
|
"learning_rate": 1.2270887883837182e-05, |
|
"loss": 0.0, |
|
"step": 9510 |
|
}, |
|
{ |
|
"epoch": 2.26612711259224, |
|
"grad_norm": 0.0007312349043786526, |
|
"learning_rate": 1.2231214790129336e-05, |
|
"loss": 0.0, |
|
"step": 9520 |
|
}, |
|
{ |
|
"epoch": 2.268507498214711, |
|
"grad_norm": 0.0012411205098032951, |
|
"learning_rate": 1.2191541696421488e-05, |
|
"loss": 0.0, |
|
"step": 9530 |
|
}, |
|
{ |
|
"epoch": 2.2708878838371818, |
|
"grad_norm": 0.003359739203006029, |
|
"learning_rate": 1.215186860271364e-05, |
|
"loss": 0.0, |
|
"step": 9540 |
|
}, |
|
{ |
|
"epoch": 2.2732682694596527, |
|
"grad_norm": 0.0025401897728443146, |
|
"learning_rate": 1.2112195509005793e-05, |
|
"loss": 0.0, |
|
"step": 9550 |
|
}, |
|
{ |
|
"epoch": 2.275648655082123, |
|
"grad_norm": 0.0009357984527014196, |
|
"learning_rate": 1.2072522415297945e-05, |
|
"loss": 0.0, |
|
"step": 9560 |
|
}, |
|
{ |
|
"epoch": 2.278029040704594, |
|
"grad_norm": 0.015569353476166725, |
|
"learning_rate": 1.2032849321590097e-05, |
|
"loss": 0.0, |
|
"step": 9570 |
|
}, |
|
{ |
|
"epoch": 2.280409426327065, |
|
"grad_norm": 0.0005228265072219074, |
|
"learning_rate": 1.1993176227882251e-05, |
|
"loss": 0.0, |
|
"step": 9580 |
|
}, |
|
{ |
|
"epoch": 2.282789811949536, |
|
"grad_norm": 0.0006133327260613441, |
|
"learning_rate": 1.1953503134174405e-05, |
|
"loss": 0.0, |
|
"step": 9590 |
|
}, |
|
{ |
|
"epoch": 2.2851701975720067, |
|
"grad_norm": 0.0006283469265326858, |
|
"learning_rate": 1.1913830040466557e-05, |
|
"loss": 0.0, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 2.2875505831944776, |
|
"grad_norm": 0.0017937012016773224, |
|
"learning_rate": 1.1874156946758709e-05, |
|
"loss": 0.0, |
|
"step": 9610 |
|
}, |
|
{ |
|
"epoch": 2.2899309688169485, |
|
"grad_norm": 0.00227372907102108, |
|
"learning_rate": 1.183448385305086e-05, |
|
"loss": 0.0, |
|
"step": 9620 |
|
}, |
|
{ |
|
"epoch": 2.2923113544394194, |
|
"grad_norm": 0.0007874960429035127, |
|
"learning_rate": 1.1794810759343014e-05, |
|
"loss": 0.0, |
|
"step": 9630 |
|
}, |
|
{ |
|
"epoch": 2.2946917400618903, |
|
"grad_norm": 0.0012992926640436053, |
|
"learning_rate": 1.1755137665635168e-05, |
|
"loss": 0.0, |
|
"step": 9640 |
|
}, |
|
{ |
|
"epoch": 2.2970721256843607, |
|
"grad_norm": 0.0026856097392737865, |
|
"learning_rate": 1.1715464571927318e-05, |
|
"loss": 0.0001, |
|
"step": 9650 |
|
}, |
|
{ |
|
"epoch": 2.2994525113068316, |
|
"grad_norm": 0.027589144185185432, |
|
"learning_rate": 1.1675791478219472e-05, |
|
"loss": 0.0005, |
|
"step": 9660 |
|
}, |
|
{ |
|
"epoch": 2.3018328969293025, |
|
"grad_norm": 0.00021341729734558612, |
|
"learning_rate": 1.1636118384511626e-05, |
|
"loss": 0.0, |
|
"step": 9670 |
|
}, |
|
{ |
|
"epoch": 2.3042132825517734, |
|
"grad_norm": 0.0005525678861886263, |
|
"learning_rate": 1.1596445290803778e-05, |
|
"loss": 0.0, |
|
"step": 9680 |
|
}, |
|
{ |
|
"epoch": 2.3065936681742443, |
|
"grad_norm": 0.0006510653183795512, |
|
"learning_rate": 1.155677219709593e-05, |
|
"loss": 0.0, |
|
"step": 9690 |
|
}, |
|
{ |
|
"epoch": 2.308974053796715, |
|
"grad_norm": 0.0011141913710162044, |
|
"learning_rate": 1.1517099103388082e-05, |
|
"loss": 0.0, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 2.311354439419186, |
|
"grad_norm": 0.001998309977352619, |
|
"learning_rate": 1.1477426009680235e-05, |
|
"loss": 0.0, |
|
"step": 9710 |
|
}, |
|
{ |
|
"epoch": 2.3137348250416565, |
|
"grad_norm": 0.008638182654976845, |
|
"learning_rate": 1.1437752915972389e-05, |
|
"loss": 0.0, |
|
"step": 9720 |
|
}, |
|
{ |
|
"epoch": 2.3161152106641274, |
|
"grad_norm": 0.0004837829037569463, |
|
"learning_rate": 1.139807982226454e-05, |
|
"loss": 0.0, |
|
"step": 9730 |
|
}, |
|
{ |
|
"epoch": 2.3184955962865983, |
|
"grad_norm": 0.008834806270897388, |
|
"learning_rate": 1.1358406728556693e-05, |
|
"loss": 0.0, |
|
"step": 9740 |
|
}, |
|
{ |
|
"epoch": 2.320875981909069, |
|
"grad_norm": 0.017421774566173553, |
|
"learning_rate": 1.1318733634848847e-05, |
|
"loss": 0.0, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 2.32325636753154, |
|
"grad_norm": 0.0008695673895999789, |
|
"learning_rate": 1.1279060541140999e-05, |
|
"loss": 0.0, |
|
"step": 9760 |
|
}, |
|
{ |
|
"epoch": 2.325636753154011, |
|
"grad_norm": 0.007985567674040794, |
|
"learning_rate": 1.1239387447433151e-05, |
|
"loss": 0.0001, |
|
"step": 9770 |
|
}, |
|
{ |
|
"epoch": 2.328017138776482, |
|
"grad_norm": 0.0002991770743392408, |
|
"learning_rate": 1.1199714353725305e-05, |
|
"loss": 0.0, |
|
"step": 9780 |
|
}, |
|
{ |
|
"epoch": 2.3303975243989528, |
|
"grad_norm": 0.0018964770715683699, |
|
"learning_rate": 1.1160041260017457e-05, |
|
"loss": 0.0, |
|
"step": 9790 |
|
}, |
|
{ |
|
"epoch": 2.3327779100214237, |
|
"grad_norm": 0.0003782061976380646, |
|
"learning_rate": 1.112036816630961e-05, |
|
"loss": 0.0, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 2.335158295643894, |
|
"grad_norm": 0.0005394426407292485, |
|
"learning_rate": 1.108069507260176e-05, |
|
"loss": 0.0, |
|
"step": 9810 |
|
}, |
|
{ |
|
"epoch": 2.337538681266365, |
|
"grad_norm": 0.0008728650282137096, |
|
"learning_rate": 1.1041021978893914e-05, |
|
"loss": 0.0, |
|
"step": 9820 |
|
}, |
|
{ |
|
"epoch": 2.339919066888836, |
|
"grad_norm": 1.026079773902893, |
|
"learning_rate": 1.1001348885186068e-05, |
|
"loss": 0.0001, |
|
"step": 9830 |
|
}, |
|
{ |
|
"epoch": 2.342299452511307, |
|
"grad_norm": 0.000987286795862019, |
|
"learning_rate": 1.096167579147822e-05, |
|
"loss": 0.0, |
|
"step": 9840 |
|
}, |
|
{ |
|
"epoch": 2.3446798381337777, |
|
"grad_norm": 0.0015003952430561185, |
|
"learning_rate": 1.0922002697770372e-05, |
|
"loss": 0.0, |
|
"step": 9850 |
|
}, |
|
{ |
|
"epoch": 2.3470602237562486, |
|
"grad_norm": 0.001296977628953755, |
|
"learning_rate": 1.0882329604062526e-05, |
|
"loss": 0.0, |
|
"step": 9860 |
|
}, |
|
{ |
|
"epoch": 2.3494406093787195, |
|
"grad_norm": 0.000640163547359407, |
|
"learning_rate": 1.0842656510354678e-05, |
|
"loss": 0.0, |
|
"step": 9870 |
|
}, |
|
{ |
|
"epoch": 2.3518209950011904, |
|
"grad_norm": 0.0009391361963935196, |
|
"learning_rate": 1.0802983416646831e-05, |
|
"loss": 0.0, |
|
"step": 9880 |
|
}, |
|
{ |
|
"epoch": 2.3542013806236612, |
|
"grad_norm": 0.0006612977595068514, |
|
"learning_rate": 1.0763310322938983e-05, |
|
"loss": 0.0001, |
|
"step": 9890 |
|
}, |
|
{ |
|
"epoch": 2.3565817662461317, |
|
"grad_norm": 0.0014715328579768538, |
|
"learning_rate": 1.0723637229231135e-05, |
|
"loss": 0.0, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 2.3589621518686026, |
|
"grad_norm": 0.0004139976226724684, |
|
"learning_rate": 1.0683964135523289e-05, |
|
"loss": 0.0, |
|
"step": 9910 |
|
}, |
|
{ |
|
"epoch": 2.3613425374910735, |
|
"grad_norm": 0.001368595752865076, |
|
"learning_rate": 1.0644291041815441e-05, |
|
"loss": 0.0001, |
|
"step": 9920 |
|
}, |
|
{ |
|
"epoch": 2.3637229231135444, |
|
"grad_norm": 0.0010275020031258464, |
|
"learning_rate": 1.0604617948107593e-05, |
|
"loss": 0.0, |
|
"step": 9930 |
|
}, |
|
{ |
|
"epoch": 2.3661033087360153, |
|
"grad_norm": 0.0008476102957502007, |
|
"learning_rate": 1.0564944854399747e-05, |
|
"loss": 0.0, |
|
"step": 9940 |
|
}, |
|
{ |
|
"epoch": 2.368483694358486, |
|
"grad_norm": 0.019286731258034706, |
|
"learning_rate": 1.0525271760691899e-05, |
|
"loss": 0.0, |
|
"step": 9950 |
|
}, |
|
{ |
|
"epoch": 2.370864079980957, |
|
"grad_norm": 0.0007589785964228213, |
|
"learning_rate": 1.0485598666984052e-05, |
|
"loss": 0.0001, |
|
"step": 9960 |
|
}, |
|
{ |
|
"epoch": 2.373244465603428, |
|
"grad_norm": 0.0007659016991965473, |
|
"learning_rate": 1.0445925573276204e-05, |
|
"loss": 0.0, |
|
"step": 9970 |
|
}, |
|
{ |
|
"epoch": 2.375624851225899, |
|
"grad_norm": 0.0035345428623259068, |
|
"learning_rate": 1.0406252479568356e-05, |
|
"loss": 0.0, |
|
"step": 9980 |
|
}, |
|
{ |
|
"epoch": 2.3780052368483693, |
|
"grad_norm": 0.0021891535725444555, |
|
"learning_rate": 1.036657938586051e-05, |
|
"loss": 0.0, |
|
"step": 9990 |
|
}, |
|
{ |
|
"epoch": 2.38038562247084, |
|
"grad_norm": 0.2655426263809204, |
|
"learning_rate": 1.0326906292152662e-05, |
|
"loss": 0.001, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 2.382766008093311, |
|
"grad_norm": 0.0008121923892758787, |
|
"learning_rate": 1.0287233198444816e-05, |
|
"loss": 0.0001, |
|
"step": 10010 |
|
}, |
|
{ |
|
"epoch": 2.385146393715782, |
|
"grad_norm": 0.006638567429035902, |
|
"learning_rate": 1.0247560104736968e-05, |
|
"loss": 0.0, |
|
"step": 10020 |
|
}, |
|
{ |
|
"epoch": 2.387526779338253, |
|
"grad_norm": 0.0033031317871063948, |
|
"learning_rate": 1.020788701102912e-05, |
|
"loss": 0.0, |
|
"step": 10030 |
|
}, |
|
{ |
|
"epoch": 2.3899071649607238, |
|
"grad_norm": 0.0004701575671788305, |
|
"learning_rate": 1.0168213917321274e-05, |
|
"loss": 0.0002, |
|
"step": 10040 |
|
}, |
|
{ |
|
"epoch": 2.3922875505831946, |
|
"grad_norm": 0.007627520710229874, |
|
"learning_rate": 1.0128540823613426e-05, |
|
"loss": 0.0, |
|
"step": 10050 |
|
}, |
|
{ |
|
"epoch": 2.394667936205665, |
|
"grad_norm": 0.0011233366094529629, |
|
"learning_rate": 1.0088867729905578e-05, |
|
"loss": 0.0, |
|
"step": 10060 |
|
}, |
|
{ |
|
"epoch": 2.397048321828136, |
|
"grad_norm": 0.0003728682058863342, |
|
"learning_rate": 1.0049194636197731e-05, |
|
"loss": 0.0001, |
|
"step": 10070 |
|
}, |
|
{ |
|
"epoch": 2.399428707450607, |
|
"grad_norm": 0.0018078387947753072, |
|
"learning_rate": 1.0009521542489885e-05, |
|
"loss": 0.0, |
|
"step": 10080 |
|
}, |
|
{ |
|
"epoch": 2.4018090930730778, |
|
"grad_norm": 0.004032574128359556, |
|
"learning_rate": 9.969848448782037e-06, |
|
"loss": 0.0, |
|
"step": 10090 |
|
}, |
|
{ |
|
"epoch": 2.4041894786955487, |
|
"grad_norm": 0.0010251044295728207, |
|
"learning_rate": 9.930175355074189e-06, |
|
"loss": 0.0, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 2.4065698643180196, |
|
"grad_norm": 0.0012369100004434586, |
|
"learning_rate": 9.890502261366341e-06, |
|
"loss": 0.0001, |
|
"step": 10110 |
|
}, |
|
{ |
|
"epoch": 2.4089502499404905, |
|
"grad_norm": 0.0008841692470014095, |
|
"learning_rate": 9.850829167658495e-06, |
|
"loss": 0.0, |
|
"step": 10120 |
|
}, |
|
{ |
|
"epoch": 2.4113306355629613, |
|
"grad_norm": 0.05967468023300171, |
|
"learning_rate": 9.811156073950648e-06, |
|
"loss": 0.0001, |
|
"step": 10130 |
|
}, |
|
{ |
|
"epoch": 2.4137110211854322, |
|
"grad_norm": 0.002878790721297264, |
|
"learning_rate": 9.771482980242799e-06, |
|
"loss": 0.0, |
|
"step": 10140 |
|
}, |
|
{ |
|
"epoch": 2.4160914068079027, |
|
"grad_norm": 0.0005018101655878127, |
|
"learning_rate": 9.731809886534952e-06, |
|
"loss": 0.0, |
|
"step": 10150 |
|
}, |
|
{ |
|
"epoch": 2.4184717924303736, |
|
"grad_norm": 0.0015724776312708855, |
|
"learning_rate": 9.692136792827106e-06, |
|
"loss": 0.0003, |
|
"step": 10160 |
|
}, |
|
{ |
|
"epoch": 2.4208521780528445, |
|
"grad_norm": 0.004237225744873285, |
|
"learning_rate": 9.652463699119258e-06, |
|
"loss": 0.0, |
|
"step": 10170 |
|
}, |
|
{ |
|
"epoch": 2.4232325636753154, |
|
"grad_norm": 0.00131317344494164, |
|
"learning_rate": 9.61279060541141e-06, |
|
"loss": 0.0, |
|
"step": 10180 |
|
}, |
|
{ |
|
"epoch": 2.4256129492977863, |
|
"grad_norm": 0.002073557348921895, |
|
"learning_rate": 9.573117511703564e-06, |
|
"loss": 0.0, |
|
"step": 10190 |
|
}, |
|
{ |
|
"epoch": 2.427993334920257, |
|
"grad_norm": 0.0045993453823029995, |
|
"learning_rate": 9.533444417995716e-06, |
|
"loss": 0.0002, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 2.430373720542728, |
|
"grad_norm": 0.001618819311261177, |
|
"learning_rate": 9.49377132428787e-06, |
|
"loss": 0.0001, |
|
"step": 10210 |
|
}, |
|
{ |
|
"epoch": 2.432754106165199, |
|
"grad_norm": 0.00304215750657022, |
|
"learning_rate": 9.45409823058002e-06, |
|
"loss": 0.0, |
|
"step": 10220 |
|
}, |
|
{ |
|
"epoch": 2.43513449178767, |
|
"grad_norm": 0.0007059932686388493, |
|
"learning_rate": 9.414425136872173e-06, |
|
"loss": 0.0, |
|
"step": 10230 |
|
}, |
|
{ |
|
"epoch": 2.4375148774101403, |
|
"grad_norm": 0.0031899004243314266, |
|
"learning_rate": 9.374752043164327e-06, |
|
"loss": 0.0, |
|
"step": 10240 |
|
}, |
|
{ |
|
"epoch": 2.439895263032611, |
|
"grad_norm": 0.002477418165653944, |
|
"learning_rate": 9.33507894945648e-06, |
|
"loss": 0.0, |
|
"step": 10250 |
|
}, |
|
{ |
|
"epoch": 2.442275648655082, |
|
"grad_norm": 0.00046585980453528464, |
|
"learning_rate": 9.295405855748631e-06, |
|
"loss": 0.0, |
|
"step": 10260 |
|
}, |
|
{ |
|
"epoch": 2.444656034277553, |
|
"grad_norm": 0.0005838835495524108, |
|
"learning_rate": 9.255732762040785e-06, |
|
"loss": 0.0, |
|
"step": 10270 |
|
}, |
|
{ |
|
"epoch": 2.447036419900024, |
|
"grad_norm": 0.001370543148368597, |
|
"learning_rate": 9.216059668332937e-06, |
|
"loss": 0.0, |
|
"step": 10280 |
|
}, |
|
{ |
|
"epoch": 2.4494168055224947, |
|
"grad_norm": 0.0016045079100877047, |
|
"learning_rate": 9.17638657462509e-06, |
|
"loss": 0.0, |
|
"step": 10290 |
|
}, |
|
{ |
|
"epoch": 2.4517971911449656, |
|
"grad_norm": 0.0020401678048074245, |
|
"learning_rate": 9.136713480917243e-06, |
|
"loss": 0.0001, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 2.454177576767436, |
|
"grad_norm": 0.00043605471728369594, |
|
"learning_rate": 9.097040387209395e-06, |
|
"loss": 0.0, |
|
"step": 10310 |
|
}, |
|
{ |
|
"epoch": 2.456557962389907, |
|
"grad_norm": 0.0005910994368605316, |
|
"learning_rate": 9.057367293501548e-06, |
|
"loss": 0.0, |
|
"step": 10320 |
|
}, |
|
{ |
|
"epoch": 2.458938348012378, |
|
"grad_norm": 0.0005397904315032065, |
|
"learning_rate": 9.0176941997937e-06, |
|
"loss": 0.0, |
|
"step": 10330 |
|
}, |
|
{ |
|
"epoch": 2.4613187336348488, |
|
"grad_norm": 0.014002328738570213, |
|
"learning_rate": 8.978021106085852e-06, |
|
"loss": 0.0, |
|
"step": 10340 |
|
}, |
|
{ |
|
"epoch": 2.4636991192573197, |
|
"grad_norm": 0.0011001590173691511, |
|
"learning_rate": 8.938348012378006e-06, |
|
"loss": 0.0001, |
|
"step": 10350 |
|
}, |
|
{ |
|
"epoch": 2.4660795048797906, |
|
"grad_norm": 0.0029695210978388786, |
|
"learning_rate": 8.898674918670158e-06, |
|
"loss": 0.0, |
|
"step": 10360 |
|
}, |
|
{ |
|
"epoch": 2.4684598905022614, |
|
"grad_norm": 0.00410072086378932, |
|
"learning_rate": 8.859001824962312e-06, |
|
"loss": 0.0, |
|
"step": 10370 |
|
}, |
|
{ |
|
"epoch": 2.4708402761247323, |
|
"grad_norm": 0.0005128366756252944, |
|
"learning_rate": 8.819328731254464e-06, |
|
"loss": 0.0, |
|
"step": 10380 |
|
}, |
|
{ |
|
"epoch": 2.4732206617472032, |
|
"grad_norm": 0.0021037100814282894, |
|
"learning_rate": 8.779655637546616e-06, |
|
"loss": 0.0, |
|
"step": 10390 |
|
}, |
|
{ |
|
"epoch": 2.4756010473696737, |
|
"grad_norm": 0.0005958130932413042, |
|
"learning_rate": 8.73998254383877e-06, |
|
"loss": 0.0, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 2.4779814329921446, |
|
"grad_norm": 0.0021961687598377466, |
|
"learning_rate": 8.700309450130921e-06, |
|
"loss": 0.0001, |
|
"step": 10410 |
|
}, |
|
{ |
|
"epoch": 2.4803618186146155, |
|
"grad_norm": 0.0011290331603959203, |
|
"learning_rate": 8.660636356423073e-06, |
|
"loss": 0.0, |
|
"step": 10420 |
|
}, |
|
{ |
|
"epoch": 2.4827422042370864, |
|
"grad_norm": 0.003101737704128027, |
|
"learning_rate": 8.620963262715227e-06, |
|
"loss": 0.0, |
|
"step": 10430 |
|
}, |
|
{ |
|
"epoch": 2.4851225898595573, |
|
"grad_norm": 0.010269707068800926, |
|
"learning_rate": 8.581290169007379e-06, |
|
"loss": 0.0, |
|
"step": 10440 |
|
}, |
|
{ |
|
"epoch": 2.487502975482028, |
|
"grad_norm": 0.0006016406114213169, |
|
"learning_rate": 8.541617075299533e-06, |
|
"loss": 0.0, |
|
"step": 10450 |
|
}, |
|
{ |
|
"epoch": 2.489883361104499, |
|
"grad_norm": 0.012370145879685879, |
|
"learning_rate": 8.501943981591685e-06, |
|
"loss": 0.0, |
|
"step": 10460 |
|
}, |
|
{ |
|
"epoch": 2.49226374672697, |
|
"grad_norm": 0.002209730911999941, |
|
"learning_rate": 8.462270887883837e-06, |
|
"loss": 0.0, |
|
"step": 10470 |
|
}, |
|
{ |
|
"epoch": 2.494644132349441, |
|
"grad_norm": 0.0002978077973239124, |
|
"learning_rate": 8.42259779417599e-06, |
|
"loss": 0.0, |
|
"step": 10480 |
|
}, |
|
{ |
|
"epoch": 2.4970245179719113, |
|
"grad_norm": 0.0006728899315930903, |
|
"learning_rate": 8.382924700468144e-06, |
|
"loss": 0.0, |
|
"step": 10490 |
|
}, |
|
{ |
|
"epoch": 2.499404903594382, |
|
"grad_norm": 0.0008764348458498716, |
|
"learning_rate": 8.343251606760296e-06, |
|
"loss": 0.0, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 2.501785289216853, |
|
"grad_norm": 0.001580104581080377, |
|
"learning_rate": 8.303578513052448e-06, |
|
"loss": 0.0, |
|
"step": 10510 |
|
}, |
|
{ |
|
"epoch": 2.504165674839324, |
|
"grad_norm": 0.0003571589768398553, |
|
"learning_rate": 8.2639054193446e-06, |
|
"loss": 0.0, |
|
"step": 10520 |
|
}, |
|
{ |
|
"epoch": 2.506546060461795, |
|
"grad_norm": 0.004758020397275686, |
|
"learning_rate": 8.224232325636754e-06, |
|
"loss": 0.0, |
|
"step": 10530 |
|
}, |
|
{ |
|
"epoch": 2.5089264460842657, |
|
"grad_norm": 0.0013680767733603716, |
|
"learning_rate": 8.184559231928906e-06, |
|
"loss": 0.0, |
|
"step": 10540 |
|
}, |
|
{ |
|
"epoch": 2.5113068317067366, |
|
"grad_norm": 0.0010658970568329096, |
|
"learning_rate": 8.144886138221058e-06, |
|
"loss": 0.0, |
|
"step": 10550 |
|
}, |
|
{ |
|
"epoch": 2.513687217329207, |
|
"grad_norm": 0.0007452235440723598, |
|
"learning_rate": 8.105213044513212e-06, |
|
"loss": 0.0002, |
|
"step": 10560 |
|
}, |
|
{ |
|
"epoch": 2.5160676029516784, |
|
"grad_norm": 0.0006281470414251089, |
|
"learning_rate": 8.065539950805365e-06, |
|
"loss": 0.0, |
|
"step": 10570 |
|
}, |
|
{ |
|
"epoch": 2.518447988574149, |
|
"grad_norm": 0.0007866009837016463, |
|
"learning_rate": 8.025866857097517e-06, |
|
"loss": 0.0, |
|
"step": 10580 |
|
}, |
|
{ |
|
"epoch": 2.5208283741966198, |
|
"grad_norm": 0.00039683215436525643, |
|
"learning_rate": 7.98619376338967e-06, |
|
"loss": 0.0, |
|
"step": 10590 |
|
}, |
|
{ |
|
"epoch": 2.5232087598190907, |
|
"grad_norm": 0.0009177124593406916, |
|
"learning_rate": 7.946520669681823e-06, |
|
"loss": 0.0, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 2.5255891454415615, |
|
"grad_norm": 0.00038271176163107157, |
|
"learning_rate": 7.906847575973975e-06, |
|
"loss": 0.0, |
|
"step": 10610 |
|
}, |
|
{ |
|
"epoch": 2.5279695310640324, |
|
"grad_norm": 0.00041592001798562706, |
|
"learning_rate": 7.867174482266127e-06, |
|
"loss": 0.0, |
|
"step": 10620 |
|
}, |
|
{ |
|
"epoch": 2.5303499166865033, |
|
"grad_norm": 0.0009455361287109554, |
|
"learning_rate": 7.827501388558279e-06, |
|
"loss": 0.0, |
|
"step": 10630 |
|
}, |
|
{ |
|
"epoch": 2.5327303023089742, |
|
"grad_norm": 0.0005674211424775422, |
|
"learning_rate": 7.787828294850433e-06, |
|
"loss": 0.0001, |
|
"step": 10640 |
|
}, |
|
{ |
|
"epoch": 2.5351106879314447, |
|
"grad_norm": 0.008180541917681694, |
|
"learning_rate": 7.748155201142586e-06, |
|
"loss": 0.0001, |
|
"step": 10650 |
|
}, |
|
{ |
|
"epoch": 2.537491073553916, |
|
"grad_norm": 0.006044210400432348, |
|
"learning_rate": 7.708482107434738e-06, |
|
"loss": 0.0, |
|
"step": 10660 |
|
}, |
|
{ |
|
"epoch": 2.5398714591763865, |
|
"grad_norm": 0.00039350485894829035, |
|
"learning_rate": 7.66880901372689e-06, |
|
"loss": 0.0, |
|
"step": 10670 |
|
}, |
|
{ |
|
"epoch": 2.5422518447988574, |
|
"grad_norm": 0.0007660723640583456, |
|
"learning_rate": 7.629135920019044e-06, |
|
"loss": 0.0, |
|
"step": 10680 |
|
}, |
|
{ |
|
"epoch": 2.5446322304213282, |
|
"grad_norm": 0.001309241633862257, |
|
"learning_rate": 7.589462826311196e-06, |
|
"loss": 0.0, |
|
"step": 10690 |
|
}, |
|
{ |
|
"epoch": 2.547012616043799, |
|
"grad_norm": 0.023756977170705795, |
|
"learning_rate": 7.549789732603349e-06, |
|
"loss": 0.0002, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 2.54939300166627, |
|
"grad_norm": 0.002046087756752968, |
|
"learning_rate": 7.510116638895501e-06, |
|
"loss": 0.0, |
|
"step": 10710 |
|
}, |
|
{ |
|
"epoch": 2.551773387288741, |
|
"grad_norm": 0.0047508729621768, |
|
"learning_rate": 7.470443545187654e-06, |
|
"loss": 0.0, |
|
"step": 10720 |
|
}, |
|
{ |
|
"epoch": 2.554153772911212, |
|
"grad_norm": 0.0010949558345600963, |
|
"learning_rate": 7.4307704514798075e-06, |
|
"loss": 0.0, |
|
"step": 10730 |
|
}, |
|
{ |
|
"epoch": 2.5565341585336823, |
|
"grad_norm": 0.010589073412120342, |
|
"learning_rate": 7.391097357771959e-06, |
|
"loss": 0.0, |
|
"step": 10740 |
|
}, |
|
{ |
|
"epoch": 2.558914544156153, |
|
"grad_norm": 0.0006332534248940647, |
|
"learning_rate": 7.351424264064112e-06, |
|
"loss": 0.0, |
|
"step": 10750 |
|
}, |
|
{ |
|
"epoch": 2.561294929778624, |
|
"grad_norm": 0.00027181513723917305, |
|
"learning_rate": 7.311751170356265e-06, |
|
"loss": 0.0, |
|
"step": 10760 |
|
}, |
|
{ |
|
"epoch": 2.563675315401095, |
|
"grad_norm": 0.0036267938558012247, |
|
"learning_rate": 7.272078076648417e-06, |
|
"loss": 0.0, |
|
"step": 10770 |
|
}, |
|
{ |
|
"epoch": 2.566055701023566, |
|
"grad_norm": 0.002974023576825857, |
|
"learning_rate": 7.23240498294057e-06, |
|
"loss": 0.0, |
|
"step": 10780 |
|
}, |
|
{ |
|
"epoch": 2.5684360866460367, |
|
"grad_norm": 0.0005654848064295948, |
|
"learning_rate": 7.192731889232724e-06, |
|
"loss": 0.0001, |
|
"step": 10790 |
|
}, |
|
{ |
|
"epoch": 2.5708164722685076, |
|
"grad_norm": 0.001776995835825801, |
|
"learning_rate": 7.153058795524875e-06, |
|
"loss": 0.0, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 2.573196857890978, |
|
"grad_norm": 0.0031643370166420937, |
|
"learning_rate": 7.1133857018170286e-06, |
|
"loss": 0.0, |
|
"step": 10810 |
|
}, |
|
{ |
|
"epoch": 2.5755772435134494, |
|
"grad_norm": 0.0006117381271906197, |
|
"learning_rate": 7.07371260810918e-06, |
|
"loss": 0.0, |
|
"step": 10820 |
|
}, |
|
{ |
|
"epoch": 2.57795762913592, |
|
"grad_norm": 0.00013082509394735098, |
|
"learning_rate": 7.034039514401333e-06, |
|
"loss": 0.0, |
|
"step": 10830 |
|
}, |
|
{ |
|
"epoch": 2.5803380147583908, |
|
"grad_norm": 0.009411906823515892, |
|
"learning_rate": 6.994366420693486e-06, |
|
"loss": 0.0, |
|
"step": 10840 |
|
}, |
|
{ |
|
"epoch": 2.5827184003808616, |
|
"grad_norm": 0.007766501512378454, |
|
"learning_rate": 6.954693326985638e-06, |
|
"loss": 0.0, |
|
"step": 10850 |
|
}, |
|
{ |
|
"epoch": 2.5850987860033325, |
|
"grad_norm": 0.001036152825690806, |
|
"learning_rate": 6.915020233277791e-06, |
|
"loss": 0.0, |
|
"step": 10860 |
|
}, |
|
{ |
|
"epoch": 2.5874791716258034, |
|
"grad_norm": 0.0007062302902340889, |
|
"learning_rate": 6.875347139569945e-06, |
|
"loss": 0.0, |
|
"step": 10870 |
|
}, |
|
{ |
|
"epoch": 2.5898595572482743, |
|
"grad_norm": 0.004976709373295307, |
|
"learning_rate": 6.835674045862096e-06, |
|
"loss": 0.0, |
|
"step": 10880 |
|
}, |
|
{ |
|
"epoch": 2.592239942870745, |
|
"grad_norm": 0.0005074761575087905, |
|
"learning_rate": 6.79600095215425e-06, |
|
"loss": 0.0, |
|
"step": 10890 |
|
}, |
|
{ |
|
"epoch": 2.5946203284932157, |
|
"grad_norm": 0.0028977631591260433, |
|
"learning_rate": 6.7563278584464025e-06, |
|
"loss": 0.0, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 2.597000714115687, |
|
"grad_norm": 0.004557565785944462, |
|
"learning_rate": 6.7166547647385545e-06, |
|
"loss": 0.0, |
|
"step": 10910 |
|
}, |
|
{ |
|
"epoch": 2.5993810997381575, |
|
"grad_norm": 0.0018358832458034158, |
|
"learning_rate": 6.676981671030707e-06, |
|
"loss": 0.0, |
|
"step": 10920 |
|
}, |
|
{ |
|
"epoch": 2.6017614853606283, |
|
"grad_norm": 0.0014729060931131244, |
|
"learning_rate": 6.637308577322859e-06, |
|
"loss": 0.0, |
|
"step": 10930 |
|
}, |
|
{ |
|
"epoch": 2.6041418709830992, |
|
"grad_norm": 0.0004332439857535064, |
|
"learning_rate": 6.597635483615012e-06, |
|
"loss": 0.0, |
|
"step": 10940 |
|
}, |
|
{ |
|
"epoch": 2.60652225660557, |
|
"grad_norm": 0.0009114540298469365, |
|
"learning_rate": 6.557962389907166e-06, |
|
"loss": 0.0, |
|
"step": 10950 |
|
}, |
|
{ |
|
"epoch": 2.608902642228041, |
|
"grad_norm": 0.010355968959629536, |
|
"learning_rate": 6.518289296199318e-06, |
|
"loss": 0.0, |
|
"step": 10960 |
|
}, |
|
{ |
|
"epoch": 2.611283027850512, |
|
"grad_norm": 0.054084401577711105, |
|
"learning_rate": 6.478616202491471e-06, |
|
"loss": 0.0, |
|
"step": 10970 |
|
}, |
|
{ |
|
"epoch": 2.613663413472983, |
|
"grad_norm": 0.0009903626050800085, |
|
"learning_rate": 6.438943108783624e-06, |
|
"loss": 0.0, |
|
"step": 10980 |
|
}, |
|
{ |
|
"epoch": 2.6160437990954533, |
|
"grad_norm": 0.00019378839351702482, |
|
"learning_rate": 6.399270015075776e-06, |
|
"loss": 0.0, |
|
"step": 10990 |
|
}, |
|
{ |
|
"epoch": 2.618424184717924, |
|
"grad_norm": 0.0006563541246578097, |
|
"learning_rate": 6.3595969213679285e-06, |
|
"loss": 0.0, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 2.620804570340395, |
|
"grad_norm": 0.0006744746351614594, |
|
"learning_rate": 6.3199238276600805e-06, |
|
"loss": 0.0, |
|
"step": 11010 |
|
}, |
|
{ |
|
"epoch": 2.623184955962866, |
|
"grad_norm": 0.0011966971214860678, |
|
"learning_rate": 6.280250733952233e-06, |
|
"loss": 0.0, |
|
"step": 11020 |
|
}, |
|
{ |
|
"epoch": 2.625565341585337, |
|
"grad_norm": 0.0017309453105553985, |
|
"learning_rate": 6.240577640244387e-06, |
|
"loss": 0.0, |
|
"step": 11030 |
|
}, |
|
{ |
|
"epoch": 2.6279457272078077, |
|
"grad_norm": 0.0008661380270496011, |
|
"learning_rate": 6.200904546536539e-06, |
|
"loss": 0.0005, |
|
"step": 11040 |
|
}, |
|
{ |
|
"epoch": 2.6303261128302786, |
|
"grad_norm": 0.0003683891554828733, |
|
"learning_rate": 6.161231452828692e-06, |
|
"loss": 0.0, |
|
"step": 11050 |
|
}, |
|
{ |
|
"epoch": 2.6327064984527495, |
|
"grad_norm": 0.0005742148496210575, |
|
"learning_rate": 6.121558359120845e-06, |
|
"loss": 0.0, |
|
"step": 11060 |
|
}, |
|
{ |
|
"epoch": 2.6350868840752204, |
|
"grad_norm": 0.0010009456891566515, |
|
"learning_rate": 6.0818852654129976e-06, |
|
"loss": 0.0, |
|
"step": 11070 |
|
}, |
|
{ |
|
"epoch": 2.637467269697691, |
|
"grad_norm": 0.0008674330892972648, |
|
"learning_rate": 6.0422121717051496e-06, |
|
"loss": 0.0001, |
|
"step": 11080 |
|
}, |
|
{ |
|
"epoch": 2.6398476553201617, |
|
"grad_norm": 0.00011453252227511257, |
|
"learning_rate": 6.002539077997302e-06, |
|
"loss": 0.0, |
|
"step": 11090 |
|
}, |
|
{ |
|
"epoch": 2.6422280409426326, |
|
"grad_norm": 0.0014997412217780948, |
|
"learning_rate": 5.962865984289455e-06, |
|
"loss": 0.0, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 2.6446084265651035, |
|
"grad_norm": 0.0013535526813939214, |
|
"learning_rate": 5.923192890581608e-06, |
|
"loss": 0.0, |
|
"step": 11110 |
|
}, |
|
{ |
|
"epoch": 2.6469888121875744, |
|
"grad_norm": 0.0010607549920678139, |
|
"learning_rate": 5.883519796873761e-06, |
|
"loss": 0.0, |
|
"step": 11120 |
|
}, |
|
{ |
|
"epoch": 2.6493691978100453, |
|
"grad_norm": 0.001384345581755042, |
|
"learning_rate": 5.843846703165913e-06, |
|
"loss": 0.0, |
|
"step": 11130 |
|
}, |
|
{ |
|
"epoch": 2.651749583432516, |
|
"grad_norm": 0.009620246477425098, |
|
"learning_rate": 5.804173609458066e-06, |
|
"loss": 0.0, |
|
"step": 11140 |
|
}, |
|
{ |
|
"epoch": 2.6541299690549867, |
|
"grad_norm": 0.004576113075017929, |
|
"learning_rate": 5.764500515750219e-06, |
|
"loss": 0.0, |
|
"step": 11150 |
|
}, |
|
{ |
|
"epoch": 2.656510354677458, |
|
"grad_norm": 0.0007963149109855294, |
|
"learning_rate": 5.7248274220423715e-06, |
|
"loss": 0.0, |
|
"step": 11160 |
|
}, |
|
{ |
|
"epoch": 2.6588907402999284, |
|
"grad_norm": 0.0005275904550217092, |
|
"learning_rate": 5.6851543283345235e-06, |
|
"loss": 0.0, |
|
"step": 11170 |
|
}, |
|
{ |
|
"epoch": 2.6612711259223993, |
|
"grad_norm": 0.0007748051430098712, |
|
"learning_rate": 5.645481234626677e-06, |
|
"loss": 0.0, |
|
"step": 11180 |
|
}, |
|
{ |
|
"epoch": 2.6636515115448702, |
|
"grad_norm": 0.0005676033324562013, |
|
"learning_rate": 5.605808140918829e-06, |
|
"loss": 0.0, |
|
"step": 11190 |
|
}, |
|
{ |
|
"epoch": 2.666031897167341, |
|
"grad_norm": 0.0009870273061096668, |
|
"learning_rate": 5.566135047210982e-06, |
|
"loss": 0.0, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 2.668412282789812, |
|
"grad_norm": 0.0004960622172802687, |
|
"learning_rate": 5.526461953503134e-06, |
|
"loss": 0.0, |
|
"step": 11210 |
|
}, |
|
{ |
|
"epoch": 2.670792668412283, |
|
"grad_norm": 0.2789072096347809, |
|
"learning_rate": 5.486788859795288e-06, |
|
"loss": 0.0001, |
|
"step": 11220 |
|
}, |
|
{ |
|
"epoch": 2.673173054034754, |
|
"grad_norm": 0.004494486376643181, |
|
"learning_rate": 5.44711576608744e-06, |
|
"loss": 0.0001, |
|
"step": 11230 |
|
}, |
|
{ |
|
"epoch": 2.6755534396572243, |
|
"grad_norm": 0.0009736506035551429, |
|
"learning_rate": 5.407442672379593e-06, |
|
"loss": 0.0, |
|
"step": 11240 |
|
}, |
|
{ |
|
"epoch": 2.677933825279695, |
|
"grad_norm": 0.0027844863943755627, |
|
"learning_rate": 5.367769578671745e-06, |
|
"loss": 0.0, |
|
"step": 11250 |
|
}, |
|
{ |
|
"epoch": 2.680314210902166, |
|
"grad_norm": 0.013426104560494423, |
|
"learning_rate": 5.328096484963898e-06, |
|
"loss": 0.0, |
|
"step": 11260 |
|
}, |
|
{ |
|
"epoch": 2.682694596524637, |
|
"grad_norm": 0.0002785604156088084, |
|
"learning_rate": 5.28842339125605e-06, |
|
"loss": 0.0, |
|
"step": 11270 |
|
}, |
|
{ |
|
"epoch": 2.685074982147108, |
|
"grad_norm": 0.0007079096976667643, |
|
"learning_rate": 5.248750297548203e-06, |
|
"loss": 0.0, |
|
"step": 11280 |
|
}, |
|
{ |
|
"epoch": 2.6874553677695787, |
|
"grad_norm": 0.0004877845640294254, |
|
"learning_rate": 5.209077203840355e-06, |
|
"loss": 0.0001, |
|
"step": 11290 |
|
}, |
|
{ |
|
"epoch": 2.6898357533920496, |
|
"grad_norm": 0.029308408498764038, |
|
"learning_rate": 5.169404110132509e-06, |
|
"loss": 0.0, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 2.6922161390145205, |
|
"grad_norm": 0.0011891064932569861, |
|
"learning_rate": 5.129731016424661e-06, |
|
"loss": 0.0, |
|
"step": 11310 |
|
}, |
|
{ |
|
"epoch": 2.6945965246369914, |
|
"grad_norm": 0.009328281506896019, |
|
"learning_rate": 5.090057922716814e-06, |
|
"loss": 0.0, |
|
"step": 11320 |
|
}, |
|
{ |
|
"epoch": 2.696976910259462, |
|
"grad_norm": 0.0010127691784873605, |
|
"learning_rate": 5.0503848290089666e-06, |
|
"loss": 0.0, |
|
"step": 11330 |
|
}, |
|
{ |
|
"epoch": 2.6993572958819327, |
|
"grad_norm": 0.0006704577244818211, |
|
"learning_rate": 5.010711735301119e-06, |
|
"loss": 0.0, |
|
"step": 11340 |
|
}, |
|
{ |
|
"epoch": 2.7017376815044036, |
|
"grad_norm": 0.0015914466930553317, |
|
"learning_rate": 4.971038641593271e-06, |
|
"loss": 0.0, |
|
"step": 11350 |
|
}, |
|
{ |
|
"epoch": 2.7041180671268745, |
|
"grad_norm": 0.00046926282811909914, |
|
"learning_rate": 4.931365547885424e-06, |
|
"loss": 0.0, |
|
"step": 11360 |
|
}, |
|
{ |
|
"epoch": 2.7064984527493454, |
|
"grad_norm": 0.0008572743972763419, |
|
"learning_rate": 4.891692454177577e-06, |
|
"loss": 0.0, |
|
"step": 11370 |
|
}, |
|
{ |
|
"epoch": 2.7088788383718163, |
|
"grad_norm": 0.001012885244563222, |
|
"learning_rate": 4.85201936046973e-06, |
|
"loss": 0.0, |
|
"step": 11380 |
|
}, |
|
{ |
|
"epoch": 2.711259223994287, |
|
"grad_norm": 0.000291361880954355, |
|
"learning_rate": 4.812346266761882e-06, |
|
"loss": 0.0, |
|
"step": 11390 |
|
}, |
|
{ |
|
"epoch": 2.7136396096167577, |
|
"grad_norm": 0.001445894013158977, |
|
"learning_rate": 4.772673173054035e-06, |
|
"loss": 0.0, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 2.716019995239229, |
|
"grad_norm": 0.0007329813088290393, |
|
"learning_rate": 4.733000079346188e-06, |
|
"loss": 0.0, |
|
"step": 11410 |
|
}, |
|
{ |
|
"epoch": 2.7184003808616994, |
|
"grad_norm": 0.02237352356314659, |
|
"learning_rate": 4.6933269856383405e-06, |
|
"loss": 0.0, |
|
"step": 11420 |
|
}, |
|
{ |
|
"epoch": 2.7207807664841703, |
|
"grad_norm": 0.0004787015204783529, |
|
"learning_rate": 4.6536538919304925e-06, |
|
"loss": 0.0, |
|
"step": 11430 |
|
}, |
|
{ |
|
"epoch": 2.7231611521066412, |
|
"grad_norm": 0.0011766423704102635, |
|
"learning_rate": 4.613980798222645e-06, |
|
"loss": 0.0, |
|
"step": 11440 |
|
}, |
|
{ |
|
"epoch": 2.725541537729112, |
|
"grad_norm": 0.0003720026579685509, |
|
"learning_rate": 4.574307704514798e-06, |
|
"loss": 0.0, |
|
"step": 11450 |
|
}, |
|
{ |
|
"epoch": 2.727921923351583, |
|
"grad_norm": 0.0004271367215551436, |
|
"learning_rate": 4.534634610806951e-06, |
|
"loss": 0.0, |
|
"step": 11460 |
|
}, |
|
{ |
|
"epoch": 2.730302308974054, |
|
"grad_norm": 0.001319264993071556, |
|
"learning_rate": 4.494961517099103e-06, |
|
"loss": 0.0, |
|
"step": 11470 |
|
}, |
|
{ |
|
"epoch": 2.732682694596525, |
|
"grad_norm": 0.0012237573973834515, |
|
"learning_rate": 4.455288423391257e-06, |
|
"loss": 0.0, |
|
"step": 11480 |
|
}, |
|
{ |
|
"epoch": 2.7350630802189952, |
|
"grad_norm": 0.00044418079778552055, |
|
"learning_rate": 4.415615329683409e-06, |
|
"loss": 0.0, |
|
"step": 11490 |
|
}, |
|
{ |
|
"epoch": 2.7374434658414666, |
|
"grad_norm": 0.0009368477039970458, |
|
"learning_rate": 4.375942235975562e-06, |
|
"loss": 0.0, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 2.739823851463937, |
|
"grad_norm": 0.0015390801709145308, |
|
"learning_rate": 4.336269142267714e-06, |
|
"loss": 0.0001, |
|
"step": 11510 |
|
}, |
|
{ |
|
"epoch": 2.742204237086408, |
|
"grad_norm": 0.00022943236399441957, |
|
"learning_rate": 4.296596048559867e-06, |
|
"loss": 0.0, |
|
"step": 11520 |
|
}, |
|
{ |
|
"epoch": 2.744584622708879, |
|
"grad_norm": 0.0031924904324114323, |
|
"learning_rate": 4.256922954852019e-06, |
|
"loss": 0.0, |
|
"step": 11530 |
|
}, |
|
{ |
|
"epoch": 2.7469650083313497, |
|
"grad_norm": 0.0011005508713424206, |
|
"learning_rate": 4.217249861144172e-06, |
|
"loss": 0.0, |
|
"step": 11540 |
|
}, |
|
{ |
|
"epoch": 2.7493453939538206, |
|
"grad_norm": 0.00039162219036370516, |
|
"learning_rate": 4.177576767436325e-06, |
|
"loss": 0.0, |
|
"step": 11550 |
|
}, |
|
{ |
|
"epoch": 2.7517257795762915, |
|
"grad_norm": 0.0011376795591786504, |
|
"learning_rate": 4.137903673728478e-06, |
|
"loss": 0.0, |
|
"step": 11560 |
|
}, |
|
{ |
|
"epoch": 2.7541061651987624, |
|
"grad_norm": 0.0005944286240264773, |
|
"learning_rate": 4.09823058002063e-06, |
|
"loss": 0.0, |
|
"step": 11570 |
|
}, |
|
{ |
|
"epoch": 2.756486550821233, |
|
"grad_norm": 0.0007298539276234806, |
|
"learning_rate": 4.058557486312783e-06, |
|
"loss": 0.0, |
|
"step": 11580 |
|
}, |
|
{ |
|
"epoch": 2.7588669364437037, |
|
"grad_norm": 0.00018211067072115839, |
|
"learning_rate": 4.0188843926049356e-06, |
|
"loss": 0.0, |
|
"step": 11590 |
|
}, |
|
{ |
|
"epoch": 2.7612473220661746, |
|
"grad_norm": 0.0034182893577963114, |
|
"learning_rate": 3.9792112988970884e-06, |
|
"loss": 0.0, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 2.7636277076886455, |
|
"grad_norm": 0.000364614010322839, |
|
"learning_rate": 3.939538205189241e-06, |
|
"loss": 0.0, |
|
"step": 11610 |
|
}, |
|
{ |
|
"epoch": 2.7660080933111164, |
|
"grad_norm": 0.0021814818028360605, |
|
"learning_rate": 3.899865111481393e-06, |
|
"loss": 0.0, |
|
"step": 11620 |
|
}, |
|
{ |
|
"epoch": 2.7683884789335873, |
|
"grad_norm": 0.0014812530716881156, |
|
"learning_rate": 3.860192017773546e-06, |
|
"loss": 0.0001, |
|
"step": 11630 |
|
}, |
|
{ |
|
"epoch": 2.770768864556058, |
|
"grad_norm": 0.0005358079797588289, |
|
"learning_rate": 3.820518924065699e-06, |
|
"loss": 0.0, |
|
"step": 11640 |
|
}, |
|
{ |
|
"epoch": 2.7731492501785286, |
|
"grad_norm": 0.00028996021137572825, |
|
"learning_rate": 3.7808458303578514e-06, |
|
"loss": 0.0, |
|
"step": 11650 |
|
}, |
|
{ |
|
"epoch": 2.775529635801, |
|
"grad_norm": 0.001182155217975378, |
|
"learning_rate": 3.741172736650004e-06, |
|
"loss": 0.0, |
|
"step": 11660 |
|
}, |
|
{ |
|
"epoch": 2.7779100214234704, |
|
"grad_norm": 0.00023413899180013686, |
|
"learning_rate": 3.701499642942157e-06, |
|
"loss": 0.0, |
|
"step": 11670 |
|
}, |
|
{ |
|
"epoch": 2.7802904070459413, |
|
"grad_norm": 0.0006019670399837196, |
|
"learning_rate": 3.6618265492343095e-06, |
|
"loss": 0.0, |
|
"step": 11680 |
|
}, |
|
{ |
|
"epoch": 2.782670792668412, |
|
"grad_norm": 0.0004944771062582731, |
|
"learning_rate": 3.622153455526462e-06, |
|
"loss": 0.0, |
|
"step": 11690 |
|
}, |
|
{ |
|
"epoch": 2.785051178290883, |
|
"grad_norm": 6.98843869031407e-05, |
|
"learning_rate": 3.5824803618186144e-06, |
|
"loss": 0.0, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 2.787431563913354, |
|
"grad_norm": 0.0005101510905660689, |
|
"learning_rate": 3.5428072681107677e-06, |
|
"loss": 0.0, |
|
"step": 11710 |
|
}, |
|
{ |
|
"epoch": 2.789811949535825, |
|
"grad_norm": 0.00034247711300849915, |
|
"learning_rate": 3.50313417440292e-06, |
|
"loss": 0.0, |
|
"step": 11720 |
|
}, |
|
{ |
|
"epoch": 2.792192335158296, |
|
"grad_norm": 0.00044277720735408366, |
|
"learning_rate": 3.4634610806950725e-06, |
|
"loss": 0.0, |
|
"step": 11730 |
|
}, |
|
{ |
|
"epoch": 2.7945727207807662, |
|
"grad_norm": 0.0005088172620162368, |
|
"learning_rate": 3.423787986987225e-06, |
|
"loss": 0.0, |
|
"step": 11740 |
|
}, |
|
{ |
|
"epoch": 2.7969531064032376, |
|
"grad_norm": 0.00021512300008907914, |
|
"learning_rate": 3.384114893279378e-06, |
|
"loss": 0.0, |
|
"step": 11750 |
|
}, |
|
{ |
|
"epoch": 2.799333492025708, |
|
"grad_norm": 0.0007052098517306149, |
|
"learning_rate": 3.3444417995715306e-06, |
|
"loss": 0.0, |
|
"step": 11760 |
|
}, |
|
{ |
|
"epoch": 2.801713877648179, |
|
"grad_norm": 0.036882251501083374, |
|
"learning_rate": 3.304768705863683e-06, |
|
"loss": 0.0, |
|
"step": 11770 |
|
}, |
|
{ |
|
"epoch": 2.80409426327065, |
|
"grad_norm": 0.00013749166100751609, |
|
"learning_rate": 3.2650956121558363e-06, |
|
"loss": 0.0, |
|
"step": 11780 |
|
}, |
|
{ |
|
"epoch": 2.8064746488931207, |
|
"grad_norm": 0.0006571552366949618, |
|
"learning_rate": 3.2254225184479888e-06, |
|
"loss": 0.0, |
|
"step": 11790 |
|
}, |
|
{ |
|
"epoch": 2.8088550345155916, |
|
"grad_norm": 0.0008290376281365752, |
|
"learning_rate": 3.185749424740141e-06, |
|
"loss": 0.0, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 2.8112354201380625, |
|
"grad_norm": 8.49374700919725e-05, |
|
"learning_rate": 3.146076331032294e-06, |
|
"loss": 0.0, |
|
"step": 11810 |
|
}, |
|
{ |
|
"epoch": 2.8136158057605334, |
|
"grad_norm": 0.00033748464193195105, |
|
"learning_rate": 3.1064032373244465e-06, |
|
"loss": 0.0, |
|
"step": 11820 |
|
}, |
|
{ |
|
"epoch": 2.815996191383004, |
|
"grad_norm": 0.0003914514381904155, |
|
"learning_rate": 3.0667301436165993e-06, |
|
"loss": 0.0, |
|
"step": 11830 |
|
}, |
|
{ |
|
"epoch": 2.8183765770054747, |
|
"grad_norm": 0.00029730124515481293, |
|
"learning_rate": 3.0270570499087517e-06, |
|
"loss": 0.0, |
|
"step": 11840 |
|
}, |
|
{ |
|
"epoch": 2.8207569626279456, |
|
"grad_norm": 0.00035526990541256964, |
|
"learning_rate": 2.9873839562009046e-06, |
|
"loss": 0.0, |
|
"step": 11850 |
|
}, |
|
{ |
|
"epoch": 2.8231373482504165, |
|
"grad_norm": 0.0007370146340690553, |
|
"learning_rate": 2.9477108624930574e-06, |
|
"loss": 0.0, |
|
"step": 11860 |
|
}, |
|
{ |
|
"epoch": 2.8255177338728874, |
|
"grad_norm": 8.048515883274376e-05, |
|
"learning_rate": 2.90803776878521e-06, |
|
"loss": 0.0, |
|
"step": 11870 |
|
}, |
|
{ |
|
"epoch": 2.8278981194953583, |
|
"grad_norm": 0.00022186528076417744, |
|
"learning_rate": 2.8683646750773627e-06, |
|
"loss": 0.0, |
|
"step": 11880 |
|
}, |
|
{ |
|
"epoch": 2.830278505117829, |
|
"grad_norm": 0.0004252239887136966, |
|
"learning_rate": 2.8286915813695156e-06, |
|
"loss": 0.0, |
|
"step": 11890 |
|
}, |
|
{ |
|
"epoch": 2.8326588907403, |
|
"grad_norm": 0.00027670618146657944, |
|
"learning_rate": 2.789018487661668e-06, |
|
"loss": 0.0, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 2.835039276362771, |
|
"grad_norm": 0.0020431778393685818, |
|
"learning_rate": 2.749345393953821e-06, |
|
"loss": 0.0, |
|
"step": 11910 |
|
}, |
|
{ |
|
"epoch": 2.8374196619852414, |
|
"grad_norm": 0.001547365915030241, |
|
"learning_rate": 2.7096723002459737e-06, |
|
"loss": 0.0, |
|
"step": 11920 |
|
}, |
|
{ |
|
"epoch": 2.8398000476077123, |
|
"grad_norm": 0.0013964555691927671, |
|
"learning_rate": 2.669999206538126e-06, |
|
"loss": 0.0, |
|
"step": 11930 |
|
}, |
|
{ |
|
"epoch": 2.842180433230183, |
|
"grad_norm": 0.00027170139946974814, |
|
"learning_rate": 2.630326112830279e-06, |
|
"loss": 0.0, |
|
"step": 11940 |
|
}, |
|
{ |
|
"epoch": 2.844560818852654, |
|
"grad_norm": 0.0008765398524701595, |
|
"learning_rate": 2.5906530191224314e-06, |
|
"loss": 0.0, |
|
"step": 11950 |
|
}, |
|
{ |
|
"epoch": 2.846941204475125, |
|
"grad_norm": 0.00015922258899081498, |
|
"learning_rate": 2.5509799254145842e-06, |
|
"loss": 0.0, |
|
"step": 11960 |
|
}, |
|
{ |
|
"epoch": 2.849321590097596, |
|
"grad_norm": 0.00011323492071824148, |
|
"learning_rate": 2.5113068317067367e-06, |
|
"loss": 0.0, |
|
"step": 11970 |
|
}, |
|
{ |
|
"epoch": 2.851701975720067, |
|
"grad_norm": 0.0008671206305734813, |
|
"learning_rate": 2.4716337379988895e-06, |
|
"loss": 0.0001, |
|
"step": 11980 |
|
}, |
|
{ |
|
"epoch": 2.8540823613425372, |
|
"grad_norm": 0.00013449507241602987, |
|
"learning_rate": 2.431960644291042e-06, |
|
"loss": 0.0, |
|
"step": 11990 |
|
}, |
|
{ |
|
"epoch": 2.8564627469650086, |
|
"grad_norm": 0.0008318678010255098, |
|
"learning_rate": 2.3922875505831948e-06, |
|
"loss": 0.0, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 2.858843132587479, |
|
"grad_norm": 0.0012901159934699535, |
|
"learning_rate": 2.352614456875347e-06, |
|
"loss": 0.0, |
|
"step": 12010 |
|
}, |
|
{ |
|
"epoch": 2.86122351820995, |
|
"grad_norm": 0.00032769294921308756, |
|
"learning_rate": 2.3129413631675e-06, |
|
"loss": 0.0, |
|
"step": 12020 |
|
}, |
|
{ |
|
"epoch": 2.863603903832421, |
|
"grad_norm": 0.0022394724655896425, |
|
"learning_rate": 2.2732682694596525e-06, |
|
"loss": 0.0, |
|
"step": 12030 |
|
}, |
|
{ |
|
"epoch": 2.8659842894548917, |
|
"grad_norm": 0.0001916442415677011, |
|
"learning_rate": 2.2335951757518053e-06, |
|
"loss": 0.0, |
|
"step": 12040 |
|
}, |
|
{ |
|
"epoch": 2.8683646750773626, |
|
"grad_norm": 0.0008263205527327955, |
|
"learning_rate": 2.1939220820439578e-06, |
|
"loss": 0.0, |
|
"step": 12050 |
|
}, |
|
{ |
|
"epoch": 2.8707450606998335, |
|
"grad_norm": 0.01558750867843628, |
|
"learning_rate": 2.1542489883361106e-06, |
|
"loss": 0.0, |
|
"step": 12060 |
|
}, |
|
{ |
|
"epoch": 2.8731254463223044, |
|
"grad_norm": 0.0005802076193504035, |
|
"learning_rate": 2.1145758946282635e-06, |
|
"loss": 0.0002, |
|
"step": 12070 |
|
}, |
|
{ |
|
"epoch": 2.875505831944775, |
|
"grad_norm": 0.0006769265746697783, |
|
"learning_rate": 2.074902800920416e-06, |
|
"loss": 0.0, |
|
"step": 12080 |
|
}, |
|
{ |
|
"epoch": 2.877886217567246, |
|
"grad_norm": 0.00040787094621919096, |
|
"learning_rate": 2.0352297072125687e-06, |
|
"loss": 0.0, |
|
"step": 12090 |
|
}, |
|
{ |
|
"epoch": 2.8802666031897166, |
|
"grad_norm": 0.00034027136280201375, |
|
"learning_rate": 1.995556613504721e-06, |
|
"loss": 0.0, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 2.8826469888121875, |
|
"grad_norm": 0.008367573842406273, |
|
"learning_rate": 1.955883519796874e-06, |
|
"loss": 0.0, |
|
"step": 12110 |
|
}, |
|
{ |
|
"epoch": 2.8850273744346584, |
|
"grad_norm": 0.0002640595193952322, |
|
"learning_rate": 1.9162104260890264e-06, |
|
"loss": 0.0, |
|
"step": 12120 |
|
}, |
|
{ |
|
"epoch": 2.8874077600571293, |
|
"grad_norm": 0.0006561621557921171, |
|
"learning_rate": 1.8765373323811793e-06, |
|
"loss": 0.0, |
|
"step": 12130 |
|
}, |
|
{ |
|
"epoch": 2.8897881456796, |
|
"grad_norm": 0.0008464111597277224, |
|
"learning_rate": 1.8368642386733317e-06, |
|
"loss": 0.0, |
|
"step": 12140 |
|
}, |
|
{ |
|
"epoch": 2.892168531302071, |
|
"grad_norm": 0.0003002223384100944, |
|
"learning_rate": 1.7971911449654846e-06, |
|
"loss": 0.0, |
|
"step": 12150 |
|
}, |
|
{ |
|
"epoch": 2.894548916924542, |
|
"grad_norm": 0.0003043843025807291, |
|
"learning_rate": 1.757518051257637e-06, |
|
"loss": 0.0, |
|
"step": 12160 |
|
}, |
|
{ |
|
"epoch": 2.8969293025470124, |
|
"grad_norm": 0.00041168101597577333, |
|
"learning_rate": 1.7178449575497898e-06, |
|
"loss": 0.0, |
|
"step": 12170 |
|
}, |
|
{ |
|
"epoch": 2.8993096881694833, |
|
"grad_norm": 0.002103559672832489, |
|
"learning_rate": 1.6781718638419423e-06, |
|
"loss": 0.0, |
|
"step": 12180 |
|
}, |
|
{ |
|
"epoch": 2.901690073791954, |
|
"grad_norm": 0.00029975874349474907, |
|
"learning_rate": 1.6384987701340951e-06, |
|
"loss": 0.0, |
|
"step": 12190 |
|
}, |
|
{ |
|
"epoch": 2.904070459414425, |
|
"grad_norm": 0.004904668778181076, |
|
"learning_rate": 1.5988256764262475e-06, |
|
"loss": 0.0001, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 2.906450845036896, |
|
"grad_norm": 0.0009001428843475878, |
|
"learning_rate": 1.5591525827184004e-06, |
|
"loss": 0.0, |
|
"step": 12210 |
|
}, |
|
{ |
|
"epoch": 2.908831230659367, |
|
"grad_norm": 0.0004976601339876652, |
|
"learning_rate": 1.519479489010553e-06, |
|
"loss": 0.0, |
|
"step": 12220 |
|
}, |
|
{ |
|
"epoch": 2.9112116162818378, |
|
"grad_norm": 0.0002044235880021006, |
|
"learning_rate": 1.4798063953027057e-06, |
|
"loss": 0.0, |
|
"step": 12230 |
|
}, |
|
{ |
|
"epoch": 2.9135920019043082, |
|
"grad_norm": 0.0003118833410553634, |
|
"learning_rate": 1.4401333015948583e-06, |
|
"loss": 0.0, |
|
"step": 12240 |
|
}, |
|
{ |
|
"epoch": 2.9159723875267796, |
|
"grad_norm": 0.00038868881529197097, |
|
"learning_rate": 1.4004602078870111e-06, |
|
"loss": 0.0, |
|
"step": 12250 |
|
}, |
|
{ |
|
"epoch": 2.91835277314925, |
|
"grad_norm": 0.0005747165414504707, |
|
"learning_rate": 1.3607871141791638e-06, |
|
"loss": 0.0, |
|
"step": 12260 |
|
}, |
|
{ |
|
"epoch": 2.920733158771721, |
|
"grad_norm": 0.0013731828657910228, |
|
"learning_rate": 1.3211140204713164e-06, |
|
"loss": 0.0, |
|
"step": 12270 |
|
}, |
|
{ |
|
"epoch": 2.923113544394192, |
|
"grad_norm": 0.000688336614985019, |
|
"learning_rate": 1.281440926763469e-06, |
|
"loss": 0.0, |
|
"step": 12280 |
|
}, |
|
{ |
|
"epoch": 2.9254939300166627, |
|
"grad_norm": 0.00041094853077083826, |
|
"learning_rate": 1.241767833055622e-06, |
|
"loss": 0.0, |
|
"step": 12290 |
|
}, |
|
{ |
|
"epoch": 2.9278743156391336, |
|
"grad_norm": 0.00040040462044999003, |
|
"learning_rate": 1.2020947393477745e-06, |
|
"loss": 0.0, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 2.9302547012616045, |
|
"grad_norm": 0.0027486933395266533, |
|
"learning_rate": 1.1624216456399272e-06, |
|
"loss": 0.0, |
|
"step": 12310 |
|
}, |
|
{ |
|
"epoch": 2.9326350868840754, |
|
"grad_norm": 0.000705558864865452, |
|
"learning_rate": 1.1227485519320798e-06, |
|
"loss": 0.0, |
|
"step": 12320 |
|
}, |
|
{ |
|
"epoch": 2.935015472506546, |
|
"grad_norm": 0.0013841954059898853, |
|
"learning_rate": 1.0830754582242325e-06, |
|
"loss": 0.0001, |
|
"step": 12330 |
|
}, |
|
{ |
|
"epoch": 2.937395858129017, |
|
"grad_norm": 0.0013595300260931253, |
|
"learning_rate": 1.043402364516385e-06, |
|
"loss": 0.0, |
|
"step": 12340 |
|
}, |
|
{ |
|
"epoch": 2.9397762437514876, |
|
"grad_norm": 0.0011891273315995932, |
|
"learning_rate": 1.0037292708085377e-06, |
|
"loss": 0.0, |
|
"step": 12350 |
|
}, |
|
{ |
|
"epoch": 2.9421566293739585, |
|
"grad_norm": 0.0009695956250652671, |
|
"learning_rate": 9.640561771006904e-07, |
|
"loss": 0.0, |
|
"step": 12360 |
|
}, |
|
{ |
|
"epoch": 2.9445370149964294, |
|
"grad_norm": 0.00034754411899484694, |
|
"learning_rate": 9.24383083392843e-07, |
|
"loss": 0.0001, |
|
"step": 12370 |
|
}, |
|
{ |
|
"epoch": 2.9469174006189003, |
|
"grad_norm": 0.00020417921768967062, |
|
"learning_rate": 8.847099896849956e-07, |
|
"loss": 0.0, |
|
"step": 12380 |
|
}, |
|
{ |
|
"epoch": 2.949297786241371, |
|
"grad_norm": 0.0010077544720843434, |
|
"learning_rate": 8.450368959771483e-07, |
|
"loss": 0.0, |
|
"step": 12390 |
|
}, |
|
{ |
|
"epoch": 2.951678171863842, |
|
"grad_norm": 0.0006951851537451148, |
|
"learning_rate": 8.053638022693009e-07, |
|
"loss": 0.0, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 2.954058557486313, |
|
"grad_norm": 0.0005225545028224587, |
|
"learning_rate": 7.656907085614537e-07, |
|
"loss": 0.0, |
|
"step": 12410 |
|
}, |
|
{ |
|
"epoch": 2.9564389431087834, |
|
"grad_norm": 0.0004363077168818563, |
|
"learning_rate": 7.260176148536063e-07, |
|
"loss": 0.0, |
|
"step": 12420 |
|
}, |
|
{ |
|
"epoch": 2.9588193287312543, |
|
"grad_norm": 0.00024609945830889046, |
|
"learning_rate": 6.863445211457589e-07, |
|
"loss": 0.0, |
|
"step": 12430 |
|
}, |
|
{ |
|
"epoch": 2.961199714353725, |
|
"grad_norm": 0.06491145491600037, |
|
"learning_rate": 6.466714274379116e-07, |
|
"loss": 0.0, |
|
"step": 12440 |
|
}, |
|
{ |
|
"epoch": 2.963580099976196, |
|
"grad_norm": 0.0004482944495975971, |
|
"learning_rate": 6.069983337300642e-07, |
|
"loss": 0.0, |
|
"step": 12450 |
|
}, |
|
{ |
|
"epoch": 2.965960485598667, |
|
"grad_norm": 0.001836300129070878, |
|
"learning_rate": 5.67325240022217e-07, |
|
"loss": 0.0, |
|
"step": 12460 |
|
}, |
|
{ |
|
"epoch": 2.968340871221138, |
|
"grad_norm": 0.0004112005408387631, |
|
"learning_rate": 5.276521463143697e-07, |
|
"loss": 0.0, |
|
"step": 12470 |
|
}, |
|
{ |
|
"epoch": 2.9707212568436088, |
|
"grad_norm": 0.0020831027068197727, |
|
"learning_rate": 4.879790526065223e-07, |
|
"loss": 0.0, |
|
"step": 12480 |
|
}, |
|
{ |
|
"epoch": 2.9731016424660797, |
|
"grad_norm": 0.0012763678096234798, |
|
"learning_rate": 4.4830595889867493e-07, |
|
"loss": 0.0, |
|
"step": 12490 |
|
}, |
|
{ |
|
"epoch": 2.9754820280885506, |
|
"grad_norm": 0.0011779662454500794, |
|
"learning_rate": 4.086328651908276e-07, |
|
"loss": 0.0, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 2.977862413711021, |
|
"grad_norm": 0.0005871544708497822, |
|
"learning_rate": 3.6895977148298026e-07, |
|
"loss": 0.0, |
|
"step": 12510 |
|
}, |
|
{ |
|
"epoch": 2.980242799333492, |
|
"grad_norm": 0.002057824982330203, |
|
"learning_rate": 3.2928667777513295e-07, |
|
"loss": 0.0, |
|
"step": 12520 |
|
}, |
|
{ |
|
"epoch": 2.982623184955963, |
|
"grad_norm": 0.00029588877805508673, |
|
"learning_rate": 2.896135840672856e-07, |
|
"loss": 0.0, |
|
"step": 12530 |
|
}, |
|
{ |
|
"epoch": 2.9850035705784337, |
|
"grad_norm": 0.0004726073530036956, |
|
"learning_rate": 2.499404903594382e-07, |
|
"loss": 0.0, |
|
"step": 12540 |
|
}, |
|
{ |
|
"epoch": 2.9873839562009046, |
|
"grad_norm": 0.0014838631032034755, |
|
"learning_rate": 2.102673966515909e-07, |
|
"loss": 0.0, |
|
"step": 12550 |
|
}, |
|
{ |
|
"epoch": 2.9897643418233755, |
|
"grad_norm": 0.0010778923751786351, |
|
"learning_rate": 1.7059430294374355e-07, |
|
"loss": 0.0, |
|
"step": 12560 |
|
}, |
|
{ |
|
"epoch": 2.9921447274458464, |
|
"grad_norm": 0.0007851801346987486, |
|
"learning_rate": 1.3092120923589622e-07, |
|
"loss": 0.0, |
|
"step": 12570 |
|
}, |
|
{ |
|
"epoch": 2.994525113068317, |
|
"grad_norm": 0.00047710456419736147, |
|
"learning_rate": 9.124811552804888e-08, |
|
"loss": 0.0, |
|
"step": 12580 |
|
}, |
|
{ |
|
"epoch": 2.996905498690788, |
|
"grad_norm": 0.003749624127522111, |
|
"learning_rate": 5.1575021820201544e-08, |
|
"loss": 0.0, |
|
"step": 12590 |
|
}, |
|
{ |
|
"epoch": 2.9992858843132586, |
|
"grad_norm": 0.0007799621089361608, |
|
"learning_rate": 1.1901928112354202e-08, |
|
"loss": 0.0001, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 2.340411811019294e-07, |
|
"eval_runtime": 52.9973, |
|
"eval_samples_per_second": 35.247, |
|
"eval_steps_per_second": 8.812, |
|
"step": 12603 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 12603, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 6586245895421952.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|