|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.30353619669145543, |
|
"eval_steps": 500, |
|
"global_step": 1000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.00030353619669145547, |
|
"grad_norm": 9.667811393737793, |
|
"learning_rate": 1e-05, |
|
"loss": 5.0202, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0006070723933829109, |
|
"grad_norm": 10.303421974182129, |
|
"learning_rate": 2e-05, |
|
"loss": 4.7469, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.0009106085900743664, |
|
"grad_norm": 7.488056182861328, |
|
"learning_rate": 3e-05, |
|
"loss": 5.0105, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.0012141447867658219, |
|
"grad_norm": 4.885837078094482, |
|
"learning_rate": 4e-05, |
|
"loss": 4.3945, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.0015176809834572772, |
|
"grad_norm": 3.793656587600708, |
|
"learning_rate": 5e-05, |
|
"loss": 4.0574, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.0018212171801487327, |
|
"grad_norm": 3.9249916076660156, |
|
"learning_rate": 6e-05, |
|
"loss": 3.8179, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.002124753376840188, |
|
"grad_norm": 3.4937145709991455, |
|
"learning_rate": 7e-05, |
|
"loss": 3.5297, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.0024282895735316438, |
|
"grad_norm": 2.499041795730591, |
|
"learning_rate": 8e-05, |
|
"loss": 3.15, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.002731825770223099, |
|
"grad_norm": 2.0781290531158447, |
|
"learning_rate": 9e-05, |
|
"loss": 2.8658, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.0030353619669145544, |
|
"grad_norm": 2.0124764442443848, |
|
"learning_rate": 0.0001, |
|
"loss": 2.6826, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.00333889816360601, |
|
"grad_norm": 1.4209256172180176, |
|
"learning_rate": 9.99949377341298e-05, |
|
"loss": 2.5608, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.0036424343602974654, |
|
"grad_norm": 3.176084041595459, |
|
"learning_rate": 9.99898754682596e-05, |
|
"loss": 2.2416, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.003945970556988921, |
|
"grad_norm": 1.4457614421844482, |
|
"learning_rate": 9.998481320238939e-05, |
|
"loss": 2.1925, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.004249506753680376, |
|
"grad_norm": 1.3989348411560059, |
|
"learning_rate": 9.997975093651918e-05, |
|
"loss": 2.2165, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.004553042950371832, |
|
"grad_norm": 1.0647027492523193, |
|
"learning_rate": 9.997468867064899e-05, |
|
"loss": 2.3486, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.0048565791470632875, |
|
"grad_norm": 1.0246940851211548, |
|
"learning_rate": 9.996962640477879e-05, |
|
"loss": 2.19, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.005160115343754742, |
|
"grad_norm": 1.029646873474121, |
|
"learning_rate": 9.996456413890858e-05, |
|
"loss": 2.4052, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.005463651540446198, |
|
"grad_norm": 1.322654128074646, |
|
"learning_rate": 9.995950187303838e-05, |
|
"loss": 2.1927, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.005767187737137654, |
|
"grad_norm": 2.061326026916504, |
|
"learning_rate": 9.995443960716817e-05, |
|
"loss": 2.4574, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.006070723933829109, |
|
"grad_norm": 1.1343607902526855, |
|
"learning_rate": 9.994937734129797e-05, |
|
"loss": 1.9598, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0063742601305205645, |
|
"grad_norm": 1.13712477684021, |
|
"learning_rate": 9.994431507542776e-05, |
|
"loss": 2.8643, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.00667779632721202, |
|
"grad_norm": 0.8220421671867371, |
|
"learning_rate": 9.993925280955756e-05, |
|
"loss": 2.0474, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.006981332523903475, |
|
"grad_norm": 0.8233473300933838, |
|
"learning_rate": 9.993419054368735e-05, |
|
"loss": 2.3597, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.007284868720594931, |
|
"grad_norm": 0.8661925196647644, |
|
"learning_rate": 9.992912827781716e-05, |
|
"loss": 2.2163, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.007588404917286387, |
|
"grad_norm": 0.7995729446411133, |
|
"learning_rate": 9.992406601194695e-05, |
|
"loss": 1.8051, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.007891941113977842, |
|
"grad_norm": 0.810165286064148, |
|
"learning_rate": 9.991900374607675e-05, |
|
"loss": 1.9189, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.008195477310669297, |
|
"grad_norm": 0.8240752220153809, |
|
"learning_rate": 9.991394148020654e-05, |
|
"loss": 1.7, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.008499013507360752, |
|
"grad_norm": 1.0160635709762573, |
|
"learning_rate": 9.990887921433634e-05, |
|
"loss": 2.2964, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.008802549704052209, |
|
"grad_norm": 0.794966995716095, |
|
"learning_rate": 9.990381694846613e-05, |
|
"loss": 1.7333, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.009106085900743664, |
|
"grad_norm": 0.5594797134399414, |
|
"learning_rate": 9.989875468259593e-05, |
|
"loss": 2.0925, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.009409622097435118, |
|
"grad_norm": 0.8100740909576416, |
|
"learning_rate": 9.989369241672572e-05, |
|
"loss": 2.1218, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.009713158294126575, |
|
"grad_norm": 0.7057996392250061, |
|
"learning_rate": 9.988863015085552e-05, |
|
"loss": 2.005, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.01001669449081803, |
|
"grad_norm": 0.8970999121665955, |
|
"learning_rate": 9.988356788498533e-05, |
|
"loss": 2.2414, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.010320230687509485, |
|
"grad_norm": 0.6290627717971802, |
|
"learning_rate": 9.987850561911512e-05, |
|
"loss": 2.2422, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.010623766884200941, |
|
"grad_norm": 0.5665722489356995, |
|
"learning_rate": 9.987344335324492e-05, |
|
"loss": 1.9342, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.010927303080892396, |
|
"grad_norm": 0.5792561173439026, |
|
"learning_rate": 9.986838108737472e-05, |
|
"loss": 1.8733, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.011230839277583851, |
|
"grad_norm": 0.5264159440994263, |
|
"learning_rate": 9.986331882150452e-05, |
|
"loss": 2.1739, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.011534375474275308, |
|
"grad_norm": 0.5069584250450134, |
|
"learning_rate": 9.985825655563431e-05, |
|
"loss": 1.6235, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.011837911670966763, |
|
"grad_norm": 0.7689110636711121, |
|
"learning_rate": 9.985319428976411e-05, |
|
"loss": 1.711, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.012141447867658217, |
|
"grad_norm": 0.7001574635505676, |
|
"learning_rate": 9.98481320238939e-05, |
|
"loss": 1.651, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.012444984064349674, |
|
"grad_norm": 0.5615801811218262, |
|
"learning_rate": 9.98430697580237e-05, |
|
"loss": 2.128, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.012748520261041129, |
|
"grad_norm": 0.8766308426856995, |
|
"learning_rate": 9.983800749215349e-05, |
|
"loss": 2.4421, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.013052056457732584, |
|
"grad_norm": 0.704547107219696, |
|
"learning_rate": 9.983294522628329e-05, |
|
"loss": 1.6921, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.01335559265442404, |
|
"grad_norm": 0.5749143362045288, |
|
"learning_rate": 9.982788296041308e-05, |
|
"loss": 2.0173, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.013659128851115495, |
|
"grad_norm": 0.7929263710975647, |
|
"learning_rate": 9.982282069454289e-05, |
|
"loss": 2.1755, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.01396266504780695, |
|
"grad_norm": 1.6391934156417847, |
|
"learning_rate": 9.981775842867269e-05, |
|
"loss": 2.4995, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.014266201244498407, |
|
"grad_norm": 0.49616461992263794, |
|
"learning_rate": 9.981269616280248e-05, |
|
"loss": 2.3363, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.014569737441189862, |
|
"grad_norm": 0.614272952079773, |
|
"learning_rate": 9.980763389693227e-05, |
|
"loss": 2.0277, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.014873273637881317, |
|
"grad_norm": 0.6181132197380066, |
|
"learning_rate": 9.980257163106207e-05, |
|
"loss": 2.2867, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.015176809834572773, |
|
"grad_norm": 0.5342630743980408, |
|
"learning_rate": 9.979750936519186e-05, |
|
"loss": 1.7314, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.015480346031264228, |
|
"grad_norm": 0.4582519233226776, |
|
"learning_rate": 9.979244709932166e-05, |
|
"loss": 1.9893, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.015783882227955685, |
|
"grad_norm": 0.5448606014251709, |
|
"learning_rate": 9.978738483345145e-05, |
|
"loss": 2.3266, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.01608741842464714, |
|
"grad_norm": 1.0823545455932617, |
|
"learning_rate": 9.978232256758125e-05, |
|
"loss": 2.1919, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.016390954621338594, |
|
"grad_norm": 0.5506464838981628, |
|
"learning_rate": 9.977726030171106e-05, |
|
"loss": 2.0735, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.01669449081803005, |
|
"grad_norm": 0.568626344203949, |
|
"learning_rate": 9.977219803584085e-05, |
|
"loss": 2.051, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.016998027014721504, |
|
"grad_norm": 0.512907087802887, |
|
"learning_rate": 9.976713576997065e-05, |
|
"loss": 1.6473, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.017301563211412962, |
|
"grad_norm": 0.5541898012161255, |
|
"learning_rate": 9.976207350410044e-05, |
|
"loss": 1.8184, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.017605099408104417, |
|
"grad_norm": 0.5083638429641724, |
|
"learning_rate": 9.975701123823024e-05, |
|
"loss": 1.7573, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.017908635604795872, |
|
"grad_norm": 0.4722895920276642, |
|
"learning_rate": 9.975194897236003e-05, |
|
"loss": 2.0311, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.018212171801487327, |
|
"grad_norm": 0.5068002343177795, |
|
"learning_rate": 9.974688670648983e-05, |
|
"loss": 2.1245, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.018515707998178782, |
|
"grad_norm": 0.5726852416992188, |
|
"learning_rate": 9.974182444061962e-05, |
|
"loss": 2.1017, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.018819244194870237, |
|
"grad_norm": 0.5240160226821899, |
|
"learning_rate": 9.973676217474942e-05, |
|
"loss": 2.2665, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.019122780391561695, |
|
"grad_norm": 0.4728144705295563, |
|
"learning_rate": 9.973169990887921e-05, |
|
"loss": 2.0537, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.01942631658825315, |
|
"grad_norm": 0.47115418314933777, |
|
"learning_rate": 9.972663764300902e-05, |
|
"loss": 1.2815, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.019729852784944605, |
|
"grad_norm": 0.7070208191871643, |
|
"learning_rate": 9.972157537713881e-05, |
|
"loss": 1.8514, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.02003338898163606, |
|
"grad_norm": 0.529069185256958, |
|
"learning_rate": 9.971651311126861e-05, |
|
"loss": 1.7602, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.020336925178327515, |
|
"grad_norm": 0.7532087564468384, |
|
"learning_rate": 9.97114508453984e-05, |
|
"loss": 2.2168, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.02064046137501897, |
|
"grad_norm": 0.5654622912406921, |
|
"learning_rate": 9.97063885795282e-05, |
|
"loss": 1.9634, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.020943997571710428, |
|
"grad_norm": 0.701452910900116, |
|
"learning_rate": 9.970132631365799e-05, |
|
"loss": 2.044, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.021247533768401883, |
|
"grad_norm": 0.5750812888145447, |
|
"learning_rate": 9.969626404778779e-05, |
|
"loss": 1.8015, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.021551069965093338, |
|
"grad_norm": 0.49930402636528015, |
|
"learning_rate": 9.969120178191758e-05, |
|
"loss": 1.7998, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.021854606161784793, |
|
"grad_norm": 0.4348014295101166, |
|
"learning_rate": 9.968613951604738e-05, |
|
"loss": 1.9959, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.022158142358476247, |
|
"grad_norm": 0.5268503427505493, |
|
"learning_rate": 9.968107725017719e-05, |
|
"loss": 1.8497, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.022461678555167702, |
|
"grad_norm": 0.578822135925293, |
|
"learning_rate": 9.967601498430698e-05, |
|
"loss": 2.3277, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.02276521475185916, |
|
"grad_norm": 0.52215975522995, |
|
"learning_rate": 9.967095271843677e-05, |
|
"loss": 2.1179, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.023068750948550616, |
|
"grad_norm": 0.4557477533817291, |
|
"learning_rate": 9.966589045256657e-05, |
|
"loss": 2.0132, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.02337228714524207, |
|
"grad_norm": 0.5032123327255249, |
|
"learning_rate": 9.966082818669638e-05, |
|
"loss": 1.8608, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.023675823341933525, |
|
"grad_norm": 0.42689865827560425, |
|
"learning_rate": 9.965576592082617e-05, |
|
"loss": 2.0437, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.02397935953862498, |
|
"grad_norm": 0.44310206174850464, |
|
"learning_rate": 9.965070365495597e-05, |
|
"loss": 2.1222, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.024282895735316435, |
|
"grad_norm": 0.4377008378505707, |
|
"learning_rate": 9.964564138908576e-05, |
|
"loss": 2.0418, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.024586431932007893, |
|
"grad_norm": 0.35174912214279175, |
|
"learning_rate": 9.964057912321556e-05, |
|
"loss": 1.6931, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.024889968128699348, |
|
"grad_norm": 0.47877687215805054, |
|
"learning_rate": 9.963551685734535e-05, |
|
"loss": 1.7049, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.025193504325390803, |
|
"grad_norm": 0.4063829183578491, |
|
"learning_rate": 9.963045459147515e-05, |
|
"loss": 1.8611, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.025497040522082258, |
|
"grad_norm": 0.4149170219898224, |
|
"learning_rate": 9.962539232560496e-05, |
|
"loss": 1.9439, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.025800576718773713, |
|
"grad_norm": 0.4882602393627167, |
|
"learning_rate": 9.962033005973475e-05, |
|
"loss": 1.5723, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.026104112915465168, |
|
"grad_norm": 0.4600992202758789, |
|
"learning_rate": 9.961526779386454e-05, |
|
"loss": 2.0142, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.026407649112156626, |
|
"grad_norm": 0.43366697430610657, |
|
"learning_rate": 9.961020552799434e-05, |
|
"loss": 1.9175, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.02671118530884808, |
|
"grad_norm": 0.501487135887146, |
|
"learning_rate": 9.960514326212413e-05, |
|
"loss": 1.5043, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.027014721505539536, |
|
"grad_norm": 0.43821993470191956, |
|
"learning_rate": 9.960008099625393e-05, |
|
"loss": 1.8622, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.02731825770223099, |
|
"grad_norm": 0.4433805048465729, |
|
"learning_rate": 9.959501873038372e-05, |
|
"loss": 1.9459, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.027621793898922446, |
|
"grad_norm": 0.4686216115951538, |
|
"learning_rate": 9.958995646451352e-05, |
|
"loss": 1.7405, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.0279253300956139, |
|
"grad_norm": 0.48586198687553406, |
|
"learning_rate": 9.958489419864331e-05, |
|
"loss": 2.2233, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.02822886629230536, |
|
"grad_norm": 0.4018734097480774, |
|
"learning_rate": 9.957983193277312e-05, |
|
"loss": 2.0027, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.028532402488996814, |
|
"grad_norm": 0.4996435344219208, |
|
"learning_rate": 9.957476966690292e-05, |
|
"loss": 1.5949, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.02883593868568827, |
|
"grad_norm": 0.45447826385498047, |
|
"learning_rate": 9.956970740103271e-05, |
|
"loss": 1.7636, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.029139474882379723, |
|
"grad_norm": 0.4209904372692108, |
|
"learning_rate": 9.95646451351625e-05, |
|
"loss": 1.7523, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.029443011079071178, |
|
"grad_norm": 0.3740164637565613, |
|
"learning_rate": 9.95595828692923e-05, |
|
"loss": 1.9136, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.029746547275762633, |
|
"grad_norm": 0.4169963598251343, |
|
"learning_rate": 9.95545206034221e-05, |
|
"loss": 1.9136, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.03005008347245409, |
|
"grad_norm": 0.4683006703853607, |
|
"learning_rate": 9.954945833755189e-05, |
|
"loss": 2.0657, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.030353619669145546, |
|
"grad_norm": 0.4508633017539978, |
|
"learning_rate": 9.954439607168169e-05, |
|
"loss": 2.1099, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.030657155865837, |
|
"grad_norm": 0.4136218726634979, |
|
"learning_rate": 9.953933380581148e-05, |
|
"loss": 2.0183, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.030960692062528456, |
|
"grad_norm": 0.44510790705680847, |
|
"learning_rate": 9.953427153994127e-05, |
|
"loss": 1.9307, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.031264228259219914, |
|
"grad_norm": 0.3713892698287964, |
|
"learning_rate": 9.952920927407108e-05, |
|
"loss": 1.7017, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.03156776445591137, |
|
"grad_norm": 0.47902294993400574, |
|
"learning_rate": 9.952414700820088e-05, |
|
"loss": 2.1172, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.031871300652602824, |
|
"grad_norm": 0.4492317736148834, |
|
"learning_rate": 9.951908474233067e-05, |
|
"loss": 1.9752, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.03217483684929428, |
|
"grad_norm": 0.4096255302429199, |
|
"learning_rate": 9.951402247646047e-05, |
|
"loss": 1.5511, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.032478373045985734, |
|
"grad_norm": 0.39630818367004395, |
|
"learning_rate": 9.950896021059026e-05, |
|
"loss": 2.11, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.03278190924267719, |
|
"grad_norm": 0.42648032307624817, |
|
"learning_rate": 9.950389794472006e-05, |
|
"loss": 2.1784, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.033085445439368644, |
|
"grad_norm": 0.4814178943634033, |
|
"learning_rate": 9.949883567884985e-05, |
|
"loss": 1.955, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.0333889816360601, |
|
"grad_norm": 0.41600191593170166, |
|
"learning_rate": 9.949377341297965e-05, |
|
"loss": 1.9163, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.03369251783275155, |
|
"grad_norm": 0.4610773026943207, |
|
"learning_rate": 9.948871114710944e-05, |
|
"loss": 1.7934, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.03399605402944301, |
|
"grad_norm": 0.43061718344688416, |
|
"learning_rate": 9.948364888123925e-05, |
|
"loss": 1.9278, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.03429959022613446, |
|
"grad_norm": 0.3907497227191925, |
|
"learning_rate": 9.947858661536904e-05, |
|
"loss": 1.996, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.034603126422825925, |
|
"grad_norm": 0.3984166383743286, |
|
"learning_rate": 9.947352434949884e-05, |
|
"loss": 1.5936, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.03490666261951738, |
|
"grad_norm": 0.43406423926353455, |
|
"learning_rate": 9.946846208362863e-05, |
|
"loss": 1.8866, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.035210198816208835, |
|
"grad_norm": 0.45913639664649963, |
|
"learning_rate": 9.946339981775843e-05, |
|
"loss": 1.972, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.03551373501290029, |
|
"grad_norm": 0.42077311873435974, |
|
"learning_rate": 9.945833755188822e-05, |
|
"loss": 2.0081, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.035817271209591744, |
|
"grad_norm": 0.41479435563087463, |
|
"learning_rate": 9.945327528601802e-05, |
|
"loss": 2.0096, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.0361208074062832, |
|
"grad_norm": 0.35669025778770447, |
|
"learning_rate": 9.944821302014781e-05, |
|
"loss": 2.0074, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.036424343602974654, |
|
"grad_norm": 0.4088069796562195, |
|
"learning_rate": 9.944315075427761e-05, |
|
"loss": 1.817, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.03672787979966611, |
|
"grad_norm": 0.49982163310050964, |
|
"learning_rate": 9.943808848840742e-05, |
|
"loss": 1.9218, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.037031415996357564, |
|
"grad_norm": 0.39924055337905884, |
|
"learning_rate": 9.943302622253721e-05, |
|
"loss": 2.2463, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.03733495219304902, |
|
"grad_norm": 0.40462177991867065, |
|
"learning_rate": 9.942796395666702e-05, |
|
"loss": 2.0844, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.037638488389740474, |
|
"grad_norm": 0.43440741300582886, |
|
"learning_rate": 9.942290169079681e-05, |
|
"loss": 1.8808, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.03794202458643193, |
|
"grad_norm": 0.4029730260372162, |
|
"learning_rate": 9.941783942492661e-05, |
|
"loss": 1.9427, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.03824556078312339, |
|
"grad_norm": 0.7807103395462036, |
|
"learning_rate": 9.94127771590564e-05, |
|
"loss": 1.9072, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.038549096979814845, |
|
"grad_norm": 0.5021561980247498, |
|
"learning_rate": 9.94077148931862e-05, |
|
"loss": 2.0582, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.0388526331765063, |
|
"grad_norm": 0.5161197781562805, |
|
"learning_rate": 9.9402652627316e-05, |
|
"loss": 1.9861, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.039156169373197755, |
|
"grad_norm": 0.5553935766220093, |
|
"learning_rate": 9.939759036144579e-05, |
|
"loss": 2.1893, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.03945970556988921, |
|
"grad_norm": 0.4241655170917511, |
|
"learning_rate": 9.939252809557558e-05, |
|
"loss": 1.9722, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.039763241766580665, |
|
"grad_norm": 0.43290001153945923, |
|
"learning_rate": 9.938746582970538e-05, |
|
"loss": 1.5364, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.04006677796327212, |
|
"grad_norm": 0.40089091658592224, |
|
"learning_rate": 9.938240356383519e-05, |
|
"loss": 1.9686, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.040370314159963575, |
|
"grad_norm": 0.4152032434940338, |
|
"learning_rate": 9.937734129796498e-05, |
|
"loss": 1.913, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.04067385035665503, |
|
"grad_norm": 0.4443211555480957, |
|
"learning_rate": 9.937227903209478e-05, |
|
"loss": 2.2354, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.040977386553346484, |
|
"grad_norm": 0.41355323791503906, |
|
"learning_rate": 9.936721676622457e-05, |
|
"loss": 2.1055, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.04128092275003794, |
|
"grad_norm": 0.5837479829788208, |
|
"learning_rate": 9.936215450035437e-05, |
|
"loss": 1.9085, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.041584458946729394, |
|
"grad_norm": 0.40269389748573303, |
|
"learning_rate": 9.935709223448416e-05, |
|
"loss": 2.0368, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.041887995143420856, |
|
"grad_norm": 0.5898969769477844, |
|
"learning_rate": 9.935202996861396e-05, |
|
"loss": 1.7933, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.04219153134011231, |
|
"grad_norm": 0.41117680072784424, |
|
"learning_rate": 9.934696770274375e-05, |
|
"loss": 1.7452, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.042495067536803766, |
|
"grad_norm": 0.5090368390083313, |
|
"learning_rate": 9.934190543687354e-05, |
|
"loss": 2.0141, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.04279860373349522, |
|
"grad_norm": 0.4821307957172394, |
|
"learning_rate": 9.933684317100334e-05, |
|
"loss": 1.9443, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.043102139930186675, |
|
"grad_norm": 0.41939428448677063, |
|
"learning_rate": 9.933178090513315e-05, |
|
"loss": 1.7401, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.04340567612687813, |
|
"grad_norm": 0.4531096816062927, |
|
"learning_rate": 9.932671863926294e-05, |
|
"loss": 1.9944, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.043709212323569585, |
|
"grad_norm": 0.44440799951553345, |
|
"learning_rate": 9.932165637339274e-05, |
|
"loss": 1.9648, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.04401274852026104, |
|
"grad_norm": 0.36847150325775146, |
|
"learning_rate": 9.931659410752253e-05, |
|
"loss": 2.0638, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.044316284716952495, |
|
"grad_norm": 0.6394171118736267, |
|
"learning_rate": 9.931153184165233e-05, |
|
"loss": 1.9476, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.04461982091364395, |
|
"grad_norm": 0.41597506403923035, |
|
"learning_rate": 9.930646957578212e-05, |
|
"loss": 1.535, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.044923357110335405, |
|
"grad_norm": 0.5597077012062073, |
|
"learning_rate": 9.930140730991192e-05, |
|
"loss": 1.6826, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.045226893307026866, |
|
"grad_norm": 0.5532084703445435, |
|
"learning_rate": 9.929634504404171e-05, |
|
"loss": 1.8063, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.04553042950371832, |
|
"grad_norm": 0.467339426279068, |
|
"learning_rate": 9.92912827781715e-05, |
|
"loss": 2.017, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.045833965700409776, |
|
"grad_norm": 0.4054040312767029, |
|
"learning_rate": 9.928622051230131e-05, |
|
"loss": 1.7582, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.04613750189710123, |
|
"grad_norm": 1.2743823528289795, |
|
"learning_rate": 9.928115824643111e-05, |
|
"loss": 2.0202, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.046441038093792686, |
|
"grad_norm": 0.4357397258281708, |
|
"learning_rate": 9.92760959805609e-05, |
|
"loss": 1.8788, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.04674457429048414, |
|
"grad_norm": 2.8793208599090576, |
|
"learning_rate": 9.92710337146907e-05, |
|
"loss": 2.1204, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.047048110487175596, |
|
"grad_norm": 0.9585952162742615, |
|
"learning_rate": 9.92659714488205e-05, |
|
"loss": 1.9356, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.04735164668386705, |
|
"grad_norm": 0.7857603430747986, |
|
"learning_rate": 9.926090918295029e-05, |
|
"loss": 1.9097, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.047655182880558505, |
|
"grad_norm": 0.5259221792221069, |
|
"learning_rate": 9.925584691708008e-05, |
|
"loss": 2.1589, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.04795871907724996, |
|
"grad_norm": 2.793253183364868, |
|
"learning_rate": 9.925078465120988e-05, |
|
"loss": 1.7202, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.048262255273941415, |
|
"grad_norm": 0.4432888627052307, |
|
"learning_rate": 9.924572238533967e-05, |
|
"loss": 1.9898, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.04856579147063287, |
|
"grad_norm": 0.4347291588783264, |
|
"learning_rate": 9.924066011946948e-05, |
|
"loss": 1.8142, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.04886932766732433, |
|
"grad_norm": 5.273514747619629, |
|
"learning_rate": 9.923559785359928e-05, |
|
"loss": 1.8665, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.04917286386401579, |
|
"grad_norm": 0.47988301515579224, |
|
"learning_rate": 9.923053558772907e-05, |
|
"loss": 1.9439, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.04947640006070724, |
|
"grad_norm": 0.3584117293357849, |
|
"learning_rate": 9.922547332185887e-05, |
|
"loss": 1.8109, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.049779936257398696, |
|
"grad_norm": 0.4074074923992157, |
|
"learning_rate": 9.922041105598866e-05, |
|
"loss": 2.1056, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.05008347245409015, |
|
"grad_norm": 3.159336566925049, |
|
"learning_rate": 9.921534879011846e-05, |
|
"loss": 1.8672, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.050387008650781606, |
|
"grad_norm": 0.38132309913635254, |
|
"learning_rate": 9.921028652424826e-05, |
|
"loss": 1.8423, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.05069054484747306, |
|
"grad_norm": 0.39241936802864075, |
|
"learning_rate": 9.920522425837806e-05, |
|
"loss": 1.5949, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.050994081044164516, |
|
"grad_norm": 0.38212037086486816, |
|
"learning_rate": 9.920016199250785e-05, |
|
"loss": 1.9669, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.05129761724085597, |
|
"grad_norm": 0.5353955030441284, |
|
"learning_rate": 9.919509972663765e-05, |
|
"loss": 2.1806, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.051601153437547426, |
|
"grad_norm": 0.4129483699798584, |
|
"learning_rate": 9.919003746076744e-05, |
|
"loss": 1.8858, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.05190468963423888, |
|
"grad_norm": 0.3832380771636963, |
|
"learning_rate": 9.918497519489725e-05, |
|
"loss": 2.0321, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.052208225830930335, |
|
"grad_norm": 0.4078863859176636, |
|
"learning_rate": 9.917991292902705e-05, |
|
"loss": 1.6213, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.0525117620276218, |
|
"grad_norm": 0.38865014910697937, |
|
"learning_rate": 9.917485066315684e-05, |
|
"loss": 2.0052, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.05281529822431325, |
|
"grad_norm": 0.4339440166950226, |
|
"learning_rate": 9.916978839728664e-05, |
|
"loss": 2.2405, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.05311883442100471, |
|
"grad_norm": 0.42063045501708984, |
|
"learning_rate": 9.916472613141643e-05, |
|
"loss": 1.6529, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.05342237061769616, |
|
"grad_norm": 0.4765849709510803, |
|
"learning_rate": 9.915966386554623e-05, |
|
"loss": 1.9645, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.05372590681438762, |
|
"grad_norm": 0.41431936621665955, |
|
"learning_rate": 9.915460159967602e-05, |
|
"loss": 1.9709, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.05402944301107907, |
|
"grad_norm": 0.3591434359550476, |
|
"learning_rate": 9.914953933380581e-05, |
|
"loss": 1.685, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.054332979207770526, |
|
"grad_norm": 0.45483240485191345, |
|
"learning_rate": 9.914447706793561e-05, |
|
"loss": 1.9362, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.05463651540446198, |
|
"grad_norm": 0.5468000173568726, |
|
"learning_rate": 9.91394148020654e-05, |
|
"loss": 1.6984, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.054940051601153436, |
|
"grad_norm": 0.4057190716266632, |
|
"learning_rate": 9.913435253619521e-05, |
|
"loss": 1.9887, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.05524358779784489, |
|
"grad_norm": 0.383211612701416, |
|
"learning_rate": 9.912929027032501e-05, |
|
"loss": 1.7825, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.055547123994536346, |
|
"grad_norm": 0.3480004668235779, |
|
"learning_rate": 9.91242280044548e-05, |
|
"loss": 1.8721, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.0558506601912278, |
|
"grad_norm": 0.47680413722991943, |
|
"learning_rate": 9.91191657385846e-05, |
|
"loss": 1.8113, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.05615419638791926, |
|
"grad_norm": 0.37727096676826477, |
|
"learning_rate": 9.911410347271439e-05, |
|
"loss": 1.7398, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.05645773258461072, |
|
"grad_norm": 0.47738176584243774, |
|
"learning_rate": 9.910904120684419e-05, |
|
"loss": 1.4651, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.05676126878130217, |
|
"grad_norm": 0.44533729553222656, |
|
"learning_rate": 9.910397894097398e-05, |
|
"loss": 1.5697, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.05706480497799363, |
|
"grad_norm": 0.45051974058151245, |
|
"learning_rate": 9.909891667510378e-05, |
|
"loss": 2.1577, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.05736834117468508, |
|
"grad_norm": 0.4709470272064209, |
|
"learning_rate": 9.909385440923357e-05, |
|
"loss": 2.0486, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.05767187737137654, |
|
"grad_norm": 0.4063846170902252, |
|
"learning_rate": 9.908879214336338e-05, |
|
"loss": 1.5453, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.05797541356806799, |
|
"grad_norm": 0.374362587928772, |
|
"learning_rate": 9.908372987749317e-05, |
|
"loss": 1.5611, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.05827894976475945, |
|
"grad_norm": 0.4852111041545868, |
|
"learning_rate": 9.907866761162297e-05, |
|
"loss": 1.6234, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.0585824859614509, |
|
"grad_norm": 0.6863122582435608, |
|
"learning_rate": 9.907360534575276e-05, |
|
"loss": 2.1612, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.058886022158142357, |
|
"grad_norm": 0.6040588021278381, |
|
"learning_rate": 9.906854307988256e-05, |
|
"loss": 2.1092, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.05918955835483381, |
|
"grad_norm": 0.4148467779159546, |
|
"learning_rate": 9.906348081401235e-05, |
|
"loss": 2.1108, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.059493094551525266, |
|
"grad_norm": 0.36098209023475647, |
|
"learning_rate": 9.905841854814215e-05, |
|
"loss": 2.0002, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.05979663074821673, |
|
"grad_norm": 0.42360183596611023, |
|
"learning_rate": 9.905335628227194e-05, |
|
"loss": 2.3124, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.06010016694490818, |
|
"grad_norm": 0.3650914430618286, |
|
"learning_rate": 9.904829401640174e-05, |
|
"loss": 1.8778, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.06040370314159964, |
|
"grad_norm": 0.392995148897171, |
|
"learning_rate": 9.904323175053155e-05, |
|
"loss": 2.16, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.06070723933829109, |
|
"grad_norm": 0.46390387415885925, |
|
"learning_rate": 9.903816948466134e-05, |
|
"loss": 1.8695, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.06101077553498255, |
|
"grad_norm": 0.3954870402812958, |
|
"learning_rate": 9.903310721879114e-05, |
|
"loss": 1.9233, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.061314311731674, |
|
"grad_norm": 0.3650193214416504, |
|
"learning_rate": 9.902804495292093e-05, |
|
"loss": 2.2504, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.06161784792836546, |
|
"grad_norm": 0.3582104742527008, |
|
"learning_rate": 9.902298268705073e-05, |
|
"loss": 1.9303, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.06192138412505691, |
|
"grad_norm": 0.35688868165016174, |
|
"learning_rate": 9.901792042118052e-05, |
|
"loss": 1.7078, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.06222492032174837, |
|
"grad_norm": 0.3666802942752838, |
|
"learning_rate": 9.901285815531031e-05, |
|
"loss": 1.941, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.06252845651843983, |
|
"grad_norm": 0.42375093698501587, |
|
"learning_rate": 9.900779588944011e-05, |
|
"loss": 2.0858, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.06283199271513128, |
|
"grad_norm": 0.3913770318031311, |
|
"learning_rate": 9.90027336235699e-05, |
|
"loss": 2.1423, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.06313552891182274, |
|
"grad_norm": 0.4101809859275818, |
|
"learning_rate": 9.89976713576997e-05, |
|
"loss": 2.0497, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.06343906510851419, |
|
"grad_norm": 0.3696439564228058, |
|
"learning_rate": 9.899260909182951e-05, |
|
"loss": 1.9692, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.06374260130520565, |
|
"grad_norm": 0.3725574016571045, |
|
"learning_rate": 9.89875468259593e-05, |
|
"loss": 2.2053, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.0640461375018971, |
|
"grad_norm": 0.4886903166770935, |
|
"learning_rate": 9.898248456008911e-05, |
|
"loss": 1.8981, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.06434967369858856, |
|
"grad_norm": 0.4423249661922455, |
|
"learning_rate": 9.89774222942189e-05, |
|
"loss": 1.9058, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.06465320989528, |
|
"grad_norm": 0.4045765697956085, |
|
"learning_rate": 9.89723600283487e-05, |
|
"loss": 1.8056, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.06495674609197147, |
|
"grad_norm": 0.43866047263145447, |
|
"learning_rate": 9.89672977624785e-05, |
|
"loss": 1.6315, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.06526028228866293, |
|
"grad_norm": 0.524714469909668, |
|
"learning_rate": 9.896223549660829e-05, |
|
"loss": 2.0156, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.06556381848535438, |
|
"grad_norm": 0.3752996325492859, |
|
"learning_rate": 9.895717323073808e-05, |
|
"loss": 2.2768, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.06586735468204584, |
|
"grad_norm": 0.4371670186519623, |
|
"learning_rate": 9.895211096486788e-05, |
|
"loss": 2.0755, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.06617089087873729, |
|
"grad_norm": 0.3751063644886017, |
|
"learning_rate": 9.894704869899767e-05, |
|
"loss": 2.2451, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.06647442707542875, |
|
"grad_norm": 0.6649600267410278, |
|
"learning_rate": 9.894198643312747e-05, |
|
"loss": 1.9835, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.0667779632721202, |
|
"grad_norm": 0.3941735625267029, |
|
"learning_rate": 9.893692416725728e-05, |
|
"loss": 2.0203, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.06708149946881166, |
|
"grad_norm": 0.41888293623924255, |
|
"learning_rate": 9.893186190138707e-05, |
|
"loss": 1.7572, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.0673850356655031, |
|
"grad_norm": 0.4820149838924408, |
|
"learning_rate": 9.892679963551687e-05, |
|
"loss": 2.0591, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.06768857186219457, |
|
"grad_norm": 0.3516736626625061, |
|
"learning_rate": 9.892173736964666e-05, |
|
"loss": 1.9398, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.06799210805888602, |
|
"grad_norm": 0.3873218894004822, |
|
"learning_rate": 9.891667510377646e-05, |
|
"loss": 1.6389, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.06829564425557748, |
|
"grad_norm": 0.3793487846851349, |
|
"learning_rate": 9.891161283790625e-05, |
|
"loss": 2.0075, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.06859918045226893, |
|
"grad_norm": 0.38987675309181213, |
|
"learning_rate": 9.890655057203605e-05, |
|
"loss": 2.0903, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.06890271664896039, |
|
"grad_norm": 0.4293549358844757, |
|
"learning_rate": 9.890148830616584e-05, |
|
"loss": 2.2099, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.06920625284565185, |
|
"grad_norm": 0.39895692467689514, |
|
"learning_rate": 9.889642604029564e-05, |
|
"loss": 1.8615, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.0695097890423433, |
|
"grad_norm": 0.4543936252593994, |
|
"learning_rate": 9.889136377442544e-05, |
|
"loss": 2.0828, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.06981332523903476, |
|
"grad_norm": 0.448477566242218, |
|
"learning_rate": 9.888630150855524e-05, |
|
"loss": 1.5524, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.07011686143572621, |
|
"grad_norm": 0.428975373506546, |
|
"learning_rate": 9.888123924268503e-05, |
|
"loss": 1.3828, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.07042039763241767, |
|
"grad_norm": 0.42287349700927734, |
|
"learning_rate": 9.887617697681483e-05, |
|
"loss": 2.096, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.07072393382910912, |
|
"grad_norm": 0.43614649772644043, |
|
"learning_rate": 9.887111471094462e-05, |
|
"loss": 1.8238, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.07102747002580058, |
|
"grad_norm": 0.47309553623199463, |
|
"learning_rate": 9.886605244507442e-05, |
|
"loss": 2.3526, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.07133100622249203, |
|
"grad_norm": 0.9558483362197876, |
|
"learning_rate": 9.886099017920421e-05, |
|
"loss": 1.9816, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.07163454241918349, |
|
"grad_norm": 0.3529858887195587, |
|
"learning_rate": 9.885592791333401e-05, |
|
"loss": 2.0314, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.07193807861587494, |
|
"grad_norm": 0.37652599811553955, |
|
"learning_rate": 9.88508656474638e-05, |
|
"loss": 1.9381, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.0722416148125664, |
|
"grad_norm": 0.40783143043518066, |
|
"learning_rate": 9.884580338159361e-05, |
|
"loss": 1.966, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.07254515100925786, |
|
"grad_norm": 0.4160328805446625, |
|
"learning_rate": 9.88407411157234e-05, |
|
"loss": 1.8176, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.07284868720594931, |
|
"grad_norm": 0.4397304952144623, |
|
"learning_rate": 9.88356788498532e-05, |
|
"loss": 1.6766, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.07315222340264077, |
|
"grad_norm": 0.42549702525138855, |
|
"learning_rate": 9.8830616583983e-05, |
|
"loss": 2.1176, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.07345575959933222, |
|
"grad_norm": 0.3747939169406891, |
|
"learning_rate": 9.882555431811279e-05, |
|
"loss": 1.5494, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.07375929579602368, |
|
"grad_norm": 3.4551990032196045, |
|
"learning_rate": 9.882049205224258e-05, |
|
"loss": 2.0336, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.07406283199271513, |
|
"grad_norm": 1.5632964372634888, |
|
"learning_rate": 9.881542978637238e-05, |
|
"loss": 1.7452, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.07436636818940659, |
|
"grad_norm": 0.41575855016708374, |
|
"learning_rate": 9.881036752050217e-05, |
|
"loss": 2.0243, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.07466990438609804, |
|
"grad_norm": 0.44168713688850403, |
|
"learning_rate": 9.880530525463197e-05, |
|
"loss": 2.0022, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.0749734405827895, |
|
"grad_norm": 0.46640321612358093, |
|
"learning_rate": 9.880024298876176e-05, |
|
"loss": 1.555, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.07527697677948095, |
|
"grad_norm": 0.3622835576534271, |
|
"learning_rate": 9.879518072289157e-05, |
|
"loss": 1.876, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.07558051297617241, |
|
"grad_norm": 0.6277987957000732, |
|
"learning_rate": 9.879011845702137e-05, |
|
"loss": 2.2753, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.07588404917286386, |
|
"grad_norm": 0.40246644616127014, |
|
"learning_rate": 9.878505619115116e-05, |
|
"loss": 1.5991, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.07618758536955532, |
|
"grad_norm": 0.38388529419898987, |
|
"learning_rate": 9.877999392528096e-05, |
|
"loss": 1.9226, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.07649112156624678, |
|
"grad_norm": 0.39985090494155884, |
|
"learning_rate": 9.877493165941075e-05, |
|
"loss": 2.0722, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.07679465776293823, |
|
"grad_norm": 0.3872128427028656, |
|
"learning_rate": 9.876986939354055e-05, |
|
"loss": 1.9132, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.07709819395962969, |
|
"grad_norm": 0.3665171265602112, |
|
"learning_rate": 9.876480712767034e-05, |
|
"loss": 1.6244, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.07740173015632114, |
|
"grad_norm": 0.4011310040950775, |
|
"learning_rate": 9.875974486180015e-05, |
|
"loss": 2.1289, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.0777052663530126, |
|
"grad_norm": 0.35013166069984436, |
|
"learning_rate": 9.875468259592994e-05, |
|
"loss": 1.9738, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.07800880254970405, |
|
"grad_norm": 0.48468607664108276, |
|
"learning_rate": 9.874962033005974e-05, |
|
"loss": 2.1368, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.07831233874639551, |
|
"grad_norm": 0.5015551447868347, |
|
"learning_rate": 9.874455806418953e-05, |
|
"loss": 2.1218, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.07861587494308696, |
|
"grad_norm": 0.41915133595466614, |
|
"learning_rate": 9.873949579831934e-05, |
|
"loss": 2.0052, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.07891941113977842, |
|
"grad_norm": 0.4414760172367096, |
|
"learning_rate": 9.873443353244914e-05, |
|
"loss": 1.7249, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.07922294733646987, |
|
"grad_norm": 0.47259169816970825, |
|
"learning_rate": 9.872937126657893e-05, |
|
"loss": 2.1041, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.07952648353316133, |
|
"grad_norm": 0.3689124882221222, |
|
"learning_rate": 9.872430900070873e-05, |
|
"loss": 1.8956, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.07983001972985279, |
|
"grad_norm": 0.3948320150375366, |
|
"learning_rate": 9.871924673483852e-05, |
|
"loss": 1.9211, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.08013355592654424, |
|
"grad_norm": 0.4235248267650604, |
|
"learning_rate": 9.871418446896832e-05, |
|
"loss": 1.7115, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.0804370921232357, |
|
"grad_norm": 0.48399198055267334, |
|
"learning_rate": 9.870912220309811e-05, |
|
"loss": 1.77, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.08074062831992715, |
|
"grad_norm": 0.34047526121139526, |
|
"learning_rate": 9.87040599372279e-05, |
|
"loss": 1.7189, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.08104416451661861, |
|
"grad_norm": 0.47203269600868225, |
|
"learning_rate": 9.86989976713577e-05, |
|
"loss": 1.7674, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.08134770071331006, |
|
"grad_norm": 0.3752756118774414, |
|
"learning_rate": 9.869393540548751e-05, |
|
"loss": 1.8716, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.08165123691000152, |
|
"grad_norm": 0.3437153697013855, |
|
"learning_rate": 9.86888731396173e-05, |
|
"loss": 1.9824, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.08195477310669297, |
|
"grad_norm": 0.4854094088077545, |
|
"learning_rate": 9.86838108737471e-05, |
|
"loss": 1.4385, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.08225830930338443, |
|
"grad_norm": 0.37674829363822937, |
|
"learning_rate": 9.86787486078769e-05, |
|
"loss": 1.7877, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.08256184550007588, |
|
"grad_norm": 0.4215140640735626, |
|
"learning_rate": 9.867368634200669e-05, |
|
"loss": 2.1854, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.08286538169676734, |
|
"grad_norm": 0.3680359423160553, |
|
"learning_rate": 9.866862407613648e-05, |
|
"loss": 2.104, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.08316891789345879, |
|
"grad_norm": 0.4195649325847626, |
|
"learning_rate": 9.866356181026628e-05, |
|
"loss": 1.469, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.08347245409015025, |
|
"grad_norm": 0.480640709400177, |
|
"learning_rate": 9.865849954439607e-05, |
|
"loss": 1.8329, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.08377599028684171, |
|
"grad_norm": 0.34760695695877075, |
|
"learning_rate": 9.865343727852587e-05, |
|
"loss": 1.9495, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.08407952648353316, |
|
"grad_norm": 0.3803161680698395, |
|
"learning_rate": 9.864837501265568e-05, |
|
"loss": 1.9294, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.08438306268022462, |
|
"grad_norm": 0.41739675402641296, |
|
"learning_rate": 9.864331274678547e-05, |
|
"loss": 2.059, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.08468659887691607, |
|
"grad_norm": 0.3807448744773865, |
|
"learning_rate": 9.863825048091527e-05, |
|
"loss": 1.9741, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.08499013507360753, |
|
"grad_norm": 0.3610997200012207, |
|
"learning_rate": 9.863318821504506e-05, |
|
"loss": 1.9815, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.08529367127029898, |
|
"grad_norm": 0.3797460198402405, |
|
"learning_rate": 9.862812594917485e-05, |
|
"loss": 2.1394, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.08559720746699044, |
|
"grad_norm": 0.3922887444496155, |
|
"learning_rate": 9.862306368330465e-05, |
|
"loss": 2.184, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.08590074366368189, |
|
"grad_norm": 0.38251930475234985, |
|
"learning_rate": 9.861800141743444e-05, |
|
"loss": 2.0186, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.08620427986037335, |
|
"grad_norm": 0.35968562960624695, |
|
"learning_rate": 9.861293915156424e-05, |
|
"loss": 2.0, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.0865078160570648, |
|
"grad_norm": 0.37149590253829956, |
|
"learning_rate": 9.860787688569403e-05, |
|
"loss": 1.7941, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.08681135225375626, |
|
"grad_norm": 0.36890628933906555, |
|
"learning_rate": 9.860281461982383e-05, |
|
"loss": 1.906, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.08711488845044772, |
|
"grad_norm": 0.36025917530059814, |
|
"learning_rate": 9.859775235395364e-05, |
|
"loss": 1.9655, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.08741842464713917, |
|
"grad_norm": 0.3704364001750946, |
|
"learning_rate": 9.859269008808343e-05, |
|
"loss": 1.8657, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.08772196084383063, |
|
"grad_norm": 0.5996513962745667, |
|
"learning_rate": 9.858762782221323e-05, |
|
"loss": 1.7448, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.08802549704052208, |
|
"grad_norm": 0.3615630269050598, |
|
"learning_rate": 9.858256555634302e-05, |
|
"loss": 1.9007, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.08832903323721354, |
|
"grad_norm": 0.36014246940612793, |
|
"learning_rate": 9.857750329047282e-05, |
|
"loss": 1.927, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.08863256943390499, |
|
"grad_norm": 0.5038754940032959, |
|
"learning_rate": 9.857244102460261e-05, |
|
"loss": 1.6613, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.08893610563059645, |
|
"grad_norm": 0.3880213797092438, |
|
"learning_rate": 9.85673787587324e-05, |
|
"loss": 1.5563, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.0892396418272879, |
|
"grad_norm": 0.43225082755088806, |
|
"learning_rate": 9.85623164928622e-05, |
|
"loss": 1.5534, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.08954317802397936, |
|
"grad_norm": 0.44342055916786194, |
|
"learning_rate": 9.8557254226992e-05, |
|
"loss": 1.6211, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.08984671422067081, |
|
"grad_norm": 0.42114123702049255, |
|
"learning_rate": 9.85521919611218e-05, |
|
"loss": 1.9731, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.09015025041736227, |
|
"grad_norm": 0.43151113390922546, |
|
"learning_rate": 9.85471296952516e-05, |
|
"loss": 1.9519, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.09045378661405373, |
|
"grad_norm": 0.38092517852783203, |
|
"learning_rate": 9.85420674293814e-05, |
|
"loss": 2.0973, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.09075732281074518, |
|
"grad_norm": 0.40729570388793945, |
|
"learning_rate": 9.853700516351119e-05, |
|
"loss": 1.4395, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.09106085900743664, |
|
"grad_norm": 0.3631846308708191, |
|
"learning_rate": 9.8531942897641e-05, |
|
"loss": 1.2255, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.09136439520412809, |
|
"grad_norm": 0.37764397263526917, |
|
"learning_rate": 9.852688063177079e-05, |
|
"loss": 1.9941, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.09166793140081955, |
|
"grad_norm": 0.3755379319190979, |
|
"learning_rate": 9.852181836590059e-05, |
|
"loss": 1.7154, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.091971467597511, |
|
"grad_norm": 0.39003854990005493, |
|
"learning_rate": 9.851675610003038e-05, |
|
"loss": 1.928, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 0.09227500379420246, |
|
"grad_norm": 0.39592432975769043, |
|
"learning_rate": 9.851169383416018e-05, |
|
"loss": 2.1913, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.09257853999089391, |
|
"grad_norm": 0.4315894842147827, |
|
"learning_rate": 9.850663156828997e-05, |
|
"loss": 1.6432, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.09288207618758537, |
|
"grad_norm": 0.4103511571884155, |
|
"learning_rate": 9.850156930241977e-05, |
|
"loss": 1.9944, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.09318561238427682, |
|
"grad_norm": 0.4236547350883484, |
|
"learning_rate": 9.849650703654957e-05, |
|
"loss": 1.875, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 0.09348914858096828, |
|
"grad_norm": 0.41012468934059143, |
|
"learning_rate": 9.849144477067937e-05, |
|
"loss": 2.008, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.09379268477765973, |
|
"grad_norm": 0.35538622736930847, |
|
"learning_rate": 9.848638250480916e-05, |
|
"loss": 1.7322, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 0.09409622097435119, |
|
"grad_norm": 0.3874755799770355, |
|
"learning_rate": 9.848132023893896e-05, |
|
"loss": 1.9818, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.09439975717104265, |
|
"grad_norm": 0.42444977164268494, |
|
"learning_rate": 9.847625797306875e-05, |
|
"loss": 2.1606, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 0.0947032933677341, |
|
"grad_norm": 0.5855305194854736, |
|
"learning_rate": 9.847119570719855e-05, |
|
"loss": 1.4887, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.09500682956442556, |
|
"grad_norm": 0.35223227739334106, |
|
"learning_rate": 9.846613344132834e-05, |
|
"loss": 2.0025, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.09531036576111701, |
|
"grad_norm": 0.4013148844242096, |
|
"learning_rate": 9.846107117545814e-05, |
|
"loss": 1.9702, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.09561390195780847, |
|
"grad_norm": 0.5038349032402039, |
|
"learning_rate": 9.845600890958793e-05, |
|
"loss": 2.1532, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.09591743815449992, |
|
"grad_norm": 0.4826093018054962, |
|
"learning_rate": 9.845094664371774e-05, |
|
"loss": 2.0118, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.09622097435119138, |
|
"grad_norm": 0.41135913133621216, |
|
"learning_rate": 9.844588437784754e-05, |
|
"loss": 2.0707, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 0.09652451054788283, |
|
"grad_norm": 0.4353053569793701, |
|
"learning_rate": 9.844082211197733e-05, |
|
"loss": 2.104, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.09682804674457429, |
|
"grad_norm": 0.4192908704280853, |
|
"learning_rate": 9.843575984610712e-05, |
|
"loss": 1.9489, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 0.09713158294126574, |
|
"grad_norm": 0.380562424659729, |
|
"learning_rate": 9.843069758023692e-05, |
|
"loss": 1.3602, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.0974351191379572, |
|
"grad_norm": 0.3394995331764221, |
|
"learning_rate": 9.842563531436671e-05, |
|
"loss": 2.2161, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 0.09773865533464866, |
|
"grad_norm": 0.3419237434864044, |
|
"learning_rate": 9.842057304849651e-05, |
|
"loss": 1.7146, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.09804219153134011, |
|
"grad_norm": 0.3590264618396759, |
|
"learning_rate": 9.84155107826263e-05, |
|
"loss": 1.8654, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 0.09834572772803157, |
|
"grad_norm": 0.40006300806999207, |
|
"learning_rate": 9.84104485167561e-05, |
|
"loss": 1.5787, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.09864926392472302, |
|
"grad_norm": 0.33313074707984924, |
|
"learning_rate": 9.84053862508859e-05, |
|
"loss": 1.8653, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.09895280012141448, |
|
"grad_norm": 0.39681655168533325, |
|
"learning_rate": 9.84003239850157e-05, |
|
"loss": 2.178, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.09925633631810593, |
|
"grad_norm": 0.41945868730545044, |
|
"learning_rate": 9.83952617191455e-05, |
|
"loss": 1.8324, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 0.09955987251479739, |
|
"grad_norm": 0.3957304060459137, |
|
"learning_rate": 9.839019945327529e-05, |
|
"loss": 1.6468, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.09986340871148884, |
|
"grad_norm": 0.35814937949180603, |
|
"learning_rate": 9.838513718740509e-05, |
|
"loss": 1.6492, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 0.1001669449081803, |
|
"grad_norm": 0.38410916924476624, |
|
"learning_rate": 9.838007492153488e-05, |
|
"loss": 1.7223, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.10047048110487175, |
|
"grad_norm": 0.38490885496139526, |
|
"learning_rate": 9.837501265566468e-05, |
|
"loss": 2.0166, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 0.10077401730156321, |
|
"grad_norm": 0.38943415880203247, |
|
"learning_rate": 9.836995038979447e-05, |
|
"loss": 1.371, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.10107755349825466, |
|
"grad_norm": 0.39741018414497375, |
|
"learning_rate": 9.836488812392427e-05, |
|
"loss": 1.6233, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 0.10138108969494612, |
|
"grad_norm": 0.4663957357406616, |
|
"learning_rate": 9.835982585805406e-05, |
|
"loss": 1.746, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.10168462589163758, |
|
"grad_norm": 0.37118905782699585, |
|
"learning_rate": 9.835476359218387e-05, |
|
"loss": 1.9684, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.10198816208832903, |
|
"grad_norm": 0.40275588631629944, |
|
"learning_rate": 9.834970132631366e-05, |
|
"loss": 1.9551, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.1022916982850205, |
|
"grad_norm": 0.4336283206939697, |
|
"learning_rate": 9.834463906044346e-05, |
|
"loss": 2.0711, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 0.10259523448171194, |
|
"grad_norm": 0.35735735297203064, |
|
"learning_rate": 9.833957679457325e-05, |
|
"loss": 2.1397, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.1028987706784034, |
|
"grad_norm": 0.37825390696525574, |
|
"learning_rate": 9.833451452870305e-05, |
|
"loss": 1.7494, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 0.10320230687509485, |
|
"grad_norm": 0.3384961783885956, |
|
"learning_rate": 9.832945226283284e-05, |
|
"loss": 2.0197, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.10350584307178631, |
|
"grad_norm": 0.46276888251304626, |
|
"learning_rate": 9.832438999696264e-05, |
|
"loss": 1.797, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 0.10380937926847776, |
|
"grad_norm": 0.3685421347618103, |
|
"learning_rate": 9.831932773109243e-05, |
|
"loss": 1.9301, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.10411291546516922, |
|
"grad_norm": 0.38931936025619507, |
|
"learning_rate": 9.831426546522223e-05, |
|
"loss": 1.9623, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 0.10441645166186067, |
|
"grad_norm": 0.46678805351257324, |
|
"learning_rate": 9.830920319935204e-05, |
|
"loss": 1.6708, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.10471998785855213, |
|
"grad_norm": 0.4199204444885254, |
|
"learning_rate": 9.830414093348183e-05, |
|
"loss": 1.8014, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.1050235240552436, |
|
"grad_norm": 0.41024506092071533, |
|
"learning_rate": 9.829907866761164e-05, |
|
"loss": 1.8829, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.10532706025193504, |
|
"grad_norm": 0.5271286368370056, |
|
"learning_rate": 9.829401640174143e-05, |
|
"loss": 1.7796, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 0.1056305964486265, |
|
"grad_norm": 0.3593878448009491, |
|
"learning_rate": 9.828895413587123e-05, |
|
"loss": 2.0697, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.10593413264531795, |
|
"grad_norm": 0.44404372572898865, |
|
"learning_rate": 9.828389187000102e-05, |
|
"loss": 2.3235, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 0.10623766884200941, |
|
"grad_norm": 0.4072231650352478, |
|
"learning_rate": 9.827882960413082e-05, |
|
"loss": 1.5391, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.10654120503870086, |
|
"grad_norm": 0.3924303352832794, |
|
"learning_rate": 9.827376733826061e-05, |
|
"loss": 2.0649, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 0.10684474123539232, |
|
"grad_norm": 0.3815264105796814, |
|
"learning_rate": 9.826870507239041e-05, |
|
"loss": 1.5821, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.10714827743208377, |
|
"grad_norm": 0.40832409262657166, |
|
"learning_rate": 9.82636428065202e-05, |
|
"loss": 2.1135, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 0.10745181362877523, |
|
"grad_norm": 0.40270155668258667, |
|
"learning_rate": 9.825858054065e-05, |
|
"loss": 1.6561, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 0.10775534982546668, |
|
"grad_norm": 0.38295283913612366, |
|
"learning_rate": 9.82535182747798e-05, |
|
"loss": 1.8938, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.10805888602215814, |
|
"grad_norm": 0.41975417733192444, |
|
"learning_rate": 9.82484560089096e-05, |
|
"loss": 1.8605, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.10836242221884959, |
|
"grad_norm": 0.41388946771621704, |
|
"learning_rate": 9.82433937430394e-05, |
|
"loss": 1.812, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 0.10866595841554105, |
|
"grad_norm": 0.3470607101917267, |
|
"learning_rate": 9.823833147716919e-05, |
|
"loss": 2.1914, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 0.10896949461223251, |
|
"grad_norm": 0.4417155385017395, |
|
"learning_rate": 9.823326921129898e-05, |
|
"loss": 1.7644, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 0.10927303080892396, |
|
"grad_norm": 0.33910539746284485, |
|
"learning_rate": 9.822820694542878e-05, |
|
"loss": 1.8821, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.10957656700561542, |
|
"grad_norm": 0.36742356419563293, |
|
"learning_rate": 9.822314467955857e-05, |
|
"loss": 1.9684, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 0.10988010320230687, |
|
"grad_norm": 0.407844603061676, |
|
"learning_rate": 9.821808241368837e-05, |
|
"loss": 1.8797, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 0.11018363939899833, |
|
"grad_norm": 0.4090898036956787, |
|
"learning_rate": 9.821302014781816e-05, |
|
"loss": 1.8401, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 0.11048717559568978, |
|
"grad_norm": 0.3852720260620117, |
|
"learning_rate": 9.820795788194796e-05, |
|
"loss": 1.6887, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.11079071179238124, |
|
"grad_norm": 0.4147186875343323, |
|
"learning_rate": 9.820289561607777e-05, |
|
"loss": 1.7263, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.11109424798907269, |
|
"grad_norm": 0.7032086849212646, |
|
"learning_rate": 9.819783335020756e-05, |
|
"loss": 1.5382, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 0.11139778418576415, |
|
"grad_norm": 0.3547534644603729, |
|
"learning_rate": 9.819277108433736e-05, |
|
"loss": 1.5988, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 0.1117013203824556, |
|
"grad_norm": 0.45878785848617554, |
|
"learning_rate": 9.818770881846715e-05, |
|
"loss": 2.2467, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.11200485657914706, |
|
"grad_norm": 0.39183077216148376, |
|
"learning_rate": 9.818264655259695e-05, |
|
"loss": 1.848, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.11230839277583853, |
|
"grad_norm": 0.3735283315181732, |
|
"learning_rate": 9.817758428672674e-05, |
|
"loss": 1.6925, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.11261192897252997, |
|
"grad_norm": 0.3878265917301178, |
|
"learning_rate": 9.817252202085654e-05, |
|
"loss": 2.04, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.11291546516922144, |
|
"grad_norm": 0.38978812098503113, |
|
"learning_rate": 9.816745975498633e-05, |
|
"loss": 1.869, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.11321900136591288, |
|
"grad_norm": 0.39212337136268616, |
|
"learning_rate": 9.816239748911613e-05, |
|
"loss": 2.0549, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.11352253756260434, |
|
"grad_norm": 0.39528506994247437, |
|
"learning_rate": 9.815733522324593e-05, |
|
"loss": 1.5653, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.11382607375929579, |
|
"grad_norm": 0.4226018786430359, |
|
"learning_rate": 9.815227295737573e-05, |
|
"loss": 1.6231, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.11412960995598725, |
|
"grad_norm": 0.3577810823917389, |
|
"learning_rate": 9.814721069150552e-05, |
|
"loss": 1.9599, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.1144331461526787, |
|
"grad_norm": 0.33580708503723145, |
|
"learning_rate": 9.814214842563532e-05, |
|
"loss": 2.0419, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.11473668234937016, |
|
"grad_norm": 0.38860392570495605, |
|
"learning_rate": 9.813708615976511e-05, |
|
"loss": 1.7186, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.11504021854606161, |
|
"grad_norm": 0.38994479179382324, |
|
"learning_rate": 9.813202389389491e-05, |
|
"loss": 2.1848, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.11534375474275307, |
|
"grad_norm": 0.3947262763977051, |
|
"learning_rate": 9.81269616280247e-05, |
|
"loss": 2.1868, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.11564729093944452, |
|
"grad_norm": 0.3112877607345581, |
|
"learning_rate": 9.81218993621545e-05, |
|
"loss": 1.8604, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.11595082713613598, |
|
"grad_norm": 0.375689834356308, |
|
"learning_rate": 9.811683709628429e-05, |
|
"loss": 2.0418, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.11625436333282745, |
|
"grad_norm": 0.34537243843078613, |
|
"learning_rate": 9.81117748304141e-05, |
|
"loss": 1.8874, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.1165578995295189, |
|
"grad_norm": 0.5077370405197144, |
|
"learning_rate": 9.81067125645439e-05, |
|
"loss": 1.7497, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.11686143572621036, |
|
"grad_norm": 0.3703441023826599, |
|
"learning_rate": 9.810165029867369e-05, |
|
"loss": 1.781, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.1171649719229018, |
|
"grad_norm": 0.4386610984802246, |
|
"learning_rate": 9.809658803280348e-05, |
|
"loss": 1.8428, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.11746850811959327, |
|
"grad_norm": 0.37781745195388794, |
|
"learning_rate": 9.809152576693328e-05, |
|
"loss": 2.0384, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.11777204431628471, |
|
"grad_norm": 0.38956716656684875, |
|
"learning_rate": 9.808646350106307e-05, |
|
"loss": 2.3534, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.11807558051297617, |
|
"grad_norm": 0.3444838523864746, |
|
"learning_rate": 9.808140123519288e-05, |
|
"loss": 1.921, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.11837911670966762, |
|
"grad_norm": 0.39881742000579834, |
|
"learning_rate": 9.807633896932268e-05, |
|
"loss": 2.1758, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.11868265290635908, |
|
"grad_norm": 0.384226530790329, |
|
"learning_rate": 9.807127670345247e-05, |
|
"loss": 1.7651, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.11898618910305053, |
|
"grad_norm": 0.36255109310150146, |
|
"learning_rate": 9.806621443758227e-05, |
|
"loss": 1.8122, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.119289725299742, |
|
"grad_norm": 0.3627421259880066, |
|
"learning_rate": 9.806115217171206e-05, |
|
"loss": 1.6304, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.11959326149643346, |
|
"grad_norm": 0.8936781883239746, |
|
"learning_rate": 9.805608990584187e-05, |
|
"loss": 1.8827, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.1198967976931249, |
|
"grad_norm": 0.5008642673492432, |
|
"learning_rate": 9.805102763997166e-05, |
|
"loss": 1.3597, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.12020033388981637, |
|
"grad_norm": 0.4444289207458496, |
|
"learning_rate": 9.804596537410146e-05, |
|
"loss": 2.1768, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.12050387008650781, |
|
"grad_norm": 0.3963356912136078, |
|
"learning_rate": 9.804090310823125e-05, |
|
"loss": 1.8373, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.12080740628319928, |
|
"grad_norm": 0.44095271825790405, |
|
"learning_rate": 9.803584084236105e-05, |
|
"loss": 1.7893, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.12111094247989072, |
|
"grad_norm": 0.4162418246269226, |
|
"learning_rate": 9.803077857649084e-05, |
|
"loss": 1.7482, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.12141447867658219, |
|
"grad_norm": 0.3853035271167755, |
|
"learning_rate": 9.802571631062064e-05, |
|
"loss": 1.6274, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.12171801487327363, |
|
"grad_norm": 1.1697463989257812, |
|
"learning_rate": 9.802065404475043e-05, |
|
"loss": 2.2254, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.1220215510699651, |
|
"grad_norm": 0.3899803161621094, |
|
"learning_rate": 9.801559177888023e-05, |
|
"loss": 1.9754, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.12232508726665654, |
|
"grad_norm": 0.43946412205696106, |
|
"learning_rate": 9.801052951301002e-05, |
|
"loss": 2.1184, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.122628623463348, |
|
"grad_norm": 0.46882718801498413, |
|
"learning_rate": 9.800546724713983e-05, |
|
"loss": 1.4423, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.12293215966003945, |
|
"grad_norm": 0.4379485547542572, |
|
"learning_rate": 9.800040498126963e-05, |
|
"loss": 2.0614, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.12323569585673091, |
|
"grad_norm": 0.3837740123271942, |
|
"learning_rate": 9.799534271539942e-05, |
|
"loss": 1.9974, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.12353923205342238, |
|
"grad_norm": 0.35403695702552795, |
|
"learning_rate": 9.799028044952922e-05, |
|
"loss": 1.5693, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.12384276825011382, |
|
"grad_norm": 0.4070426821708679, |
|
"learning_rate": 9.798521818365901e-05, |
|
"loss": 1.8704, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.12414630444680529, |
|
"grad_norm": 0.4301077425479889, |
|
"learning_rate": 9.79801559177888e-05, |
|
"loss": 1.077, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.12444984064349673, |
|
"grad_norm": 0.37687429785728455, |
|
"learning_rate": 9.79750936519186e-05, |
|
"loss": 1.7323, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.1247533768401882, |
|
"grad_norm": 0.37393873929977417, |
|
"learning_rate": 9.79700313860484e-05, |
|
"loss": 1.9532, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.12505691303687966, |
|
"grad_norm": 0.4518846869468689, |
|
"learning_rate": 9.796496912017819e-05, |
|
"loss": 2.0123, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.1253604492335711, |
|
"grad_norm": 0.39417609572410583, |
|
"learning_rate": 9.7959906854308e-05, |
|
"loss": 2.2669, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.12566398543026255, |
|
"grad_norm": 0.3802976608276367, |
|
"learning_rate": 9.795484458843779e-05, |
|
"loss": 2.0506, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.12596752162695402, |
|
"grad_norm": 1.3118431568145752, |
|
"learning_rate": 9.794978232256759e-05, |
|
"loss": 2.2551, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.12627105782364548, |
|
"grad_norm": 0.9459638595581055, |
|
"learning_rate": 9.794472005669738e-05, |
|
"loss": 1.7829, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.1265745940203369, |
|
"grad_norm": 0.571232795715332, |
|
"learning_rate": 9.793965779082718e-05, |
|
"loss": 1.7768, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.12687813021702837, |
|
"grad_norm": 0.3973385989665985, |
|
"learning_rate": 9.793459552495697e-05, |
|
"loss": 1.88, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.12718166641371983, |
|
"grad_norm": 0.3883122503757477, |
|
"learning_rate": 9.792953325908677e-05, |
|
"loss": 1.9592, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.1274852026104113, |
|
"grad_norm": 0.40379586815834045, |
|
"learning_rate": 9.792447099321656e-05, |
|
"loss": 1.9697, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.12778873880710276, |
|
"grad_norm": 0.3288556635379791, |
|
"learning_rate": 9.791940872734636e-05, |
|
"loss": 1.7282, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.1280922750037942, |
|
"grad_norm": 0.3872746527194977, |
|
"learning_rate": 9.791434646147616e-05, |
|
"loss": 1.9348, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.12839581120048565, |
|
"grad_norm": 0.37058207392692566, |
|
"learning_rate": 9.790928419560596e-05, |
|
"loss": 1.5684, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.12869934739717712, |
|
"grad_norm": 0.37466561794281006, |
|
"learning_rate": 9.790422192973575e-05, |
|
"loss": 1.9535, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.12900288359386858, |
|
"grad_norm": 0.32176846265792847, |
|
"learning_rate": 9.789915966386555e-05, |
|
"loss": 1.8537, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.12930641979056, |
|
"grad_norm": 0.37653467059135437, |
|
"learning_rate": 9.789409739799534e-05, |
|
"loss": 2.0701, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.12960995598725147, |
|
"grad_norm": 0.38768434524536133, |
|
"learning_rate": 9.788903513212514e-05, |
|
"loss": 1.731, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.12991349218394294, |
|
"grad_norm": 0.5139635801315308, |
|
"learning_rate": 9.788397286625493e-05, |
|
"loss": 2.4437, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.1302170283806344, |
|
"grad_norm": 0.3759630024433136, |
|
"learning_rate": 9.787891060038473e-05, |
|
"loss": 2.0918, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.13052056457732586, |
|
"grad_norm": 0.3718818426132202, |
|
"learning_rate": 9.787384833451452e-05, |
|
"loss": 1.5854, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.1308241007740173, |
|
"grad_norm": 0.6460405588150024, |
|
"learning_rate": 9.786878606864432e-05, |
|
"loss": 2.2442, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.13112763697070876, |
|
"grad_norm": 0.40393388271331787, |
|
"learning_rate": 9.786372380277413e-05, |
|
"loss": 1.728, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.13143117316740022, |
|
"grad_norm": 0.3772658407688141, |
|
"learning_rate": 9.785866153690393e-05, |
|
"loss": 1.668, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.13173470936409168, |
|
"grad_norm": 2.5252649784088135, |
|
"learning_rate": 9.785359927103373e-05, |
|
"loss": 1.8864, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.1320382455607831, |
|
"grad_norm": 0.42327219247817993, |
|
"learning_rate": 9.784853700516352e-05, |
|
"loss": 2.3174, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.13234178175747457, |
|
"grad_norm": 0.3689473867416382, |
|
"learning_rate": 9.784347473929332e-05, |
|
"loss": 1.9671, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.13264531795416604, |
|
"grad_norm": 0.37554243206977844, |
|
"learning_rate": 9.783841247342311e-05, |
|
"loss": 1.783, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.1329488541508575, |
|
"grad_norm": 0.409587025642395, |
|
"learning_rate": 9.783335020755291e-05, |
|
"loss": 2.0385, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.13325239034754893, |
|
"grad_norm": 0.349252849817276, |
|
"learning_rate": 9.78282879416827e-05, |
|
"loss": 1.8785, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 0.1335559265442404, |
|
"grad_norm": 0.36687588691711426, |
|
"learning_rate": 9.78232256758125e-05, |
|
"loss": 2.1174, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.13385946274093186, |
|
"grad_norm": 0.40221846103668213, |
|
"learning_rate": 9.781816340994229e-05, |
|
"loss": 1.8385, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.13416299893762332, |
|
"grad_norm": 0.5634617805480957, |
|
"learning_rate": 9.781310114407209e-05, |
|
"loss": 1.9316, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.13446653513431478, |
|
"grad_norm": 0.37704020738601685, |
|
"learning_rate": 9.78080388782019e-05, |
|
"loss": 1.8865, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 0.1347700713310062, |
|
"grad_norm": 0.36043843626976013, |
|
"learning_rate": 9.780297661233169e-05, |
|
"loss": 1.585, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.13507360752769768, |
|
"grad_norm": 0.33643844723701477, |
|
"learning_rate": 9.779791434646149e-05, |
|
"loss": 1.8098, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.13537714372438914, |
|
"grad_norm": 0.6782101988792419, |
|
"learning_rate": 9.779285208059128e-05, |
|
"loss": 2.0468, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.1356806799210806, |
|
"grad_norm": 0.38101980090141296, |
|
"learning_rate": 9.778778981472108e-05, |
|
"loss": 2.0624, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 0.13598421611777203, |
|
"grad_norm": 0.399311900138855, |
|
"learning_rate": 9.778272754885087e-05, |
|
"loss": 2.1652, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.1362877523144635, |
|
"grad_norm": 0.3491426706314087, |
|
"learning_rate": 9.777766528298066e-05, |
|
"loss": 1.9092, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 0.13659128851115496, |
|
"grad_norm": 0.3654717803001404, |
|
"learning_rate": 9.777260301711046e-05, |
|
"loss": 1.9773, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.13689482470784642, |
|
"grad_norm": 0.394699364900589, |
|
"learning_rate": 9.776754075124025e-05, |
|
"loss": 2.1568, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 0.13719836090453785, |
|
"grad_norm": 0.3601212203502655, |
|
"learning_rate": 9.776247848537006e-05, |
|
"loss": 1.8744, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.13750189710122931, |
|
"grad_norm": 0.40716952085494995, |
|
"learning_rate": 9.775741621949986e-05, |
|
"loss": 2.1052, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 0.13780543329792078, |
|
"grad_norm": 0.37777504324913025, |
|
"learning_rate": 9.775235395362965e-05, |
|
"loss": 1.8896, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.13810896949461224, |
|
"grad_norm": 0.368600994348526, |
|
"learning_rate": 9.774729168775945e-05, |
|
"loss": 1.8285, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.1384125056913037, |
|
"grad_norm": 0.41742029786109924, |
|
"learning_rate": 9.774222942188924e-05, |
|
"loss": 1.8286, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.13871604188799513, |
|
"grad_norm": 0.40132156014442444, |
|
"learning_rate": 9.773716715601904e-05, |
|
"loss": 1.9515, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 0.1390195780846866, |
|
"grad_norm": 0.44473376870155334, |
|
"learning_rate": 9.773210489014883e-05, |
|
"loss": 1.8715, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.13932311428137806, |
|
"grad_norm": 0.40146371722221375, |
|
"learning_rate": 9.772704262427863e-05, |
|
"loss": 2.1469, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 0.13962665047806952, |
|
"grad_norm": 0.3863317370414734, |
|
"learning_rate": 9.772198035840842e-05, |
|
"loss": 1.9215, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.13993018667476095, |
|
"grad_norm": 0.40235334634780884, |
|
"learning_rate": 9.771691809253823e-05, |
|
"loss": 2.1276, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 0.14023372287145242, |
|
"grad_norm": 0.46011632680892944, |
|
"learning_rate": 9.771185582666802e-05, |
|
"loss": 1.244, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.14053725906814388, |
|
"grad_norm": 0.3428272008895874, |
|
"learning_rate": 9.770679356079782e-05, |
|
"loss": 1.7991, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 0.14084079526483534, |
|
"grad_norm": 0.39976757764816284, |
|
"learning_rate": 9.770173129492761e-05, |
|
"loss": 1.7166, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.1411443314615268, |
|
"grad_norm": 0.3258446753025055, |
|
"learning_rate": 9.769666902905741e-05, |
|
"loss": 1.677, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.14144786765821823, |
|
"grad_norm": 0.3950905501842499, |
|
"learning_rate": 9.76916067631872e-05, |
|
"loss": 2.0122, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.1417514038549097, |
|
"grad_norm": 0.39712047576904297, |
|
"learning_rate": 9.7686544497317e-05, |
|
"loss": 1.7262, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 0.14205494005160116, |
|
"grad_norm": 0.8331599235534668, |
|
"learning_rate": 9.768148223144679e-05, |
|
"loss": 1.9852, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.14235847624829262, |
|
"grad_norm": 0.3578427731990814, |
|
"learning_rate": 9.767641996557659e-05, |
|
"loss": 1.8249, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 0.14266201244498405, |
|
"grad_norm": 0.3736058473587036, |
|
"learning_rate": 9.767135769970638e-05, |
|
"loss": 1.43, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.14296554864167552, |
|
"grad_norm": 0.48153185844421387, |
|
"learning_rate": 9.766629543383619e-05, |
|
"loss": 1.8667, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 0.14326908483836698, |
|
"grad_norm": 0.3924524188041687, |
|
"learning_rate": 9.766123316796599e-05, |
|
"loss": 2.0385, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.14357262103505844, |
|
"grad_norm": 0.38956940174102783, |
|
"learning_rate": 9.765617090209578e-05, |
|
"loss": 1.3157, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 0.14387615723174987, |
|
"grad_norm": 0.4032903015613556, |
|
"learning_rate": 9.765110863622558e-05, |
|
"loss": 1.8793, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 0.14417969342844134, |
|
"grad_norm": 0.5116568207740784, |
|
"learning_rate": 9.764604637035537e-05, |
|
"loss": 1.7658, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.1444832296251328, |
|
"grad_norm": 0.3981756269931793, |
|
"learning_rate": 9.764098410448517e-05, |
|
"loss": 1.8087, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.14478676582182426, |
|
"grad_norm": 0.43181854486465454, |
|
"learning_rate": 9.763592183861496e-05, |
|
"loss": 1.5241, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 0.14509030201851572, |
|
"grad_norm": 0.4172961413860321, |
|
"learning_rate": 9.763085957274477e-05, |
|
"loss": 1.8318, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 0.14539383821520716, |
|
"grad_norm": 0.4135033190250397, |
|
"learning_rate": 9.762579730687456e-05, |
|
"loss": 2.0783, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 0.14569737441189862, |
|
"grad_norm": 0.36482739448547363, |
|
"learning_rate": 9.762073504100436e-05, |
|
"loss": 2.2524, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.14600091060859008, |
|
"grad_norm": 0.3704656958580017, |
|
"learning_rate": 9.761567277513415e-05, |
|
"loss": 2.0369, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 0.14630444680528154, |
|
"grad_norm": 1.588393211364746, |
|
"learning_rate": 9.761061050926396e-05, |
|
"loss": 1.8041, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 0.14660798300197297, |
|
"grad_norm": 0.3309743404388428, |
|
"learning_rate": 9.760554824339376e-05, |
|
"loss": 1.8373, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 0.14691151919866444, |
|
"grad_norm": 0.34598830342292786, |
|
"learning_rate": 9.760048597752355e-05, |
|
"loss": 1.6249, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.1472150553953559, |
|
"grad_norm": 0.3433639109134674, |
|
"learning_rate": 9.759542371165335e-05, |
|
"loss": 1.9454, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.14751859159204736, |
|
"grad_norm": 0.3801734149456024, |
|
"learning_rate": 9.759036144578314e-05, |
|
"loss": 2.1067, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 0.1478221277887388, |
|
"grad_norm": 0.36811041831970215, |
|
"learning_rate": 9.758529917991293e-05, |
|
"loss": 1.8642, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 0.14812566398543026, |
|
"grad_norm": 0.3999156355857849, |
|
"learning_rate": 9.758023691404273e-05, |
|
"loss": 2.1482, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.14842920018212172, |
|
"grad_norm": 0.7651489973068237, |
|
"learning_rate": 9.757517464817252e-05, |
|
"loss": 1.8213, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 0.14873273637881318, |
|
"grad_norm": 0.3491712808609009, |
|
"learning_rate": 9.757011238230232e-05, |
|
"loss": 2.1047, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.14903627257550464, |
|
"grad_norm": 1.028256893157959, |
|
"learning_rate": 9.756505011643213e-05, |
|
"loss": 2.0519, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 0.14933980877219608, |
|
"grad_norm": 0.5957101583480835, |
|
"learning_rate": 9.755998785056192e-05, |
|
"loss": 2.1236, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 0.14964334496888754, |
|
"grad_norm": 0.40934717655181885, |
|
"learning_rate": 9.755492558469172e-05, |
|
"loss": 1.5391, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 0.149946881165579, |
|
"grad_norm": 0.4403507709503174, |
|
"learning_rate": 9.754986331882151e-05, |
|
"loss": 1.8388, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 0.15025041736227046, |
|
"grad_norm": 0.4258563220500946, |
|
"learning_rate": 9.754480105295131e-05, |
|
"loss": 1.8092, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.1505539535589619, |
|
"grad_norm": 0.3594823181629181, |
|
"learning_rate": 9.75397387870811e-05, |
|
"loss": 1.7195, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.15085748975565336, |
|
"grad_norm": 0.30373120307922363, |
|
"learning_rate": 9.75346765212109e-05, |
|
"loss": 1.9267, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 0.15116102595234482, |
|
"grad_norm": 0.423096626996994, |
|
"learning_rate": 9.752961425534069e-05, |
|
"loss": 2.1559, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 0.15146456214903628, |
|
"grad_norm": 0.36935552954673767, |
|
"learning_rate": 9.752455198947049e-05, |
|
"loss": 2.0357, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 0.15176809834572771, |
|
"grad_norm": 0.7172725200653076, |
|
"learning_rate": 9.75194897236003e-05, |
|
"loss": 2.0973, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.15207163454241918, |
|
"grad_norm": 0.36897605657577515, |
|
"learning_rate": 9.751442745773009e-05, |
|
"loss": 2.1672, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 0.15237517073911064, |
|
"grad_norm": 0.35079488158226013, |
|
"learning_rate": 9.750936519185988e-05, |
|
"loss": 2.0808, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 0.1526787069358021, |
|
"grad_norm": 0.37833186984062195, |
|
"learning_rate": 9.750430292598968e-05, |
|
"loss": 1.8393, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 0.15298224313249356, |
|
"grad_norm": 0.3969264328479767, |
|
"learning_rate": 9.749924066011947e-05, |
|
"loss": 2.1213, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 0.153285779329185, |
|
"grad_norm": 0.30432841181755066, |
|
"learning_rate": 9.749417839424927e-05, |
|
"loss": 1.6397, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.15358931552587646, |
|
"grad_norm": 0.30847886204719543, |
|
"learning_rate": 9.748911612837906e-05, |
|
"loss": 1.6455, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 0.15389285172256792, |
|
"grad_norm": 0.38480496406555176, |
|
"learning_rate": 9.748405386250886e-05, |
|
"loss": 1.803, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 0.15419638791925938, |
|
"grad_norm": 0.48439183831214905, |
|
"learning_rate": 9.747899159663865e-05, |
|
"loss": 1.6892, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 0.15449992411595082, |
|
"grad_norm": 0.5124354362487793, |
|
"learning_rate": 9.747392933076845e-05, |
|
"loss": 2.24, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 0.15480346031264228, |
|
"grad_norm": 0.4051717221736908, |
|
"learning_rate": 9.746886706489826e-05, |
|
"loss": 1.8621, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.15510699650933374, |
|
"grad_norm": 0.6452261209487915, |
|
"learning_rate": 9.746380479902805e-05, |
|
"loss": 1.7043, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 0.1554105327060252, |
|
"grad_norm": 0.5453522801399231, |
|
"learning_rate": 9.745874253315785e-05, |
|
"loss": 1.7325, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.15571406890271666, |
|
"grad_norm": 1.0983595848083496, |
|
"learning_rate": 9.745368026728764e-05, |
|
"loss": 2.169, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 0.1560176050994081, |
|
"grad_norm": 0.3821035623550415, |
|
"learning_rate": 9.744861800141744e-05, |
|
"loss": 2.3305, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 0.15632114129609956, |
|
"grad_norm": 0.3694508969783783, |
|
"learning_rate": 9.744355573554723e-05, |
|
"loss": 1.8453, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.15662467749279102, |
|
"grad_norm": 0.3837510943412781, |
|
"learning_rate": 9.743849346967702e-05, |
|
"loss": 1.9679, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 0.15692821368948248, |
|
"grad_norm": 0.41427966952323914, |
|
"learning_rate": 9.743343120380682e-05, |
|
"loss": 1.9331, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 0.15723174988617392, |
|
"grad_norm": 0.34252259135246277, |
|
"learning_rate": 9.742836893793661e-05, |
|
"loss": 1.7938, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 0.15753528608286538, |
|
"grad_norm": 0.4043283462524414, |
|
"learning_rate": 9.742330667206642e-05, |
|
"loss": 1.4037, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 0.15783882227955684, |
|
"grad_norm": 0.4225389361381531, |
|
"learning_rate": 9.741824440619622e-05, |
|
"loss": 1.6224, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.1581423584762483, |
|
"grad_norm": 0.377590537071228, |
|
"learning_rate": 9.741318214032601e-05, |
|
"loss": 2.0567, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 0.15844589467293974, |
|
"grad_norm": 0.46170124411582947, |
|
"learning_rate": 9.740811987445582e-05, |
|
"loss": 2.0449, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 0.1587494308696312, |
|
"grad_norm": 0.3752427399158478, |
|
"learning_rate": 9.740305760858562e-05, |
|
"loss": 1.8207, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 0.15905296706632266, |
|
"grad_norm": 0.390803724527359, |
|
"learning_rate": 9.739799534271541e-05, |
|
"loss": 2.0781, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 0.15935650326301412, |
|
"grad_norm": 0.38587453961372375, |
|
"learning_rate": 9.73929330768452e-05, |
|
"loss": 1.9932, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.15966003945970558, |
|
"grad_norm": 0.4154350459575653, |
|
"learning_rate": 9.7387870810975e-05, |
|
"loss": 1.7649, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 0.15996357565639702, |
|
"grad_norm": 0.3698589503765106, |
|
"learning_rate": 9.73828085451048e-05, |
|
"loss": 1.6921, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 0.16026711185308848, |
|
"grad_norm": 0.4110312759876251, |
|
"learning_rate": 9.737774627923459e-05, |
|
"loss": 1.1834, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 0.16057064804977994, |
|
"grad_norm": 0.4140758812427521, |
|
"learning_rate": 9.737268401336438e-05, |
|
"loss": 1.8354, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 0.1608741842464714, |
|
"grad_norm": 0.38738423585891724, |
|
"learning_rate": 9.736762174749419e-05, |
|
"loss": 1.9223, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.16117772044316284, |
|
"grad_norm": 0.4055260717868805, |
|
"learning_rate": 9.736255948162399e-05, |
|
"loss": 1.7802, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 0.1614812566398543, |
|
"grad_norm": 0.44946524500846863, |
|
"learning_rate": 9.735749721575378e-05, |
|
"loss": 1.8654, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 0.16178479283654576, |
|
"grad_norm": 0.43206432461738586, |
|
"learning_rate": 9.735243494988358e-05, |
|
"loss": 1.7607, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 0.16208832903323722, |
|
"grad_norm": 0.5007991194725037, |
|
"learning_rate": 9.734737268401337e-05, |
|
"loss": 1.9378, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 0.16239186522992866, |
|
"grad_norm": 0.48757919669151306, |
|
"learning_rate": 9.734231041814317e-05, |
|
"loss": 2.1829, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.16269540142662012, |
|
"grad_norm": 0.4159701466560364, |
|
"learning_rate": 9.733724815227296e-05, |
|
"loss": 1.8847, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 0.16299893762331158, |
|
"grad_norm": 0.40922749042510986, |
|
"learning_rate": 9.733218588640276e-05, |
|
"loss": 1.4376, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 0.16330247382000304, |
|
"grad_norm": 0.33677083253860474, |
|
"learning_rate": 9.732712362053255e-05, |
|
"loss": 1.9568, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 0.1636060100166945, |
|
"grad_norm": 0.3255022168159485, |
|
"learning_rate": 9.732206135466236e-05, |
|
"loss": 1.9949, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 0.16390954621338594, |
|
"grad_norm": 0.3848338723182678, |
|
"learning_rate": 9.731699908879215e-05, |
|
"loss": 2.042, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.1642130824100774, |
|
"grad_norm": 0.3888263404369354, |
|
"learning_rate": 9.731193682292195e-05, |
|
"loss": 1.885, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 0.16451661860676886, |
|
"grad_norm": 0.40090805292129517, |
|
"learning_rate": 9.730687455705174e-05, |
|
"loss": 1.9093, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 0.16482015480346032, |
|
"grad_norm": 0.4106220602989197, |
|
"learning_rate": 9.730181229118154e-05, |
|
"loss": 1.8392, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 0.16512369100015176, |
|
"grad_norm": 0.3483395278453827, |
|
"learning_rate": 9.729675002531133e-05, |
|
"loss": 2.0235, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 0.16542722719684322, |
|
"grad_norm": 0.3686208128929138, |
|
"learning_rate": 9.729168775944113e-05, |
|
"loss": 1.9218, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.16573076339353468, |
|
"grad_norm": 0.36063849925994873, |
|
"learning_rate": 9.728662549357092e-05, |
|
"loss": 1.9334, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 0.16603429959022614, |
|
"grad_norm": 0.39365142583847046, |
|
"learning_rate": 9.728156322770072e-05, |
|
"loss": 1.9825, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 0.16633783578691758, |
|
"grad_norm": 0.4062787592411041, |
|
"learning_rate": 9.727650096183051e-05, |
|
"loss": 1.521, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 0.16664137198360904, |
|
"grad_norm": 0.37347134947776794, |
|
"learning_rate": 9.727143869596032e-05, |
|
"loss": 1.9356, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 0.1669449081803005, |
|
"grad_norm": 0.3538997173309326, |
|
"learning_rate": 9.726637643009012e-05, |
|
"loss": 1.845, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.16724844437699196, |
|
"grad_norm": 0.3868335783481598, |
|
"learning_rate": 9.726131416421991e-05, |
|
"loss": 1.9803, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 0.16755198057368342, |
|
"grad_norm": 0.34705451130867004, |
|
"learning_rate": 9.72562518983497e-05, |
|
"loss": 2.0866, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 0.16785551677037486, |
|
"grad_norm": 0.3794872462749481, |
|
"learning_rate": 9.72511896324795e-05, |
|
"loss": 2.094, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 0.16815905296706632, |
|
"grad_norm": 0.5801231861114502, |
|
"learning_rate": 9.72461273666093e-05, |
|
"loss": 1.7851, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 0.16846258916375778, |
|
"grad_norm": 0.3076344132423401, |
|
"learning_rate": 9.724106510073909e-05, |
|
"loss": 1.5188, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.16876612536044924, |
|
"grad_norm": 0.3552989363670349, |
|
"learning_rate": 9.723600283486888e-05, |
|
"loss": 2.1063, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 0.16906966155714068, |
|
"grad_norm": 0.36939847469329834, |
|
"learning_rate": 9.723094056899868e-05, |
|
"loss": 1.7648, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 0.16937319775383214, |
|
"grad_norm": 0.358634889125824, |
|
"learning_rate": 9.722587830312849e-05, |
|
"loss": 1.8007, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 0.1696767339505236, |
|
"grad_norm": 0.39962029457092285, |
|
"learning_rate": 9.722081603725828e-05, |
|
"loss": 1.8845, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 0.16998027014721506, |
|
"grad_norm": 0.4099076986312866, |
|
"learning_rate": 9.721575377138808e-05, |
|
"loss": 1.8894, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.17028380634390652, |
|
"grad_norm": 0.3610551655292511, |
|
"learning_rate": 9.721069150551787e-05, |
|
"loss": 1.8089, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 0.17058734254059796, |
|
"grad_norm": 0.5951200723648071, |
|
"learning_rate": 9.720562923964767e-05, |
|
"loss": 1.6966, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 0.17089087873728942, |
|
"grad_norm": 0.562522292137146, |
|
"learning_rate": 9.720056697377746e-05, |
|
"loss": 1.7704, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 0.17119441493398088, |
|
"grad_norm": 0.6662526726722717, |
|
"learning_rate": 9.719550470790726e-05, |
|
"loss": 1.7714, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 0.17149795113067234, |
|
"grad_norm": 0.44034865498542786, |
|
"learning_rate": 9.719044244203705e-05, |
|
"loss": 2.1042, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.17180148732736378, |
|
"grad_norm": 0.39868202805519104, |
|
"learning_rate": 9.718538017616685e-05, |
|
"loss": 1.952, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 0.17210502352405524, |
|
"grad_norm": 0.3427380621433258, |
|
"learning_rate": 9.718031791029665e-05, |
|
"loss": 2.037, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 0.1724085597207467, |
|
"grad_norm": 0.37980929017066956, |
|
"learning_rate": 9.717525564442645e-05, |
|
"loss": 1.5378, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 0.17271209591743816, |
|
"grad_norm": 0.32314518094062805, |
|
"learning_rate": 9.717019337855626e-05, |
|
"loss": 1.6191, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 0.1730156321141296, |
|
"grad_norm": 0.40600740909576416, |
|
"learning_rate": 9.716513111268605e-05, |
|
"loss": 1.6055, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.17331916831082106, |
|
"grad_norm": 0.37318041920661926, |
|
"learning_rate": 9.716006884681585e-05, |
|
"loss": 1.8666, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 0.17362270450751252, |
|
"grad_norm": 0.3656068444252014, |
|
"learning_rate": 9.715500658094564e-05, |
|
"loss": 1.5983, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 0.17392624070420398, |
|
"grad_norm": 0.3546827733516693, |
|
"learning_rate": 9.714994431507544e-05, |
|
"loss": 2.2088, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 0.17422977690089544, |
|
"grad_norm": 0.4293152689933777, |
|
"learning_rate": 9.714488204920523e-05, |
|
"loss": 1.803, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 0.17453331309758688, |
|
"grad_norm": 0.3790314495563507, |
|
"learning_rate": 9.713981978333503e-05, |
|
"loss": 1.9874, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.17483684929427834, |
|
"grad_norm": 0.37619829177856445, |
|
"learning_rate": 9.713475751746482e-05, |
|
"loss": 1.9061, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 0.1751403854909698, |
|
"grad_norm": 0.36988991498947144, |
|
"learning_rate": 9.712969525159462e-05, |
|
"loss": 1.5463, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 0.17544392168766126, |
|
"grad_norm": 0.367721825838089, |
|
"learning_rate": 9.712463298572442e-05, |
|
"loss": 1.6526, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 0.1757474578843527, |
|
"grad_norm": 0.39620110392570496, |
|
"learning_rate": 9.711957071985422e-05, |
|
"loss": 2.056, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 0.17605099408104416, |
|
"grad_norm": 0.41518276929855347, |
|
"learning_rate": 9.711450845398401e-05, |
|
"loss": 1.6847, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.17635453027773562, |
|
"grad_norm": 0.3925170302391052, |
|
"learning_rate": 9.710944618811381e-05, |
|
"loss": 1.8476, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 0.17665806647442708, |
|
"grad_norm": 0.36658090353012085, |
|
"learning_rate": 9.71043839222436e-05, |
|
"loss": 2.0699, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 0.17696160267111852, |
|
"grad_norm": 0.3741433620452881, |
|
"learning_rate": 9.70993216563734e-05, |
|
"loss": 1.9645, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 0.17726513886780998, |
|
"grad_norm": 0.3742316663265228, |
|
"learning_rate": 9.709425939050319e-05, |
|
"loss": 2.3717, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 0.17756867506450144, |
|
"grad_norm": 0.3796440660953522, |
|
"learning_rate": 9.708919712463299e-05, |
|
"loss": 1.9356, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.1778722112611929, |
|
"grad_norm": 0.3976511061191559, |
|
"learning_rate": 9.708413485876278e-05, |
|
"loss": 2.1889, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 0.17817574745788436, |
|
"grad_norm": 0.34445542097091675, |
|
"learning_rate": 9.707907259289258e-05, |
|
"loss": 1.6535, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 0.1784792836545758, |
|
"grad_norm": 0.3982098698616028, |
|
"learning_rate": 9.707401032702239e-05, |
|
"loss": 2.0542, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 0.17878281985126726, |
|
"grad_norm": 0.42155295610427856, |
|
"learning_rate": 9.706894806115218e-05, |
|
"loss": 1.4605, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 0.17908635604795872, |
|
"grad_norm": 0.36341744661331177, |
|
"learning_rate": 9.706388579528197e-05, |
|
"loss": 1.8069, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.17938989224465018, |
|
"grad_norm": 0.3715178668498993, |
|
"learning_rate": 9.705882352941177e-05, |
|
"loss": 1.5512, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 0.17969342844134162, |
|
"grad_norm": 0.376767635345459, |
|
"learning_rate": 9.705376126354156e-05, |
|
"loss": 1.6027, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 0.17999696463803308, |
|
"grad_norm": 0.4033347964286804, |
|
"learning_rate": 9.704869899767136e-05, |
|
"loss": 1.5071, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 0.18030050083472454, |
|
"grad_norm": 0.8200478553771973, |
|
"learning_rate": 9.704363673180115e-05, |
|
"loss": 1.924, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 0.180604037031416, |
|
"grad_norm": 0.6224507093429565, |
|
"learning_rate": 9.703857446593095e-05, |
|
"loss": 1.9684, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.18090757322810747, |
|
"grad_norm": 0.32032859325408936, |
|
"learning_rate": 9.703351220006074e-05, |
|
"loss": 1.9478, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 0.1812111094247989, |
|
"grad_norm": 0.33331337571144104, |
|
"learning_rate": 9.702844993419055e-05, |
|
"loss": 1.8177, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 0.18151464562149036, |
|
"grad_norm": 0.47399207949638367, |
|
"learning_rate": 9.702338766832035e-05, |
|
"loss": 2.07, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 0.18181818181818182, |
|
"grad_norm": 0.30480411648750305, |
|
"learning_rate": 9.701832540245014e-05, |
|
"loss": 2.0407, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 0.18212171801487329, |
|
"grad_norm": 0.40148988366127014, |
|
"learning_rate": 9.701326313657994e-05, |
|
"loss": 1.8774, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.18242525421156472, |
|
"grad_norm": 0.3958423137664795, |
|
"learning_rate": 9.700820087070973e-05, |
|
"loss": 1.8462, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 0.18272879040825618, |
|
"grad_norm": 0.34824639558792114, |
|
"learning_rate": 9.700313860483953e-05, |
|
"loss": 1.7839, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 0.18303232660494764, |
|
"grad_norm": 0.38002872467041016, |
|
"learning_rate": 9.699807633896932e-05, |
|
"loss": 2.3237, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 0.1833358628016391, |
|
"grad_norm": 0.37800419330596924, |
|
"learning_rate": 9.699301407309912e-05, |
|
"loss": 1.9375, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 0.18363939899833054, |
|
"grad_norm": 0.4041115939617157, |
|
"learning_rate": 9.698795180722891e-05, |
|
"loss": 2.029, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.183942935195022, |
|
"grad_norm": 0.3697315454483032, |
|
"learning_rate": 9.698288954135872e-05, |
|
"loss": 1.894, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 0.18424647139171346, |
|
"grad_norm": 0.3809906542301178, |
|
"learning_rate": 9.697782727548851e-05, |
|
"loss": 1.8242, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 0.18455000758840492, |
|
"grad_norm": 0.3997717499732971, |
|
"learning_rate": 9.697276500961831e-05, |
|
"loss": 2.0522, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 0.18485354378509639, |
|
"grad_norm": 0.391699880361557, |
|
"learning_rate": 9.69677027437481e-05, |
|
"loss": 1.8521, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 0.18515707998178782, |
|
"grad_norm": 0.3667858839035034, |
|
"learning_rate": 9.69626404778779e-05, |
|
"loss": 1.7613, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.18546061617847928, |
|
"grad_norm": 0.3905411958694458, |
|
"learning_rate": 9.69575782120077e-05, |
|
"loss": 1.8285, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 0.18576415237517074, |
|
"grad_norm": 0.4121951758861542, |
|
"learning_rate": 9.69525159461375e-05, |
|
"loss": 1.8104, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 0.1860676885718622, |
|
"grad_norm": 0.34977591037750244, |
|
"learning_rate": 9.69474536802673e-05, |
|
"loss": 1.7737, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 0.18637122476855364, |
|
"grad_norm": 0.34084367752075195, |
|
"learning_rate": 9.694239141439709e-05, |
|
"loss": 2.0407, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 0.1866747609652451, |
|
"grad_norm": 0.35442525148391724, |
|
"learning_rate": 9.693732914852689e-05, |
|
"loss": 1.9152, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.18697829716193656, |
|
"grad_norm": 0.34404149651527405, |
|
"learning_rate": 9.693226688265668e-05, |
|
"loss": 1.7621, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 0.18728183335862802, |
|
"grad_norm": 0.4516477882862091, |
|
"learning_rate": 9.692720461678649e-05, |
|
"loss": 1.7624, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 0.18758536955531946, |
|
"grad_norm": 0.3506614565849304, |
|
"learning_rate": 9.692214235091628e-05, |
|
"loss": 1.6627, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 0.18788890575201092, |
|
"grad_norm": 0.9165719151496887, |
|
"learning_rate": 9.691708008504608e-05, |
|
"loss": 2.1926, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 0.18819244194870238, |
|
"grad_norm": 0.3361871838569641, |
|
"learning_rate": 9.691201781917587e-05, |
|
"loss": 1.5229, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.18849597814539384, |
|
"grad_norm": 0.32639381289482117, |
|
"learning_rate": 9.690695555330567e-05, |
|
"loss": 1.8778, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 0.1887995143420853, |
|
"grad_norm": 0.44261273741722107, |
|
"learning_rate": 9.690189328743546e-05, |
|
"loss": 2.0903, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 0.18910305053877674, |
|
"grad_norm": 0.4438890516757965, |
|
"learning_rate": 9.689683102156526e-05, |
|
"loss": 1.772, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 0.1894065867354682, |
|
"grad_norm": 0.40160682797431946, |
|
"learning_rate": 9.689176875569505e-05, |
|
"loss": 2.0964, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 0.18971012293215966, |
|
"grad_norm": 0.4022195637226105, |
|
"learning_rate": 9.688670648982485e-05, |
|
"loss": 1.7818, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.19001365912885113, |
|
"grad_norm": 0.4233214855194092, |
|
"learning_rate": 9.688164422395464e-05, |
|
"loss": 1.922, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 0.19031719532554256, |
|
"grad_norm": 0.3864254057407379, |
|
"learning_rate": 9.687658195808445e-05, |
|
"loss": 2.0279, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 0.19062073152223402, |
|
"grad_norm": 0.36527585983276367, |
|
"learning_rate": 9.687151969221424e-05, |
|
"loss": 2.0732, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 0.19092426771892548, |
|
"grad_norm": 0.399237722158432, |
|
"learning_rate": 9.686645742634404e-05, |
|
"loss": 1.8889, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 0.19122780391561695, |
|
"grad_norm": 0.3860459625720978, |
|
"learning_rate": 9.686139516047383e-05, |
|
"loss": 1.968, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.19153134011230838, |
|
"grad_norm": 0.32555973529815674, |
|
"learning_rate": 9.685633289460363e-05, |
|
"loss": 2.0722, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 0.19183487630899984, |
|
"grad_norm": 0.6093998551368713, |
|
"learning_rate": 9.685127062873342e-05, |
|
"loss": 1.8553, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 0.1921384125056913, |
|
"grad_norm": 0.4218057692050934, |
|
"learning_rate": 9.684620836286322e-05, |
|
"loss": 1.9647, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 0.19244194870238276, |
|
"grad_norm": 0.3779148757457733, |
|
"learning_rate": 9.684114609699301e-05, |
|
"loss": 2.0681, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 0.19274548489907423, |
|
"grad_norm": 0.3820381760597229, |
|
"learning_rate": 9.683608383112281e-05, |
|
"loss": 2.0603, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.19304902109576566, |
|
"grad_norm": 0.29337063431739807, |
|
"learning_rate": 9.683102156525262e-05, |
|
"loss": 1.7516, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 0.19335255729245712, |
|
"grad_norm": 0.4369249939918518, |
|
"learning_rate": 9.682595929938241e-05, |
|
"loss": 1.9822, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 0.19365609348914858, |
|
"grad_norm": 0.3766214847564697, |
|
"learning_rate": 9.68208970335122e-05, |
|
"loss": 1.7229, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 0.19395962968584005, |
|
"grad_norm": 0.4765011668205261, |
|
"learning_rate": 9.6815834767642e-05, |
|
"loss": 1.2865, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 0.19426316588253148, |
|
"grad_norm": 0.34236472845077515, |
|
"learning_rate": 9.68107725017718e-05, |
|
"loss": 2.1024, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.19456670207922294, |
|
"grad_norm": 0.398076593875885, |
|
"learning_rate": 9.680571023590159e-05, |
|
"loss": 1.8628, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 0.1948702382759144, |
|
"grad_norm": 0.357099711894989, |
|
"learning_rate": 9.680064797003139e-05, |
|
"loss": 2.2163, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 0.19517377447260587, |
|
"grad_norm": 0.3296545445919037, |
|
"learning_rate": 9.679558570416118e-05, |
|
"loss": 1.8227, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 0.19547731066929733, |
|
"grad_norm": 0.36754927039146423, |
|
"learning_rate": 9.679052343829098e-05, |
|
"loss": 1.7179, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 0.19578084686598876, |
|
"grad_norm": 0.37275364995002747, |
|
"learning_rate": 9.678546117242078e-05, |
|
"loss": 1.6782, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.19608438306268022, |
|
"grad_norm": 0.3951006531715393, |
|
"learning_rate": 9.678039890655058e-05, |
|
"loss": 2.0756, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 0.19638791925937168, |
|
"grad_norm": 0.3560970425605774, |
|
"learning_rate": 9.677533664068037e-05, |
|
"loss": 1.8093, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 0.19669145545606315, |
|
"grad_norm": 0.31553730368614197, |
|
"learning_rate": 9.677027437481017e-05, |
|
"loss": 1.9174, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 0.19699499165275458, |
|
"grad_norm": 0.39949625730514526, |
|
"learning_rate": 9.676521210893996e-05, |
|
"loss": 1.6687, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 0.19729852784944604, |
|
"grad_norm": 0.37323635816574097, |
|
"learning_rate": 9.676014984306976e-05, |
|
"loss": 1.8149, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.1976020640461375, |
|
"grad_norm": 0.43527746200561523, |
|
"learning_rate": 9.675508757719955e-05, |
|
"loss": 1.8744, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 0.19790560024282897, |
|
"grad_norm": 0.39380425214767456, |
|
"learning_rate": 9.675002531132935e-05, |
|
"loss": 1.9721, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 0.1982091364395204, |
|
"grad_norm": 0.3384545147418976, |
|
"learning_rate": 9.674496304545914e-05, |
|
"loss": 2.0122, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 0.19851267263621186, |
|
"grad_norm": 0.39647915959358215, |
|
"learning_rate": 9.673990077958894e-05, |
|
"loss": 2.2419, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 0.19881620883290332, |
|
"grad_norm": 0.3358941674232483, |
|
"learning_rate": 9.673483851371875e-05, |
|
"loss": 1.8758, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.19911974502959479, |
|
"grad_norm": 0.3486049771308899, |
|
"learning_rate": 9.672977624784855e-05, |
|
"loss": 1.5762, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 0.19942328122628625, |
|
"grad_norm": 2.3050696849823, |
|
"learning_rate": 9.672471398197835e-05, |
|
"loss": 2.0056, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 0.19972681742297768, |
|
"grad_norm": 0.35023945569992065, |
|
"learning_rate": 9.671965171610814e-05, |
|
"loss": 1.619, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 0.20003035361966914, |
|
"grad_norm": 0.513656735420227, |
|
"learning_rate": 9.671458945023794e-05, |
|
"loss": 1.5269, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 0.2003338898163606, |
|
"grad_norm": 0.37498149275779724, |
|
"learning_rate": 9.670952718436773e-05, |
|
"loss": 1.8553, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.20063742601305207, |
|
"grad_norm": 0.4101942479610443, |
|
"learning_rate": 9.670446491849753e-05, |
|
"loss": 2.1121, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 0.2009409622097435, |
|
"grad_norm": 0.4265679717063904, |
|
"learning_rate": 9.669940265262732e-05, |
|
"loss": 2.1863, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 0.20124449840643496, |
|
"grad_norm": 4.817168712615967, |
|
"learning_rate": 9.669434038675712e-05, |
|
"loss": 2.0906, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 0.20154803460312642, |
|
"grad_norm": 7.518252849578857, |
|
"learning_rate": 9.668927812088691e-05, |
|
"loss": 1.8889, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 0.2018515707998179, |
|
"grad_norm": 0.5480749011039734, |
|
"learning_rate": 9.66842158550167e-05, |
|
"loss": 1.8439, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.20215510699650932, |
|
"grad_norm": 0.3578292429447174, |
|
"learning_rate": 9.667915358914651e-05, |
|
"loss": 1.8742, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 0.20245864319320078, |
|
"grad_norm": 0.3799275755882263, |
|
"learning_rate": 9.667409132327631e-05, |
|
"loss": 1.994, |
|
"step": 667 |
|
}, |
|
{ |
|
"epoch": 0.20276217938989224, |
|
"grad_norm": 0.3736335039138794, |
|
"learning_rate": 9.66690290574061e-05, |
|
"loss": 1.7933, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 0.2030657155865837, |
|
"grad_norm": 0.3145211637020111, |
|
"learning_rate": 9.66639667915359e-05, |
|
"loss": 1.8193, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 0.20336925178327517, |
|
"grad_norm": 0.4940774142742157, |
|
"learning_rate": 9.66589045256657e-05, |
|
"loss": 1.9238, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.2036727879799666, |
|
"grad_norm": 0.431134968996048, |
|
"learning_rate": 9.665384225979549e-05, |
|
"loss": 1.5493, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 0.20397632417665806, |
|
"grad_norm": 0.41438859701156616, |
|
"learning_rate": 9.664877999392528e-05, |
|
"loss": 1.2076, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 0.20427986037334953, |
|
"grad_norm": 0.38191312551498413, |
|
"learning_rate": 9.664371772805508e-05, |
|
"loss": 1.8201, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 0.204583396570041, |
|
"grad_norm": 0.3938577175140381, |
|
"learning_rate": 9.663865546218487e-05, |
|
"loss": 1.5166, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 0.20488693276673242, |
|
"grad_norm": 0.46312233805656433, |
|
"learning_rate": 9.663359319631468e-05, |
|
"loss": 1.4652, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.20519046896342388, |
|
"grad_norm": 0.4087234139442444, |
|
"learning_rate": 9.662853093044448e-05, |
|
"loss": 1.8288, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 0.20549400516011535, |
|
"grad_norm": 0.37329304218292236, |
|
"learning_rate": 9.662346866457427e-05, |
|
"loss": 1.9084, |
|
"step": 677 |
|
}, |
|
{ |
|
"epoch": 0.2057975413568068, |
|
"grad_norm": 0.37109607458114624, |
|
"learning_rate": 9.661840639870407e-05, |
|
"loss": 1.9674, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 0.20610107755349824, |
|
"grad_norm": 0.3936561942100525, |
|
"learning_rate": 9.661334413283386e-05, |
|
"loss": 2.0342, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 0.2064046137501897, |
|
"grad_norm": 0.4621008634567261, |
|
"learning_rate": 9.660828186696366e-05, |
|
"loss": 1.5157, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.20670814994688116, |
|
"grad_norm": 0.3849358558654785, |
|
"learning_rate": 9.660321960109345e-05, |
|
"loss": 2.1513, |
|
"step": 681 |
|
}, |
|
{ |
|
"epoch": 0.20701168614357263, |
|
"grad_norm": 0.4873330295085907, |
|
"learning_rate": 9.659815733522325e-05, |
|
"loss": 1.9116, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 0.2073152223402641, |
|
"grad_norm": 0.4687885642051697, |
|
"learning_rate": 9.659309506935304e-05, |
|
"loss": 2.278, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 0.20761875853695552, |
|
"grad_norm": 0.3966952860355377, |
|
"learning_rate": 9.658803280348285e-05, |
|
"loss": 1.4625, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 0.20792229473364698, |
|
"grad_norm": 0.5782402157783508, |
|
"learning_rate": 9.658297053761264e-05, |
|
"loss": 2.2779, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.20822583093033845, |
|
"grad_norm": 0.37465688586235046, |
|
"learning_rate": 9.657790827174244e-05, |
|
"loss": 1.8462, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 0.2085293671270299, |
|
"grad_norm": 0.34408631920814514, |
|
"learning_rate": 9.657284600587223e-05, |
|
"loss": 1.9881, |
|
"step": 687 |
|
}, |
|
{ |
|
"epoch": 0.20883290332372134, |
|
"grad_norm": 0.6892307996749878, |
|
"learning_rate": 9.656778374000203e-05, |
|
"loss": 1.9835, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 0.2091364395204128, |
|
"grad_norm": 0.3698042631149292, |
|
"learning_rate": 9.656272147413182e-05, |
|
"loss": 2.0665, |
|
"step": 689 |
|
}, |
|
{ |
|
"epoch": 0.20943997571710427, |
|
"grad_norm": 0.41265738010406494, |
|
"learning_rate": 9.655765920826162e-05, |
|
"loss": 2.0231, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.20974351191379573, |
|
"grad_norm": 0.38251030445098877, |
|
"learning_rate": 9.655259694239141e-05, |
|
"loss": 1.7058, |
|
"step": 691 |
|
}, |
|
{ |
|
"epoch": 0.2100470481104872, |
|
"grad_norm": 0.468905508518219, |
|
"learning_rate": 9.65475346765212e-05, |
|
"loss": 1.6182, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 0.21035058430717862, |
|
"grad_norm": 1.0570484399795532, |
|
"learning_rate": 9.6542472410651e-05, |
|
"loss": 2.0165, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 0.21065412050387008, |
|
"grad_norm": 0.3978007435798645, |
|
"learning_rate": 9.653741014478081e-05, |
|
"loss": 1.7859, |
|
"step": 694 |
|
}, |
|
{ |
|
"epoch": 0.21095765670056155, |
|
"grad_norm": 0.42616939544677734, |
|
"learning_rate": 9.65323478789106e-05, |
|
"loss": 1.5197, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.211261192897253, |
|
"grad_norm": 0.39380377531051636, |
|
"learning_rate": 9.65272856130404e-05, |
|
"loss": 1.3796, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 0.21156472909394444, |
|
"grad_norm": 0.38581010699272156, |
|
"learning_rate": 9.65222233471702e-05, |
|
"loss": 1.8214, |
|
"step": 697 |
|
}, |
|
{ |
|
"epoch": 0.2118682652906359, |
|
"grad_norm": 0.3610150218009949, |
|
"learning_rate": 9.651716108129999e-05, |
|
"loss": 1.897, |
|
"step": 698 |
|
}, |
|
{ |
|
"epoch": 0.21217180148732737, |
|
"grad_norm": 0.44913700222969055, |
|
"learning_rate": 9.651209881542978e-05, |
|
"loss": 1.8873, |
|
"step": 699 |
|
}, |
|
{ |
|
"epoch": 0.21247533768401883, |
|
"grad_norm": 1.9599745273590088, |
|
"learning_rate": 9.650703654955959e-05, |
|
"loss": 1.946, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.21277887388071026, |
|
"grad_norm": 1.195716381072998, |
|
"learning_rate": 9.650197428368939e-05, |
|
"loss": 1.8749, |
|
"step": 701 |
|
}, |
|
{ |
|
"epoch": 0.21308241007740172, |
|
"grad_norm": 0.3154665231704712, |
|
"learning_rate": 9.649691201781918e-05, |
|
"loss": 1.5924, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 0.21338594627409319, |
|
"grad_norm": 0.3550672233104706, |
|
"learning_rate": 9.649184975194898e-05, |
|
"loss": 1.6094, |
|
"step": 703 |
|
}, |
|
{ |
|
"epoch": 0.21368948247078465, |
|
"grad_norm": 0.33744126558303833, |
|
"learning_rate": 9.648678748607877e-05, |
|
"loss": 1.3399, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 0.2139930186674761, |
|
"grad_norm": 0.33931079506874084, |
|
"learning_rate": 9.648172522020858e-05, |
|
"loss": 2.0096, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.21429655486416754, |
|
"grad_norm": 0.38951364159584045, |
|
"learning_rate": 9.647666295433837e-05, |
|
"loss": 1.7676, |
|
"step": 706 |
|
}, |
|
{ |
|
"epoch": 0.214600091060859, |
|
"grad_norm": 0.408087819814682, |
|
"learning_rate": 9.647160068846817e-05, |
|
"loss": 1.7948, |
|
"step": 707 |
|
}, |
|
{ |
|
"epoch": 0.21490362725755047, |
|
"grad_norm": 0.37058812379837036, |
|
"learning_rate": 9.646653842259796e-05, |
|
"loss": 1.9891, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 0.21520716345424193, |
|
"grad_norm": 0.4003254473209381, |
|
"learning_rate": 9.646147615672776e-05, |
|
"loss": 1.8895, |
|
"step": 709 |
|
}, |
|
{ |
|
"epoch": 0.21551069965093336, |
|
"grad_norm": 0.38838204741477966, |
|
"learning_rate": 9.645641389085755e-05, |
|
"loss": 2.0121, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.21581423584762482, |
|
"grad_norm": 0.41912707686424255, |
|
"learning_rate": 9.645135162498735e-05, |
|
"loss": 1.9804, |
|
"step": 711 |
|
}, |
|
{ |
|
"epoch": 0.2161177720443163, |
|
"grad_norm": 0.353454053401947, |
|
"learning_rate": 9.644628935911714e-05, |
|
"loss": 2.0478, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 0.21642130824100775, |
|
"grad_norm": 0.3825720548629761, |
|
"learning_rate": 9.644122709324694e-05, |
|
"loss": 1.6676, |
|
"step": 713 |
|
}, |
|
{ |
|
"epoch": 0.21672484443769918, |
|
"grad_norm": 0.4197389781475067, |
|
"learning_rate": 9.643616482737675e-05, |
|
"loss": 1.9732, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 0.21702838063439064, |
|
"grad_norm": 0.4452435076236725, |
|
"learning_rate": 9.643110256150654e-05, |
|
"loss": 2.0918, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.2173319168310821, |
|
"grad_norm": 0.3366299271583557, |
|
"learning_rate": 9.642604029563634e-05, |
|
"loss": 1.7469, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 0.21763545302777357, |
|
"grad_norm": 0.31280553340911865, |
|
"learning_rate": 9.642097802976613e-05, |
|
"loss": 2.0348, |
|
"step": 717 |
|
}, |
|
{ |
|
"epoch": 0.21793898922446503, |
|
"grad_norm": 0.425503671169281, |
|
"learning_rate": 9.641591576389593e-05, |
|
"loss": 1.3629, |
|
"step": 718 |
|
}, |
|
{ |
|
"epoch": 0.21824252542115646, |
|
"grad_norm": 0.3986441493034363, |
|
"learning_rate": 9.641085349802572e-05, |
|
"loss": 1.4703, |
|
"step": 719 |
|
}, |
|
{ |
|
"epoch": 0.21854606161784793, |
|
"grad_norm": 0.34377026557922363, |
|
"learning_rate": 9.640579123215552e-05, |
|
"loss": 1.9788, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.2188495978145394, |
|
"grad_norm": 0.3445621430873871, |
|
"learning_rate": 9.640072896628531e-05, |
|
"loss": 1.9137, |
|
"step": 721 |
|
}, |
|
{ |
|
"epoch": 0.21915313401123085, |
|
"grad_norm": 0.40363574028015137, |
|
"learning_rate": 9.63956667004151e-05, |
|
"loss": 1.8911, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 0.21945667020792228, |
|
"grad_norm": 0.36166059970855713, |
|
"learning_rate": 9.639060443454491e-05, |
|
"loss": 1.9176, |
|
"step": 723 |
|
}, |
|
{ |
|
"epoch": 0.21976020640461374, |
|
"grad_norm": 0.7732321619987488, |
|
"learning_rate": 9.638554216867471e-05, |
|
"loss": 2.1942, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 0.2200637426013052, |
|
"grad_norm": 0.4042604863643646, |
|
"learning_rate": 9.63804799028045e-05, |
|
"loss": 1.8964, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.22036727879799667, |
|
"grad_norm": 0.3888862133026123, |
|
"learning_rate": 9.63754176369343e-05, |
|
"loss": 1.716, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 0.22067081499468813, |
|
"grad_norm": 0.32185250520706177, |
|
"learning_rate": 9.637035537106409e-05, |
|
"loss": 2.1227, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 0.22097435119137956, |
|
"grad_norm": 0.36421746015548706, |
|
"learning_rate": 9.636529310519389e-05, |
|
"loss": 1.3262, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 0.22127788738807103, |
|
"grad_norm": 0.42780765891075134, |
|
"learning_rate": 9.636023083932368e-05, |
|
"loss": 1.806, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 0.2215814235847625, |
|
"grad_norm": 0.3754510283470154, |
|
"learning_rate": 9.635516857345348e-05, |
|
"loss": 1.9286, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.22188495978145395, |
|
"grad_norm": 0.35199174284935, |
|
"learning_rate": 9.635010630758327e-05, |
|
"loss": 1.9703, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 0.22218849597814538, |
|
"grad_norm": 0.36272746324539185, |
|
"learning_rate": 9.634504404171307e-05, |
|
"loss": 1.7773, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 0.22249203217483685, |
|
"grad_norm": 0.4233802556991577, |
|
"learning_rate": 9.633998177584287e-05, |
|
"loss": 2.0016, |
|
"step": 733 |
|
}, |
|
{ |
|
"epoch": 0.2227955683715283, |
|
"grad_norm": 0.46138089895248413, |
|
"learning_rate": 9.633491950997267e-05, |
|
"loss": 1.764, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 0.22309910456821977, |
|
"grad_norm": 0.37863031029701233, |
|
"learning_rate": 9.632985724410246e-05, |
|
"loss": 1.6493, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.2234026407649112, |
|
"grad_norm": 0.4493837356567383, |
|
"learning_rate": 9.632479497823226e-05, |
|
"loss": 2.04, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 0.22370617696160267, |
|
"grad_norm": 0.581119179725647, |
|
"learning_rate": 9.631973271236205e-05, |
|
"loss": 1.777, |
|
"step": 737 |
|
}, |
|
{ |
|
"epoch": 0.22400971315829413, |
|
"grad_norm": 0.3730584979057312, |
|
"learning_rate": 9.631467044649185e-05, |
|
"loss": 1.8932, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 0.2243132493549856, |
|
"grad_norm": 0.351421594619751, |
|
"learning_rate": 9.630960818062164e-05, |
|
"loss": 2.3182, |
|
"step": 739 |
|
}, |
|
{ |
|
"epoch": 0.22461678555167705, |
|
"grad_norm": 0.4237976670265198, |
|
"learning_rate": 9.630454591475144e-05, |
|
"loss": 2.1315, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.22492032174836848, |
|
"grad_norm": 0.38544562458992004, |
|
"learning_rate": 9.629948364888123e-05, |
|
"loss": 1.9596, |
|
"step": 741 |
|
}, |
|
{ |
|
"epoch": 0.22522385794505995, |
|
"grad_norm": 0.407672256231308, |
|
"learning_rate": 9.629442138301104e-05, |
|
"loss": 1.8694, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 0.2255273941417514, |
|
"grad_norm": 0.4415782690048218, |
|
"learning_rate": 9.628935911714084e-05, |
|
"loss": 1.8658, |
|
"step": 743 |
|
}, |
|
{ |
|
"epoch": 0.22583093033844287, |
|
"grad_norm": 0.41300657391548157, |
|
"learning_rate": 9.628429685127063e-05, |
|
"loss": 2.0477, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 0.2261344665351343, |
|
"grad_norm": 0.36000654101371765, |
|
"learning_rate": 9.627923458540044e-05, |
|
"loss": 1.9045, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 0.22643800273182577, |
|
"grad_norm": 0.42653003334999084, |
|
"learning_rate": 9.627417231953023e-05, |
|
"loss": 1.2151, |
|
"step": 746 |
|
}, |
|
{ |
|
"epoch": 0.22674153892851723, |
|
"grad_norm": 0.4157649874687195, |
|
"learning_rate": 9.626911005366003e-05, |
|
"loss": 1.9335, |
|
"step": 747 |
|
}, |
|
{ |
|
"epoch": 0.2270450751252087, |
|
"grad_norm": 0.3805077373981476, |
|
"learning_rate": 9.626404778778982e-05, |
|
"loss": 2.0803, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 0.22734861132190012, |
|
"grad_norm": 0.39710867404937744, |
|
"learning_rate": 9.625898552191962e-05, |
|
"loss": 2.2628, |
|
"step": 749 |
|
}, |
|
{ |
|
"epoch": 0.22765214751859159, |
|
"grad_norm": 0.4012609124183655, |
|
"learning_rate": 9.625392325604941e-05, |
|
"loss": 1.9586, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.22795568371528305, |
|
"grad_norm": 0.9281008243560791, |
|
"learning_rate": 9.624886099017921e-05, |
|
"loss": 1.168, |
|
"step": 751 |
|
}, |
|
{ |
|
"epoch": 0.2282592199119745, |
|
"grad_norm": 0.36847764253616333, |
|
"learning_rate": 9.6243798724309e-05, |
|
"loss": 1.8907, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 0.22856275610866597, |
|
"grad_norm": 0.4531751573085785, |
|
"learning_rate": 9.623873645843881e-05, |
|
"loss": 1.4511, |
|
"step": 753 |
|
}, |
|
{ |
|
"epoch": 0.2288662923053574, |
|
"grad_norm": 0.36623820662498474, |
|
"learning_rate": 9.62336741925686e-05, |
|
"loss": 1.6707, |
|
"step": 754 |
|
}, |
|
{ |
|
"epoch": 0.22916982850204887, |
|
"grad_norm": 0.3104342222213745, |
|
"learning_rate": 9.62286119266984e-05, |
|
"loss": 1.988, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 0.22947336469874033, |
|
"grad_norm": 0.3790084421634674, |
|
"learning_rate": 9.62235496608282e-05, |
|
"loss": 1.979, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 0.2297769008954318, |
|
"grad_norm": 0.3642970323562622, |
|
"learning_rate": 9.621848739495799e-05, |
|
"loss": 1.9998, |
|
"step": 757 |
|
}, |
|
{ |
|
"epoch": 0.23008043709212322, |
|
"grad_norm": 0.34588292241096497, |
|
"learning_rate": 9.621342512908779e-05, |
|
"loss": 2.0511, |
|
"step": 758 |
|
}, |
|
{ |
|
"epoch": 0.2303839732888147, |
|
"grad_norm": 0.3556496798992157, |
|
"learning_rate": 9.620836286321758e-05, |
|
"loss": 1.8785, |
|
"step": 759 |
|
}, |
|
{ |
|
"epoch": 0.23068750948550615, |
|
"grad_norm": 0.4669034779071808, |
|
"learning_rate": 9.620330059734737e-05, |
|
"loss": 1.5027, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.2309910456821976, |
|
"grad_norm": 0.39685994386672974, |
|
"learning_rate": 9.619823833147717e-05, |
|
"loss": 2.1644, |
|
"step": 761 |
|
}, |
|
{ |
|
"epoch": 0.23129458187888904, |
|
"grad_norm": 0.39183005690574646, |
|
"learning_rate": 9.619317606560698e-05, |
|
"loss": 1.9615, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 0.2315981180755805, |
|
"grad_norm": 0.36401331424713135, |
|
"learning_rate": 9.618811379973677e-05, |
|
"loss": 1.7535, |
|
"step": 763 |
|
}, |
|
{ |
|
"epoch": 0.23190165427227197, |
|
"grad_norm": 0.43118295073509216, |
|
"learning_rate": 9.618305153386657e-05, |
|
"loss": 1.884, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 0.23220519046896343, |
|
"grad_norm": 0.5061665177345276, |
|
"learning_rate": 9.617798926799636e-05, |
|
"loss": 2.0051, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 0.2325087266656549, |
|
"grad_norm": 0.4487472474575043, |
|
"learning_rate": 9.617292700212616e-05, |
|
"loss": 1.6831, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 0.23281226286234633, |
|
"grad_norm": 0.3660997450351715, |
|
"learning_rate": 9.616786473625595e-05, |
|
"loss": 1.9276, |
|
"step": 767 |
|
}, |
|
{ |
|
"epoch": 0.2331157990590378, |
|
"grad_norm": 0.3823026716709137, |
|
"learning_rate": 9.616280247038575e-05, |
|
"loss": 1.9817, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 0.23341933525572925, |
|
"grad_norm": 0.32568395137786865, |
|
"learning_rate": 9.615774020451554e-05, |
|
"loss": 1.508, |
|
"step": 769 |
|
}, |
|
{ |
|
"epoch": 0.2337228714524207, |
|
"grad_norm": 0.34985265135765076, |
|
"learning_rate": 9.615267793864534e-05, |
|
"loss": 1.6793, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.23402640764911214, |
|
"grad_norm": 0.38563957810401917, |
|
"learning_rate": 9.614761567277513e-05, |
|
"loss": 1.588, |
|
"step": 771 |
|
}, |
|
{ |
|
"epoch": 0.2343299438458036, |
|
"grad_norm": 0.33572301268577576, |
|
"learning_rate": 9.614255340690494e-05, |
|
"loss": 1.9541, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 0.23463348004249507, |
|
"grad_norm": 0.33936449885368347, |
|
"learning_rate": 9.613749114103473e-05, |
|
"loss": 1.9311, |
|
"step": 773 |
|
}, |
|
{ |
|
"epoch": 0.23493701623918653, |
|
"grad_norm": 0.34984657168388367, |
|
"learning_rate": 9.613242887516453e-05, |
|
"loss": 1.9532, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 0.235240552435878, |
|
"grad_norm": 0.3651373088359833, |
|
"learning_rate": 9.612736660929432e-05, |
|
"loss": 1.8815, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.23554408863256943, |
|
"grad_norm": 0.4317852854728699, |
|
"learning_rate": 9.612230434342412e-05, |
|
"loss": 2.0262, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 0.2358476248292609, |
|
"grad_norm": 0.375522255897522, |
|
"learning_rate": 9.611724207755391e-05, |
|
"loss": 1.9964, |
|
"step": 777 |
|
}, |
|
{ |
|
"epoch": 0.23615116102595235, |
|
"grad_norm": 0.37290844321250916, |
|
"learning_rate": 9.611217981168371e-05, |
|
"loss": 1.7456, |
|
"step": 778 |
|
}, |
|
{ |
|
"epoch": 0.2364546972226438, |
|
"grad_norm": 0.3768545985221863, |
|
"learning_rate": 9.61071175458135e-05, |
|
"loss": 1.9591, |
|
"step": 779 |
|
}, |
|
{ |
|
"epoch": 0.23675823341933525, |
|
"grad_norm": 0.3147246837615967, |
|
"learning_rate": 9.61020552799433e-05, |
|
"loss": 1.4033, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.2370617696160267, |
|
"grad_norm": 0.4480874240398407, |
|
"learning_rate": 9.60969930140731e-05, |
|
"loss": 1.9598, |
|
"step": 781 |
|
}, |
|
{ |
|
"epoch": 0.23736530581271817, |
|
"grad_norm": 0.7287562489509583, |
|
"learning_rate": 9.60919307482029e-05, |
|
"loss": 2.0097, |
|
"step": 782 |
|
}, |
|
{ |
|
"epoch": 0.23766884200940963, |
|
"grad_norm": 0.36199334263801575, |
|
"learning_rate": 9.60868684823327e-05, |
|
"loss": 1.8089, |
|
"step": 783 |
|
}, |
|
{ |
|
"epoch": 0.23797237820610107, |
|
"grad_norm": 0.32855263352394104, |
|
"learning_rate": 9.608180621646249e-05, |
|
"loss": 2.0199, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 0.23827591440279253, |
|
"grad_norm": 0.37182894349098206, |
|
"learning_rate": 9.607674395059229e-05, |
|
"loss": 1.7253, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 0.238579450599484, |
|
"grad_norm": 0.3365595042705536, |
|
"learning_rate": 9.607168168472208e-05, |
|
"loss": 1.9308, |
|
"step": 786 |
|
}, |
|
{ |
|
"epoch": 0.23888298679617545, |
|
"grad_norm": 0.400685578584671, |
|
"learning_rate": 9.606661941885187e-05, |
|
"loss": 1.8939, |
|
"step": 787 |
|
}, |
|
{ |
|
"epoch": 0.2391865229928669, |
|
"grad_norm": 0.6354159116744995, |
|
"learning_rate": 9.606155715298167e-05, |
|
"loss": 2.1476, |
|
"step": 788 |
|
}, |
|
{ |
|
"epoch": 0.23949005918955835, |
|
"grad_norm": 0.4196738600730896, |
|
"learning_rate": 9.605649488711148e-05, |
|
"loss": 1.8457, |
|
"step": 789 |
|
}, |
|
{ |
|
"epoch": 0.2397935953862498, |
|
"grad_norm": 0.35839545726776123, |
|
"learning_rate": 9.605143262124127e-05, |
|
"loss": 1.824, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.24009713158294127, |
|
"grad_norm": 0.3597940504550934, |
|
"learning_rate": 9.604637035537107e-05, |
|
"loss": 1.9583, |
|
"step": 791 |
|
}, |
|
{ |
|
"epoch": 0.24040066777963273, |
|
"grad_norm": 0.5783160924911499, |
|
"learning_rate": 9.604130808950088e-05, |
|
"loss": 2.2, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 0.24070420397632417, |
|
"grad_norm": 0.3544808030128479, |
|
"learning_rate": 9.603624582363067e-05, |
|
"loss": 2.1092, |
|
"step": 793 |
|
}, |
|
{ |
|
"epoch": 0.24100774017301563, |
|
"grad_norm": 0.41170623898506165, |
|
"learning_rate": 9.603118355776047e-05, |
|
"loss": 1.6004, |
|
"step": 794 |
|
}, |
|
{ |
|
"epoch": 0.2413112763697071, |
|
"grad_norm": 0.3832992613315582, |
|
"learning_rate": 9.602612129189026e-05, |
|
"loss": 1.4981, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 0.24161481256639855, |
|
"grad_norm": 0.5239993333816528, |
|
"learning_rate": 9.602105902602006e-05, |
|
"loss": 1.6026, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 0.24191834876308999, |
|
"grad_norm": 0.38445138931274414, |
|
"learning_rate": 9.601599676014985e-05, |
|
"loss": 1.5765, |
|
"step": 797 |
|
}, |
|
{ |
|
"epoch": 0.24222188495978145, |
|
"grad_norm": 0.38520511984825134, |
|
"learning_rate": 9.601093449427964e-05, |
|
"loss": 2.1069, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 0.2425254211564729, |
|
"grad_norm": 0.3519560694694519, |
|
"learning_rate": 9.600587222840944e-05, |
|
"loss": 1.8896, |
|
"step": 799 |
|
}, |
|
{ |
|
"epoch": 0.24282895735316437, |
|
"grad_norm": 0.5392457246780396, |
|
"learning_rate": 9.600080996253923e-05, |
|
"loss": 1.6273, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.24313249354985583, |
|
"grad_norm": 0.4213111996650696, |
|
"learning_rate": 9.599574769666904e-05, |
|
"loss": 1.489, |
|
"step": 801 |
|
}, |
|
{ |
|
"epoch": 0.24343602974654727, |
|
"grad_norm": 0.4006531834602356, |
|
"learning_rate": 9.599068543079884e-05, |
|
"loss": 1.9842, |
|
"step": 802 |
|
}, |
|
{ |
|
"epoch": 0.24373956594323873, |
|
"grad_norm": 0.3792324364185333, |
|
"learning_rate": 9.598562316492863e-05, |
|
"loss": 1.727, |
|
"step": 803 |
|
}, |
|
{ |
|
"epoch": 0.2440431021399302, |
|
"grad_norm": 0.3555270731449127, |
|
"learning_rate": 9.598056089905843e-05, |
|
"loss": 1.68, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 0.24434663833662165, |
|
"grad_norm": 0.33837342262268066, |
|
"learning_rate": 9.597549863318822e-05, |
|
"loss": 2.0709, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 0.2446501745333131, |
|
"grad_norm": 0.3812510371208191, |
|
"learning_rate": 9.597043636731802e-05, |
|
"loss": 2.1211, |
|
"step": 806 |
|
}, |
|
{ |
|
"epoch": 0.24495371073000455, |
|
"grad_norm": 0.33870792388916016, |
|
"learning_rate": 9.596537410144781e-05, |
|
"loss": 2.1047, |
|
"step": 807 |
|
}, |
|
{ |
|
"epoch": 0.245257246926696, |
|
"grad_norm": 0.3948252201080322, |
|
"learning_rate": 9.59603118355776e-05, |
|
"loss": 1.7553, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 0.24556078312338747, |
|
"grad_norm": 0.39410725235939026, |
|
"learning_rate": 9.59552495697074e-05, |
|
"loss": 1.9383, |
|
"step": 809 |
|
}, |
|
{ |
|
"epoch": 0.2458643193200789, |
|
"grad_norm": 0.37794989347457886, |
|
"learning_rate": 9.59501873038372e-05, |
|
"loss": 1.9115, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.24616785551677037, |
|
"grad_norm": 1.6270610094070435, |
|
"learning_rate": 9.5945125037967e-05, |
|
"loss": 1.8472, |
|
"step": 811 |
|
}, |
|
{ |
|
"epoch": 0.24647139171346183, |
|
"grad_norm": 0.3724587559700012, |
|
"learning_rate": 9.59400627720968e-05, |
|
"loss": 1.9087, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 0.2467749279101533, |
|
"grad_norm": 0.4097403585910797, |
|
"learning_rate": 9.59350005062266e-05, |
|
"loss": 1.8325, |
|
"step": 813 |
|
}, |
|
{ |
|
"epoch": 0.24707846410684475, |
|
"grad_norm": 0.4052940905094147, |
|
"learning_rate": 9.592993824035639e-05, |
|
"loss": 2.0241, |
|
"step": 814 |
|
}, |
|
{ |
|
"epoch": 0.2473820003035362, |
|
"grad_norm": 0.3887682557106018, |
|
"learning_rate": 9.592487597448618e-05, |
|
"loss": 1.6114, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 0.24768553650022765, |
|
"grad_norm": 0.404450386762619, |
|
"learning_rate": 9.591981370861598e-05, |
|
"loss": 1.8384, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 0.2479890726969191, |
|
"grad_norm": 0.7955893874168396, |
|
"learning_rate": 9.591475144274577e-05, |
|
"loss": 2.2149, |
|
"step": 817 |
|
}, |
|
{ |
|
"epoch": 0.24829260889361057, |
|
"grad_norm": 4.355859279632568, |
|
"learning_rate": 9.590968917687557e-05, |
|
"loss": 2.3753, |
|
"step": 818 |
|
}, |
|
{ |
|
"epoch": 0.248596145090302, |
|
"grad_norm": 0.3698444962501526, |
|
"learning_rate": 9.590462691100536e-05, |
|
"loss": 1.7354, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 0.24889968128699347, |
|
"grad_norm": 0.3658899962902069, |
|
"learning_rate": 9.589956464513517e-05, |
|
"loss": 1.7803, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.24920321748368493, |
|
"grad_norm": 0.405072957277298, |
|
"learning_rate": 9.589450237926497e-05, |
|
"loss": 1.7684, |
|
"step": 821 |
|
}, |
|
{ |
|
"epoch": 0.2495067536803764, |
|
"grad_norm": 0.7590973973274231, |
|
"learning_rate": 9.588944011339476e-05, |
|
"loss": 1.9466, |
|
"step": 822 |
|
}, |
|
{ |
|
"epoch": 0.24981028987706785, |
|
"grad_norm": 0.5217581987380981, |
|
"learning_rate": 9.588437784752456e-05, |
|
"loss": 2.1281, |
|
"step": 823 |
|
}, |
|
{ |
|
"epoch": 0.2501138260737593, |
|
"grad_norm": 0.3716435134410858, |
|
"learning_rate": 9.587931558165435e-05, |
|
"loss": 2.114, |
|
"step": 824 |
|
}, |
|
{ |
|
"epoch": 0.25041736227045075, |
|
"grad_norm": 0.44017624855041504, |
|
"learning_rate": 9.587425331578414e-05, |
|
"loss": 2.0445, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.2507208984671422, |
|
"grad_norm": 0.370370090007782, |
|
"learning_rate": 9.586919104991394e-05, |
|
"loss": 1.8674, |
|
"step": 826 |
|
}, |
|
{ |
|
"epoch": 0.2510244346638337, |
|
"grad_norm": 0.32125499844551086, |
|
"learning_rate": 9.586412878404373e-05, |
|
"loss": 1.4129, |
|
"step": 827 |
|
}, |
|
{ |
|
"epoch": 0.2513279708605251, |
|
"grad_norm": 0.4143073856830597, |
|
"learning_rate": 9.585906651817353e-05, |
|
"loss": 1.9895, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 0.2516315070572166, |
|
"grad_norm": 0.3492576777935028, |
|
"learning_rate": 9.585400425230334e-05, |
|
"loss": 2.0669, |
|
"step": 829 |
|
}, |
|
{ |
|
"epoch": 0.25193504325390803, |
|
"grad_norm": 0.4044751524925232, |
|
"learning_rate": 9.584894198643313e-05, |
|
"loss": 1.5909, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.25223857945059946, |
|
"grad_norm": 0.3410158157348633, |
|
"learning_rate": 9.584387972056293e-05, |
|
"loss": 1.7485, |
|
"step": 831 |
|
}, |
|
{ |
|
"epoch": 0.25254211564729095, |
|
"grad_norm": 0.340320348739624, |
|
"learning_rate": 9.583881745469272e-05, |
|
"loss": 1.8897, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 0.2528456518439824, |
|
"grad_norm": 0.35516276955604553, |
|
"learning_rate": 9.583375518882252e-05, |
|
"loss": 1.6332, |
|
"step": 833 |
|
}, |
|
{ |
|
"epoch": 0.2531491880406738, |
|
"grad_norm": 0.4099842309951782, |
|
"learning_rate": 9.582869292295232e-05, |
|
"loss": 1.5617, |
|
"step": 834 |
|
}, |
|
{ |
|
"epoch": 0.2534527242373653, |
|
"grad_norm": 0.38086098432540894, |
|
"learning_rate": 9.582363065708212e-05, |
|
"loss": 2.0837, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 0.25375626043405675, |
|
"grad_norm": 0.8040663003921509, |
|
"learning_rate": 9.581856839121191e-05, |
|
"loss": 1.8587, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 0.25405979663074824, |
|
"grad_norm": 0.41297683119773865, |
|
"learning_rate": 9.581350612534171e-05, |
|
"loss": 1.9602, |
|
"step": 837 |
|
}, |
|
{ |
|
"epoch": 0.25436333282743967, |
|
"grad_norm": 0.38155442476272583, |
|
"learning_rate": 9.58084438594715e-05, |
|
"loss": 1.375, |
|
"step": 838 |
|
}, |
|
{ |
|
"epoch": 0.2546668690241311, |
|
"grad_norm": 0.3956829905509949, |
|
"learning_rate": 9.58033815936013e-05, |
|
"loss": 1.9617, |
|
"step": 839 |
|
}, |
|
{ |
|
"epoch": 0.2549704052208226, |
|
"grad_norm": 0.38675928115844727, |
|
"learning_rate": 9.579831932773111e-05, |
|
"loss": 1.8186, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.255273941417514, |
|
"grad_norm": 0.33989018201828003, |
|
"learning_rate": 9.57932570618609e-05, |
|
"loss": 2.1734, |
|
"step": 841 |
|
}, |
|
{ |
|
"epoch": 0.2555774776142055, |
|
"grad_norm": 0.3240448534488678, |
|
"learning_rate": 9.57881947959907e-05, |
|
"loss": 1.6238, |
|
"step": 842 |
|
}, |
|
{ |
|
"epoch": 0.25588101381089695, |
|
"grad_norm": 0.6117075681686401, |
|
"learning_rate": 9.578313253012049e-05, |
|
"loss": 1.986, |
|
"step": 843 |
|
}, |
|
{ |
|
"epoch": 0.2561845500075884, |
|
"grad_norm": 0.3781290650367737, |
|
"learning_rate": 9.577807026425029e-05, |
|
"loss": 2.0021, |
|
"step": 844 |
|
}, |
|
{ |
|
"epoch": 0.2564880862042799, |
|
"grad_norm": 0.4373374879360199, |
|
"learning_rate": 9.577300799838008e-05, |
|
"loss": 2.0195, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 0.2567916224009713, |
|
"grad_norm": 0.4125923216342926, |
|
"learning_rate": 9.576794573250988e-05, |
|
"loss": 1.9412, |
|
"step": 846 |
|
}, |
|
{ |
|
"epoch": 0.2570951585976628, |
|
"grad_norm": 0.3557007908821106, |
|
"learning_rate": 9.576288346663967e-05, |
|
"loss": 1.8098, |
|
"step": 847 |
|
}, |
|
{ |
|
"epoch": 0.25739869479435423, |
|
"grad_norm": 0.49475541710853577, |
|
"learning_rate": 9.575782120076947e-05, |
|
"loss": 1.5756, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 0.25770223099104567, |
|
"grad_norm": 0.3507518768310547, |
|
"learning_rate": 9.575275893489926e-05, |
|
"loss": 1.6413, |
|
"step": 849 |
|
}, |
|
{ |
|
"epoch": 0.25800576718773716, |
|
"grad_norm": 0.39508333802223206, |
|
"learning_rate": 9.574769666902907e-05, |
|
"loss": 1.9777, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.2583093033844286, |
|
"grad_norm": 0.328807532787323, |
|
"learning_rate": 9.574263440315886e-05, |
|
"loss": 1.4948, |
|
"step": 851 |
|
}, |
|
{ |
|
"epoch": 0.25861283958112, |
|
"grad_norm": 0.3154551386833191, |
|
"learning_rate": 9.573757213728866e-05, |
|
"loss": 1.7809, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 0.2589163757778115, |
|
"grad_norm": 0.502554178237915, |
|
"learning_rate": 9.573250987141845e-05, |
|
"loss": 1.4369, |
|
"step": 853 |
|
}, |
|
{ |
|
"epoch": 0.25921991197450295, |
|
"grad_norm": 0.4416670799255371, |
|
"learning_rate": 9.572744760554825e-05, |
|
"loss": 1.7364, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 0.25952344817119444, |
|
"grad_norm": 0.43228060007095337, |
|
"learning_rate": 9.572238533967804e-05, |
|
"loss": 1.3281, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 0.25982698436788587, |
|
"grad_norm": 0.3714723289012909, |
|
"learning_rate": 9.571732307380784e-05, |
|
"loss": 2.0893, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 0.2601305205645773, |
|
"grad_norm": 0.3309679925441742, |
|
"learning_rate": 9.571226080793763e-05, |
|
"loss": 1.7982, |
|
"step": 857 |
|
}, |
|
{ |
|
"epoch": 0.2604340567612688, |
|
"grad_norm": 0.3709767460823059, |
|
"learning_rate": 9.570719854206743e-05, |
|
"loss": 1.8628, |
|
"step": 858 |
|
}, |
|
{ |
|
"epoch": 0.26073759295796023, |
|
"grad_norm": 0.6020816564559937, |
|
"learning_rate": 9.570213627619724e-05, |
|
"loss": 2.0077, |
|
"step": 859 |
|
}, |
|
{ |
|
"epoch": 0.2610411291546517, |
|
"grad_norm": 0.30620431900024414, |
|
"learning_rate": 9.569707401032703e-05, |
|
"loss": 1.8834, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.26134466535134315, |
|
"grad_norm": 0.41518962383270264, |
|
"learning_rate": 9.569201174445683e-05, |
|
"loss": 1.8025, |
|
"step": 861 |
|
}, |
|
{ |
|
"epoch": 0.2616482015480346, |
|
"grad_norm": 0.3919786512851715, |
|
"learning_rate": 9.568694947858662e-05, |
|
"loss": 1.995, |
|
"step": 862 |
|
}, |
|
{ |
|
"epoch": 0.2619517377447261, |
|
"grad_norm": 0.47429168224334717, |
|
"learning_rate": 9.568188721271641e-05, |
|
"loss": 1.9423, |
|
"step": 863 |
|
}, |
|
{ |
|
"epoch": 0.2622552739414175, |
|
"grad_norm": 0.8941421508789062, |
|
"learning_rate": 9.567682494684621e-05, |
|
"loss": 1.5046, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 0.26255881013810894, |
|
"grad_norm": 0.4357859194278717, |
|
"learning_rate": 9.5671762680976e-05, |
|
"loss": 2.0023, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 0.26286234633480043, |
|
"grad_norm": 0.3873944878578186, |
|
"learning_rate": 9.56667004151058e-05, |
|
"loss": 2.0607, |
|
"step": 866 |
|
}, |
|
{ |
|
"epoch": 0.26316588253149187, |
|
"grad_norm": 0.4355853497982025, |
|
"learning_rate": 9.56616381492356e-05, |
|
"loss": 1.8254, |
|
"step": 867 |
|
}, |
|
{ |
|
"epoch": 0.26346941872818336, |
|
"grad_norm": 0.3882213234901428, |
|
"learning_rate": 9.56565758833654e-05, |
|
"loss": 1.7809, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 0.2637729549248748, |
|
"grad_norm": 0.4021656811237335, |
|
"learning_rate": 9.56515136174952e-05, |
|
"loss": 2.1321, |
|
"step": 869 |
|
}, |
|
{ |
|
"epoch": 0.2640764911215662, |
|
"grad_norm": 0.43587526679039, |
|
"learning_rate": 9.564645135162499e-05, |
|
"loss": 1.7865, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.2643800273182577, |
|
"grad_norm": 0.364045649766922, |
|
"learning_rate": 9.564138908575479e-05, |
|
"loss": 1.8173, |
|
"step": 871 |
|
}, |
|
{ |
|
"epoch": 0.26468356351494915, |
|
"grad_norm": 0.3956625461578369, |
|
"learning_rate": 9.563632681988458e-05, |
|
"loss": 1.4822, |
|
"step": 872 |
|
}, |
|
{ |
|
"epoch": 0.26498709971164064, |
|
"grad_norm": 0.40755051374435425, |
|
"learning_rate": 9.563126455401438e-05, |
|
"loss": 1.9418, |
|
"step": 873 |
|
}, |
|
{ |
|
"epoch": 0.2652906359083321, |
|
"grad_norm": 0.39405086636543274, |
|
"learning_rate": 9.562620228814417e-05, |
|
"loss": 1.4529, |
|
"step": 874 |
|
}, |
|
{ |
|
"epoch": 0.2655941721050235, |
|
"grad_norm": 0.4400351047515869, |
|
"learning_rate": 9.562114002227397e-05, |
|
"loss": 2.1095, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.265897708301715, |
|
"grad_norm": 0.40135496854782104, |
|
"learning_rate": 9.561607775640376e-05, |
|
"loss": 1.9462, |
|
"step": 876 |
|
}, |
|
{ |
|
"epoch": 0.26620124449840643, |
|
"grad_norm": 0.5949604511260986, |
|
"learning_rate": 9.561101549053356e-05, |
|
"loss": 1.8797, |
|
"step": 877 |
|
}, |
|
{ |
|
"epoch": 0.26650478069509786, |
|
"grad_norm": 0.38301005959510803, |
|
"learning_rate": 9.560595322466336e-05, |
|
"loss": 2.0887, |
|
"step": 878 |
|
}, |
|
{ |
|
"epoch": 0.26680831689178935, |
|
"grad_norm": 0.6215627789497375, |
|
"learning_rate": 9.560089095879317e-05, |
|
"loss": 1.7846, |
|
"step": 879 |
|
}, |
|
{ |
|
"epoch": 0.2671118530884808, |
|
"grad_norm": 0.4041058123111725, |
|
"learning_rate": 9.559582869292297e-05, |
|
"loss": 1.5127, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.2674153892851723, |
|
"grad_norm": 0.30281975865364075, |
|
"learning_rate": 9.559076642705276e-05, |
|
"loss": 1.8487, |
|
"step": 881 |
|
}, |
|
{ |
|
"epoch": 0.2677189254818637, |
|
"grad_norm": 0.34536200761795044, |
|
"learning_rate": 9.558570416118256e-05, |
|
"loss": 1.8976, |
|
"step": 882 |
|
}, |
|
{ |
|
"epoch": 0.26802246167855515, |
|
"grad_norm": 0.367245614528656, |
|
"learning_rate": 9.558064189531235e-05, |
|
"loss": 1.9804, |
|
"step": 883 |
|
}, |
|
{ |
|
"epoch": 0.26832599787524664, |
|
"grad_norm": 0.41750359535217285, |
|
"learning_rate": 9.557557962944215e-05, |
|
"loss": 1.5932, |
|
"step": 884 |
|
}, |
|
{ |
|
"epoch": 0.26862953407193807, |
|
"grad_norm": 0.7777047157287598, |
|
"learning_rate": 9.557051736357194e-05, |
|
"loss": 1.8513, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 0.26893307026862956, |
|
"grad_norm": 0.3720252215862274, |
|
"learning_rate": 9.556545509770174e-05, |
|
"loss": 2.1819, |
|
"step": 886 |
|
}, |
|
{ |
|
"epoch": 0.269236606465321, |
|
"grad_norm": 0.7321712970733643, |
|
"learning_rate": 9.556039283183153e-05, |
|
"loss": 1.4653, |
|
"step": 887 |
|
}, |
|
{ |
|
"epoch": 0.2695401426620124, |
|
"grad_norm": 0.4140429198741913, |
|
"learning_rate": 9.555533056596133e-05, |
|
"loss": 1.9816, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 0.2698436788587039, |
|
"grad_norm": 0.40684935450553894, |
|
"learning_rate": 9.555026830009113e-05, |
|
"loss": 1.5866, |
|
"step": 889 |
|
}, |
|
{ |
|
"epoch": 0.27014721505539535, |
|
"grad_norm": 0.4067225754261017, |
|
"learning_rate": 9.554520603422093e-05, |
|
"loss": 1.5951, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.2704507512520868, |
|
"grad_norm": 0.34240391850471497, |
|
"learning_rate": 9.554014376835072e-05, |
|
"loss": 1.9076, |
|
"step": 891 |
|
}, |
|
{ |
|
"epoch": 0.2707542874487783, |
|
"grad_norm": 0.4634522795677185, |
|
"learning_rate": 9.553508150248052e-05, |
|
"loss": 1.9856, |
|
"step": 892 |
|
}, |
|
{ |
|
"epoch": 0.2710578236454697, |
|
"grad_norm": 0.408015638589859, |
|
"learning_rate": 9.553001923661031e-05, |
|
"loss": 1.7997, |
|
"step": 893 |
|
}, |
|
{ |
|
"epoch": 0.2713613598421612, |
|
"grad_norm": 0.3894648253917694, |
|
"learning_rate": 9.552495697074011e-05, |
|
"loss": 1.8381, |
|
"step": 894 |
|
}, |
|
{ |
|
"epoch": 0.27166489603885263, |
|
"grad_norm": 0.37494730949401855, |
|
"learning_rate": 9.55198947048699e-05, |
|
"loss": 2.0548, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 0.27196843223554407, |
|
"grad_norm": 0.39796411991119385, |
|
"learning_rate": 9.55148324389997e-05, |
|
"loss": 1.9272, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 0.27227196843223556, |
|
"grad_norm": 0.40153494477272034, |
|
"learning_rate": 9.550977017312949e-05, |
|
"loss": 1.7136, |
|
"step": 897 |
|
}, |
|
{ |
|
"epoch": 0.272575504628927, |
|
"grad_norm": 0.39771386981010437, |
|
"learning_rate": 9.55047079072593e-05, |
|
"loss": 2.1017, |
|
"step": 898 |
|
}, |
|
{ |
|
"epoch": 0.2728790408256185, |
|
"grad_norm": 0.4085974097251892, |
|
"learning_rate": 9.54996456413891e-05, |
|
"loss": 1.3951, |
|
"step": 899 |
|
}, |
|
{ |
|
"epoch": 0.2731825770223099, |
|
"grad_norm": 0.39849239587783813, |
|
"learning_rate": 9.549458337551889e-05, |
|
"loss": 1.9988, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.27348611321900135, |
|
"grad_norm": 0.38662001490592957, |
|
"learning_rate": 9.548952110964868e-05, |
|
"loss": 1.8491, |
|
"step": 901 |
|
}, |
|
{ |
|
"epoch": 0.27378964941569284, |
|
"grad_norm": 0.38078710436820984, |
|
"learning_rate": 9.548445884377848e-05, |
|
"loss": 1.9, |
|
"step": 902 |
|
}, |
|
{ |
|
"epoch": 0.27409318561238427, |
|
"grad_norm": 0.3548724949359894, |
|
"learning_rate": 9.547939657790827e-05, |
|
"loss": 1.8754, |
|
"step": 903 |
|
}, |
|
{ |
|
"epoch": 0.2743967218090757, |
|
"grad_norm": 0.37712323665618896, |
|
"learning_rate": 9.547433431203807e-05, |
|
"loss": 1.5497, |
|
"step": 904 |
|
}, |
|
{ |
|
"epoch": 0.2747002580057672, |
|
"grad_norm": 0.4060449004173279, |
|
"learning_rate": 9.546927204616786e-05, |
|
"loss": 1.7231, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 0.27500379420245863, |
|
"grad_norm": 0.42080479860305786, |
|
"learning_rate": 9.546420978029766e-05, |
|
"loss": 2.1538, |
|
"step": 906 |
|
}, |
|
{ |
|
"epoch": 0.2753073303991501, |
|
"grad_norm": 0.4034046232700348, |
|
"learning_rate": 9.545914751442747e-05, |
|
"loss": 1.7335, |
|
"step": 907 |
|
}, |
|
{ |
|
"epoch": 0.27561086659584155, |
|
"grad_norm": 0.3676345646381378, |
|
"learning_rate": 9.545408524855726e-05, |
|
"loss": 1.6193, |
|
"step": 908 |
|
}, |
|
{ |
|
"epoch": 0.275914402792533, |
|
"grad_norm": 0.3349851965904236, |
|
"learning_rate": 9.544902298268706e-05, |
|
"loss": 1.8997, |
|
"step": 909 |
|
}, |
|
{ |
|
"epoch": 0.2762179389892245, |
|
"grad_norm": 0.3676302134990692, |
|
"learning_rate": 9.544396071681685e-05, |
|
"loss": 1.4031, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.2765214751859159, |
|
"grad_norm": 0.36593666672706604, |
|
"learning_rate": 9.543889845094665e-05, |
|
"loss": 1.8838, |
|
"step": 911 |
|
}, |
|
{ |
|
"epoch": 0.2768250113826074, |
|
"grad_norm": 0.3793712258338928, |
|
"learning_rate": 9.543383618507644e-05, |
|
"loss": 1.5949, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 0.27712854757929883, |
|
"grad_norm": 0.47586631774902344, |
|
"learning_rate": 9.542877391920624e-05, |
|
"loss": 1.5687, |
|
"step": 913 |
|
}, |
|
{ |
|
"epoch": 0.27743208377599027, |
|
"grad_norm": 0.38850024342536926, |
|
"learning_rate": 9.542371165333603e-05, |
|
"loss": 1.7336, |
|
"step": 914 |
|
}, |
|
{ |
|
"epoch": 0.27773561997268176, |
|
"grad_norm": 0.4039680063724518, |
|
"learning_rate": 9.541864938746583e-05, |
|
"loss": 2.0476, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 0.2780391561693732, |
|
"grad_norm": 0.40498992800712585, |
|
"learning_rate": 9.541358712159562e-05, |
|
"loss": 1.6699, |
|
"step": 916 |
|
}, |
|
{ |
|
"epoch": 0.2783426923660646, |
|
"grad_norm": 0.39011168479919434, |
|
"learning_rate": 9.540852485572543e-05, |
|
"loss": 1.9935, |
|
"step": 917 |
|
}, |
|
{ |
|
"epoch": 0.2786462285627561, |
|
"grad_norm": 0.3864549696445465, |
|
"learning_rate": 9.540346258985522e-05, |
|
"loss": 1.8271, |
|
"step": 918 |
|
}, |
|
{ |
|
"epoch": 0.27894976475944755, |
|
"grad_norm": 0.33493247628211975, |
|
"learning_rate": 9.539840032398502e-05, |
|
"loss": 1.856, |
|
"step": 919 |
|
}, |
|
{ |
|
"epoch": 0.27925330095613904, |
|
"grad_norm": 0.34132060408592224, |
|
"learning_rate": 9.539333805811481e-05, |
|
"loss": 1.8836, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.2795568371528305, |
|
"grad_norm": 1.5312176942825317, |
|
"learning_rate": 9.538827579224461e-05, |
|
"loss": 2.0207, |
|
"step": 921 |
|
}, |
|
{ |
|
"epoch": 0.2798603733495219, |
|
"grad_norm": 0.333932489156723, |
|
"learning_rate": 9.53832135263744e-05, |
|
"loss": 2.0908, |
|
"step": 922 |
|
}, |
|
{ |
|
"epoch": 0.2801639095462134, |
|
"grad_norm": 0.3688269555568695, |
|
"learning_rate": 9.537815126050421e-05, |
|
"loss": 1.8464, |
|
"step": 923 |
|
}, |
|
{ |
|
"epoch": 0.28046744574290483, |
|
"grad_norm": 0.4097294211387634, |
|
"learning_rate": 9.5373088994634e-05, |
|
"loss": 1.6891, |
|
"step": 924 |
|
}, |
|
{ |
|
"epoch": 0.2807709819395963, |
|
"grad_norm": 0.3737453818321228, |
|
"learning_rate": 9.53680267287638e-05, |
|
"loss": 2.0549, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.28107451813628775, |
|
"grad_norm": 0.6109428405761719, |
|
"learning_rate": 9.53629644628936e-05, |
|
"loss": 1.9437, |
|
"step": 926 |
|
}, |
|
{ |
|
"epoch": 0.2813780543329792, |
|
"grad_norm": 0.46215322613716125, |
|
"learning_rate": 9.535790219702339e-05, |
|
"loss": 1.5133, |
|
"step": 927 |
|
}, |
|
{ |
|
"epoch": 0.2816815905296707, |
|
"grad_norm": 0.8070108294487, |
|
"learning_rate": 9.53528399311532e-05, |
|
"loss": 1.8843, |
|
"step": 928 |
|
}, |
|
{ |
|
"epoch": 0.2819851267263621, |
|
"grad_norm": 0.40304142236709595, |
|
"learning_rate": 9.534777766528299e-05, |
|
"loss": 1.9742, |
|
"step": 929 |
|
}, |
|
{ |
|
"epoch": 0.2822886629230536, |
|
"grad_norm": 0.35046708583831787, |
|
"learning_rate": 9.534271539941279e-05, |
|
"loss": 1.8969, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.28259219911974504, |
|
"grad_norm": 0.37241777777671814, |
|
"learning_rate": 9.533765313354258e-05, |
|
"loss": 1.8138, |
|
"step": 931 |
|
}, |
|
{ |
|
"epoch": 0.28289573531643647, |
|
"grad_norm": 0.38689473271369934, |
|
"learning_rate": 9.533259086767238e-05, |
|
"loss": 1.669, |
|
"step": 932 |
|
}, |
|
{ |
|
"epoch": 0.28319927151312796, |
|
"grad_norm": 0.3672066926956177, |
|
"learning_rate": 9.532752860180217e-05, |
|
"loss": 1.9093, |
|
"step": 933 |
|
}, |
|
{ |
|
"epoch": 0.2835028077098194, |
|
"grad_norm": 0.4022217392921448, |
|
"learning_rate": 9.532246633593197e-05, |
|
"loss": 1.6959, |
|
"step": 934 |
|
}, |
|
{ |
|
"epoch": 0.2838063439065108, |
|
"grad_norm": 0.3894721269607544, |
|
"learning_rate": 9.531740407006176e-05, |
|
"loss": 1.9898, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 0.2841098801032023, |
|
"grad_norm": 0.4395015835762024, |
|
"learning_rate": 9.531234180419156e-05, |
|
"loss": 1.5538, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 0.28441341629989375, |
|
"grad_norm": 0.8121886849403381, |
|
"learning_rate": 9.530727953832136e-05, |
|
"loss": 1.7403, |
|
"step": 937 |
|
}, |
|
{ |
|
"epoch": 0.28471695249658524, |
|
"grad_norm": 0.40073227882385254, |
|
"learning_rate": 9.530221727245116e-05, |
|
"loss": 2.0544, |
|
"step": 938 |
|
}, |
|
{ |
|
"epoch": 0.2850204886932767, |
|
"grad_norm": 0.3571331202983856, |
|
"learning_rate": 9.529715500658095e-05, |
|
"loss": 1.7157, |
|
"step": 939 |
|
}, |
|
{ |
|
"epoch": 0.2853240248899681, |
|
"grad_norm": 0.485147625207901, |
|
"learning_rate": 9.529209274071075e-05, |
|
"loss": 2.1489, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.2856275610866596, |
|
"grad_norm": 0.6882160305976868, |
|
"learning_rate": 9.528703047484054e-05, |
|
"loss": 1.8458, |
|
"step": 941 |
|
}, |
|
{ |
|
"epoch": 0.28593109728335103, |
|
"grad_norm": 0.7156968116760254, |
|
"learning_rate": 9.528196820897034e-05, |
|
"loss": 1.9529, |
|
"step": 942 |
|
}, |
|
{ |
|
"epoch": 0.2862346334800425, |
|
"grad_norm": 0.4198112487792969, |
|
"learning_rate": 9.527690594310013e-05, |
|
"loss": 2.0355, |
|
"step": 943 |
|
}, |
|
{ |
|
"epoch": 0.28653816967673396, |
|
"grad_norm": 0.4178343117237091, |
|
"learning_rate": 9.527184367722993e-05, |
|
"loss": 1.5801, |
|
"step": 944 |
|
}, |
|
{ |
|
"epoch": 0.2868417058734254, |
|
"grad_norm": 0.3721866011619568, |
|
"learning_rate": 9.526678141135972e-05, |
|
"loss": 2.1657, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 0.2871452420701169, |
|
"grad_norm": 0.38586944341659546, |
|
"learning_rate": 9.526171914548953e-05, |
|
"loss": 1.4879, |
|
"step": 946 |
|
}, |
|
{ |
|
"epoch": 0.2874487782668083, |
|
"grad_norm": 0.42727598547935486, |
|
"learning_rate": 9.525665687961933e-05, |
|
"loss": 1.8434, |
|
"step": 947 |
|
}, |
|
{ |
|
"epoch": 0.28775231446349975, |
|
"grad_norm": 0.3686284124851227, |
|
"learning_rate": 9.525159461374912e-05, |
|
"loss": 1.9346, |
|
"step": 948 |
|
}, |
|
{ |
|
"epoch": 0.28805585066019124, |
|
"grad_norm": 0.41984260082244873, |
|
"learning_rate": 9.524653234787892e-05, |
|
"loss": 1.4474, |
|
"step": 949 |
|
}, |
|
{ |
|
"epoch": 0.28835938685688267, |
|
"grad_norm": 0.4530123174190521, |
|
"learning_rate": 9.524147008200871e-05, |
|
"loss": 1.6863, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.28866292305357416, |
|
"grad_norm": 0.40047594904899597, |
|
"learning_rate": 9.52364078161385e-05, |
|
"loss": 1.908, |
|
"step": 951 |
|
}, |
|
{ |
|
"epoch": 0.2889664592502656, |
|
"grad_norm": 0.3757762610912323, |
|
"learning_rate": 9.52313455502683e-05, |
|
"loss": 1.6235, |
|
"step": 952 |
|
}, |
|
{ |
|
"epoch": 0.28926999544695703, |
|
"grad_norm": 0.4337126612663269, |
|
"learning_rate": 9.52262832843981e-05, |
|
"loss": 1.6229, |
|
"step": 953 |
|
}, |
|
{ |
|
"epoch": 0.2895735316436485, |
|
"grad_norm": 0.4407886564731598, |
|
"learning_rate": 9.522122101852789e-05, |
|
"loss": 1.875, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 0.28987706784033995, |
|
"grad_norm": 0.5278657674789429, |
|
"learning_rate": 9.521615875265768e-05, |
|
"loss": 1.7199, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 0.29018060403703144, |
|
"grad_norm": 0.4441334307193756, |
|
"learning_rate": 9.521109648678749e-05, |
|
"loss": 1.1319, |
|
"step": 956 |
|
}, |
|
{ |
|
"epoch": 0.2904841402337229, |
|
"grad_norm": 0.3992663025856018, |
|
"learning_rate": 9.520603422091729e-05, |
|
"loss": 1.6948, |
|
"step": 957 |
|
}, |
|
{ |
|
"epoch": 0.2907876764304143, |
|
"grad_norm": 0.3979544937610626, |
|
"learning_rate": 9.520097195504708e-05, |
|
"loss": 1.8689, |
|
"step": 958 |
|
}, |
|
{ |
|
"epoch": 0.2910912126271058, |
|
"grad_norm": 0.4011298418045044, |
|
"learning_rate": 9.519590968917688e-05, |
|
"loss": 1.9491, |
|
"step": 959 |
|
}, |
|
{ |
|
"epoch": 0.29139474882379723, |
|
"grad_norm": 0.4377354383468628, |
|
"learning_rate": 9.519084742330667e-05, |
|
"loss": 1.7274, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.29169828502048867, |
|
"grad_norm": 0.5056617856025696, |
|
"learning_rate": 9.518578515743647e-05, |
|
"loss": 2.006, |
|
"step": 961 |
|
}, |
|
{ |
|
"epoch": 0.29200182121718016, |
|
"grad_norm": 0.36736002564430237, |
|
"learning_rate": 9.518072289156626e-05, |
|
"loss": 1.6558, |
|
"step": 962 |
|
}, |
|
{ |
|
"epoch": 0.2923053574138716, |
|
"grad_norm": 0.37966540455818176, |
|
"learning_rate": 9.517566062569606e-05, |
|
"loss": 2.0098, |
|
"step": 963 |
|
}, |
|
{ |
|
"epoch": 0.2926088936105631, |
|
"grad_norm": 0.4026505947113037, |
|
"learning_rate": 9.517059835982585e-05, |
|
"loss": 1.868, |
|
"step": 964 |
|
}, |
|
{ |
|
"epoch": 0.2929124298072545, |
|
"grad_norm": 0.461910218000412, |
|
"learning_rate": 9.516553609395566e-05, |
|
"loss": 2.1131, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 0.29321596600394595, |
|
"grad_norm": 0.4329175651073456, |
|
"learning_rate": 9.516047382808545e-05, |
|
"loss": 2.0068, |
|
"step": 966 |
|
}, |
|
{ |
|
"epoch": 0.29351950220063744, |
|
"grad_norm": 0.7611956000328064, |
|
"learning_rate": 9.515541156221526e-05, |
|
"loss": 1.9177, |
|
"step": 967 |
|
}, |
|
{ |
|
"epoch": 0.2938230383973289, |
|
"grad_norm": 0.6180218458175659, |
|
"learning_rate": 9.515034929634506e-05, |
|
"loss": 1.5603, |
|
"step": 968 |
|
}, |
|
{ |
|
"epoch": 0.29412657459402036, |
|
"grad_norm": 0.6556726694107056, |
|
"learning_rate": 9.514528703047485e-05, |
|
"loss": 2.1081, |
|
"step": 969 |
|
}, |
|
{ |
|
"epoch": 0.2944301107907118, |
|
"grad_norm": 0.3379404842853546, |
|
"learning_rate": 9.514022476460465e-05, |
|
"loss": 1.9701, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.29473364698740323, |
|
"grad_norm": 0.42676112055778503, |
|
"learning_rate": 9.513516249873444e-05, |
|
"loss": 1.6116, |
|
"step": 971 |
|
}, |
|
{ |
|
"epoch": 0.2950371831840947, |
|
"grad_norm": 0.35374894738197327, |
|
"learning_rate": 9.513010023286424e-05, |
|
"loss": 2.0621, |
|
"step": 972 |
|
}, |
|
{ |
|
"epoch": 0.29534071938078615, |
|
"grad_norm": 0.33012476563453674, |
|
"learning_rate": 9.512503796699403e-05, |
|
"loss": 1.4534, |
|
"step": 973 |
|
}, |
|
{ |
|
"epoch": 0.2956442555774776, |
|
"grad_norm": 0.37993383407592773, |
|
"learning_rate": 9.511997570112383e-05, |
|
"loss": 1.6306, |
|
"step": 974 |
|
}, |
|
{ |
|
"epoch": 0.2959477917741691, |
|
"grad_norm": 0.47140204906463623, |
|
"learning_rate": 9.511491343525362e-05, |
|
"loss": 2.0465, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.2962513279708605, |
|
"grad_norm": 0.40235936641693115, |
|
"learning_rate": 9.510985116938343e-05, |
|
"loss": 1.8247, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 0.296554864167552, |
|
"grad_norm": 0.3992665112018585, |
|
"learning_rate": 9.510478890351322e-05, |
|
"loss": 1.5702, |
|
"step": 977 |
|
}, |
|
{ |
|
"epoch": 0.29685840036424344, |
|
"grad_norm": 0.4469521641731262, |
|
"learning_rate": 9.509972663764302e-05, |
|
"loss": 1.8811, |
|
"step": 978 |
|
}, |
|
{ |
|
"epoch": 0.29716193656093487, |
|
"grad_norm": 0.41400644183158875, |
|
"learning_rate": 9.509466437177281e-05, |
|
"loss": 1.5374, |
|
"step": 979 |
|
}, |
|
{ |
|
"epoch": 0.29746547275762636, |
|
"grad_norm": 0.36348387598991394, |
|
"learning_rate": 9.508960210590261e-05, |
|
"loss": 1.9022, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.2977690089543178, |
|
"grad_norm": 0.4069242477416992, |
|
"learning_rate": 9.50845398400324e-05, |
|
"loss": 2.0066, |
|
"step": 981 |
|
}, |
|
{ |
|
"epoch": 0.2980725451510093, |
|
"grad_norm": 0.3684113323688507, |
|
"learning_rate": 9.50794775741622e-05, |
|
"loss": 1.8972, |
|
"step": 982 |
|
}, |
|
{ |
|
"epoch": 0.2983760813477007, |
|
"grad_norm": 0.40827688574790955, |
|
"learning_rate": 9.5074415308292e-05, |
|
"loss": 2.0659, |
|
"step": 983 |
|
}, |
|
{ |
|
"epoch": 0.29867961754439215, |
|
"grad_norm": 0.32065409421920776, |
|
"learning_rate": 9.506935304242179e-05, |
|
"loss": 2.0008, |
|
"step": 984 |
|
}, |
|
{ |
|
"epoch": 0.29898315374108364, |
|
"grad_norm": 0.38805294036865234, |
|
"learning_rate": 9.50642907765516e-05, |
|
"loss": 1.5027, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 0.2992866899377751, |
|
"grad_norm": 0.3656708896160126, |
|
"learning_rate": 9.505922851068139e-05, |
|
"loss": 1.7931, |
|
"step": 986 |
|
}, |
|
{ |
|
"epoch": 0.2995902261344665, |
|
"grad_norm": 0.4354289770126343, |
|
"learning_rate": 9.505416624481119e-05, |
|
"loss": 2.1183, |
|
"step": 987 |
|
}, |
|
{ |
|
"epoch": 0.299893762331158, |
|
"grad_norm": 0.3970641493797302, |
|
"learning_rate": 9.504910397894098e-05, |
|
"loss": 1.8188, |
|
"step": 988 |
|
}, |
|
{ |
|
"epoch": 0.30019729852784943, |
|
"grad_norm": 0.35527995228767395, |
|
"learning_rate": 9.504404171307078e-05, |
|
"loss": 1.6329, |
|
"step": 989 |
|
}, |
|
{ |
|
"epoch": 0.3005008347245409, |
|
"grad_norm": 0.4018630385398865, |
|
"learning_rate": 9.503897944720057e-05, |
|
"loss": 1.993, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.30080437092123236, |
|
"grad_norm": 0.36514052748680115, |
|
"learning_rate": 9.503391718133037e-05, |
|
"loss": 2.0482, |
|
"step": 991 |
|
}, |
|
{ |
|
"epoch": 0.3011079071179238, |
|
"grad_norm": 0.3790993094444275, |
|
"learning_rate": 9.502885491546016e-05, |
|
"loss": 2.0286, |
|
"step": 992 |
|
}, |
|
{ |
|
"epoch": 0.3014114433146153, |
|
"grad_norm": 0.314779669046402, |
|
"learning_rate": 9.502379264958995e-05, |
|
"loss": 1.8135, |
|
"step": 993 |
|
}, |
|
{ |
|
"epoch": 0.3017149795113067, |
|
"grad_norm": 0.42383378744125366, |
|
"learning_rate": 9.501873038371975e-05, |
|
"loss": 1.8783, |
|
"step": 994 |
|
}, |
|
{ |
|
"epoch": 0.3020185157079982, |
|
"grad_norm": 0.4036683738231659, |
|
"learning_rate": 9.501366811784956e-05, |
|
"loss": 1.6091, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 0.30232205190468964, |
|
"grad_norm": 0.3611324429512024, |
|
"learning_rate": 9.500860585197935e-05, |
|
"loss": 1.3388, |
|
"step": 996 |
|
}, |
|
{ |
|
"epoch": 0.30262558810138107, |
|
"grad_norm": 0.44210389256477356, |
|
"learning_rate": 9.500354358610915e-05, |
|
"loss": 1.6133, |
|
"step": 997 |
|
}, |
|
{ |
|
"epoch": 0.30292912429807256, |
|
"grad_norm": 0.37780526280403137, |
|
"learning_rate": 9.499848132023894e-05, |
|
"loss": 1.9993, |
|
"step": 998 |
|
}, |
|
{ |
|
"epoch": 0.303232660494764, |
|
"grad_norm": 0.469959557056427, |
|
"learning_rate": 9.499341905436874e-05, |
|
"loss": 1.8094, |
|
"step": 999 |
|
}, |
|
{ |
|
"epoch": 0.30353619669145543, |
|
"grad_norm": 0.38992664217948914, |
|
"learning_rate": 9.498835678849853e-05, |
|
"loss": 1.8975, |
|
"step": 1000 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 19764, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 6, |
|
"save_steps": 1000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 6.448897543149158e+17, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|