|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 1250, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0008, |
|
"grad_norm": 5.9686970710754395, |
|
"learning_rate": 4.9999921043206356e-06, |
|
"loss": 6.1536, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0016, |
|
"grad_norm": 5.313859462738037, |
|
"learning_rate": 4.999968417332415e-06, |
|
"loss": 1.8192, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.0024, |
|
"grad_norm": 3.8611130714416504, |
|
"learning_rate": 4.999928939184958e-06, |
|
"loss": 5.7147, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.0032, |
|
"grad_norm": 8.215139389038086, |
|
"learning_rate": 4.99987367012763e-06, |
|
"loss": 1.9633, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.004, |
|
"grad_norm": 2.859307050704956, |
|
"learning_rate": 4.999802610509541e-06, |
|
"loss": 5.4413, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.0048, |
|
"grad_norm": 10.999748229980469, |
|
"learning_rate": 4.999715760779541e-06, |
|
"loss": 1.9931, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.0056, |
|
"grad_norm": 2.5857369899749756, |
|
"learning_rate": 4.999613121486222e-06, |
|
"loss": 5.2138, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.0064, |
|
"grad_norm": 4.739017009735107, |
|
"learning_rate": 4.9994946932779076e-06, |
|
"loss": 1.5203, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.0072, |
|
"grad_norm": 2.03410267829895, |
|
"learning_rate": 4.999360476902656e-06, |
|
"loss": 5.1349, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.008, |
|
"grad_norm": 4.154623508453369, |
|
"learning_rate": 4.99921047320825e-06, |
|
"loss": 1.6121, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0088, |
|
"grad_norm": 1.8263472318649292, |
|
"learning_rate": 4.999044683142196e-06, |
|
"loss": 4.9737, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.0096, |
|
"grad_norm": 4.39143705368042, |
|
"learning_rate": 4.998863107751711e-06, |
|
"loss": 1.4866, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.0104, |
|
"grad_norm": 1.6841758489608765, |
|
"learning_rate": 4.998665748183727e-06, |
|
"loss": 5.0078, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.0112, |
|
"grad_norm": 4.099013805389404, |
|
"learning_rate": 4.998452605684874e-06, |
|
"loss": 1.6304, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.012, |
|
"grad_norm": 1.6769129037857056, |
|
"learning_rate": 4.9982236816014735e-06, |
|
"loss": 4.8359, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.0128, |
|
"grad_norm": 3.6601269245147705, |
|
"learning_rate": 4.9979789773795365e-06, |
|
"loss": 1.6408, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.0136, |
|
"grad_norm": 1.6234138011932373, |
|
"learning_rate": 4.997718494564747e-06, |
|
"loss": 4.9268, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.0144, |
|
"grad_norm": 4.540709018707275, |
|
"learning_rate": 4.9974422348024565e-06, |
|
"loss": 1.4653, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.0152, |
|
"grad_norm": 2.201477527618408, |
|
"learning_rate": 4.997150199837671e-06, |
|
"loss": 4.8255, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.016, |
|
"grad_norm": 3.3589704036712646, |
|
"learning_rate": 4.996842391515045e-06, |
|
"loss": 1.3599, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0168, |
|
"grad_norm": 1.7828714847564697, |
|
"learning_rate": 4.996518811778858e-06, |
|
"loss": 4.7924, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.0176, |
|
"grad_norm": 4.722200870513916, |
|
"learning_rate": 4.99617946267302e-06, |
|
"loss": 1.8165, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.0184, |
|
"grad_norm": 1.5609182119369507, |
|
"learning_rate": 4.995824346341041e-06, |
|
"loss": 4.8322, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.0192, |
|
"grad_norm": 3.8967134952545166, |
|
"learning_rate": 4.995453465026033e-06, |
|
"loss": 1.49, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.199491024017334, |
|
"learning_rate": 4.9950668210706795e-06, |
|
"loss": 4.6516, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.0208, |
|
"grad_norm": 4.164550304412842, |
|
"learning_rate": 4.994664416917236e-06, |
|
"loss": 1.3359, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.0216, |
|
"grad_norm": 1.9813035726547241, |
|
"learning_rate": 4.994246255107506e-06, |
|
"loss": 4.6697, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.0224, |
|
"grad_norm": 5.564512729644775, |
|
"learning_rate": 4.993812338282826e-06, |
|
"loss": 1.6348, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.0232, |
|
"grad_norm": 2.7316086292266846, |
|
"learning_rate": 4.993362669184051e-06, |
|
"loss": 4.4999, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.024, |
|
"grad_norm": 4.501605987548828, |
|
"learning_rate": 4.992897250651535e-06, |
|
"loss": 1.4643, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.0248, |
|
"grad_norm": 1.8927552700042725, |
|
"learning_rate": 4.992416085625115e-06, |
|
"loss": 4.7085, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.0256, |
|
"grad_norm": 4.785287380218506, |
|
"learning_rate": 4.9919191771440905e-06, |
|
"loss": 1.3398, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.0264, |
|
"grad_norm": 2.4881515502929688, |
|
"learning_rate": 4.991406528347206e-06, |
|
"loss": 4.5912, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.0272, |
|
"grad_norm": 4.189312934875488, |
|
"learning_rate": 4.990878142472628e-06, |
|
"loss": 1.4647, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.028, |
|
"grad_norm": 2.654892921447754, |
|
"learning_rate": 4.990334022857932e-06, |
|
"loss": 4.4038, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.0288, |
|
"grad_norm": 5.841195583343506, |
|
"learning_rate": 4.989774172940071e-06, |
|
"loss": 1.5347, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.0296, |
|
"grad_norm": 3.269841432571411, |
|
"learning_rate": 4.989198596255361e-06, |
|
"loss": 4.4978, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.0304, |
|
"grad_norm": 3.6912543773651123, |
|
"learning_rate": 4.988607296439459e-06, |
|
"loss": 1.3615, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.0312, |
|
"grad_norm": 3.773468255996704, |
|
"learning_rate": 4.988000277227334e-06, |
|
"loss": 4.4462, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.032, |
|
"grad_norm": 4.216678142547607, |
|
"learning_rate": 4.9873775424532515e-06, |
|
"loss": 1.3803, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.0328, |
|
"grad_norm": 4.231056213378906, |
|
"learning_rate": 4.98673909605074e-06, |
|
"loss": 4.4349, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.0336, |
|
"grad_norm": 4.05332088470459, |
|
"learning_rate": 4.986084942052577e-06, |
|
"loss": 1.3321, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.0344, |
|
"grad_norm": 3.9502322673797607, |
|
"learning_rate": 4.985415084590752e-06, |
|
"loss": 4.2693, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.0352, |
|
"grad_norm": 8.568007469177246, |
|
"learning_rate": 4.984729527896451e-06, |
|
"loss": 1.6135, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.036, |
|
"grad_norm": 4.460508346557617, |
|
"learning_rate": 4.984028276300021e-06, |
|
"loss": 4.4412, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.0368, |
|
"grad_norm": 7.591355323791504, |
|
"learning_rate": 4.9833113342309495e-06, |
|
"loss": 1.6569, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.0376, |
|
"grad_norm": 5.810396194458008, |
|
"learning_rate": 4.9825787062178315e-06, |
|
"loss": 4.1632, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.0384, |
|
"grad_norm": 10.894949913024902, |
|
"learning_rate": 4.9818303968883445e-06, |
|
"loss": 1.6721, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.0392, |
|
"grad_norm": 4.217193126678467, |
|
"learning_rate": 4.981066410969215e-06, |
|
"loss": 4.1738, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 8.75684642791748, |
|
"learning_rate": 4.980286753286196e-06, |
|
"loss": 1.3856, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.0408, |
|
"grad_norm": 3.8983495235443115, |
|
"learning_rate": 4.9794914287640264e-06, |
|
"loss": 4.0982, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.0416, |
|
"grad_norm": 6.7597527503967285, |
|
"learning_rate": 4.978680442426409e-06, |
|
"loss": 1.4406, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.0424, |
|
"grad_norm": 5.493980407714844, |
|
"learning_rate": 4.977853799395976e-06, |
|
"loss": 4.3028, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.0432, |
|
"grad_norm": 7.1781487464904785, |
|
"learning_rate": 4.977011504894253e-06, |
|
"loss": 1.4716, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.044, |
|
"grad_norm": 4.196126937866211, |
|
"learning_rate": 4.9761535642416284e-06, |
|
"loss": 4.1292, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.0448, |
|
"grad_norm": 7.720696926116943, |
|
"learning_rate": 4.975279982857324e-06, |
|
"loss": 1.5968, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.0456, |
|
"grad_norm": 1.6588771343231201, |
|
"learning_rate": 4.974390766259353e-06, |
|
"loss": 4.2463, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.0464, |
|
"grad_norm": 10.156800270080566, |
|
"learning_rate": 4.973485920064491e-06, |
|
"loss": 1.4834, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.0472, |
|
"grad_norm": 1.59371018409729, |
|
"learning_rate": 4.972565449988238e-06, |
|
"loss": 4.0996, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.048, |
|
"grad_norm": 8.833647727966309, |
|
"learning_rate": 4.971629361844785e-06, |
|
"loss": 1.6226, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.0488, |
|
"grad_norm": 1.8904303312301636, |
|
"learning_rate": 4.970677661546972e-06, |
|
"loss": 4.1373, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.0496, |
|
"grad_norm": 7.343002796173096, |
|
"learning_rate": 4.969710355106256e-06, |
|
"loss": 1.5989, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.0504, |
|
"grad_norm": 1.5326752662658691, |
|
"learning_rate": 4.968727448632669e-06, |
|
"loss": 4.067, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.0512, |
|
"grad_norm": 5.595536708831787, |
|
"learning_rate": 4.967728948334784e-06, |
|
"loss": 1.515, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.052, |
|
"grad_norm": 2.240656852722168, |
|
"learning_rate": 4.96671486051967e-06, |
|
"loss": 3.9452, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.0528, |
|
"grad_norm": 8.656717300415039, |
|
"learning_rate": 4.965685191592859e-06, |
|
"loss": 1.7592, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.0536, |
|
"grad_norm": 1.6276272535324097, |
|
"learning_rate": 4.964639948058297e-06, |
|
"loss": 3.9894, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.0544, |
|
"grad_norm": 5.7422075271606445, |
|
"learning_rate": 4.963579136518312e-06, |
|
"loss": 1.5689, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.0552, |
|
"grad_norm": 1.9765911102294922, |
|
"learning_rate": 4.962502763673566e-06, |
|
"loss": 4.0761, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.056, |
|
"grad_norm": 6.2184224128723145, |
|
"learning_rate": 4.961410836323014e-06, |
|
"loss": 1.5643, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.0568, |
|
"grad_norm": 1.7013366222381592, |
|
"learning_rate": 4.960303361363863e-06, |
|
"loss": 3.9535, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.0576, |
|
"grad_norm": 5.7151713371276855, |
|
"learning_rate": 4.959180345791528e-06, |
|
"loss": 1.3778, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.0584, |
|
"grad_norm": 2.092637777328491, |
|
"learning_rate": 4.958041796699583e-06, |
|
"loss": 4.043, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.0592, |
|
"grad_norm": 6.953094482421875, |
|
"learning_rate": 4.956887721279726e-06, |
|
"loss": 1.4149, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 2.5431764125823975, |
|
"learning_rate": 4.9557181268217225e-06, |
|
"loss": 4.1433, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.0608, |
|
"grad_norm": 5.6638665199279785, |
|
"learning_rate": 4.954533020713367e-06, |
|
"loss": 1.3123, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.0616, |
|
"grad_norm": 2.033217668533325, |
|
"learning_rate": 4.953332410440434e-06, |
|
"loss": 4.12, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.0624, |
|
"grad_norm": 5.832539081573486, |
|
"learning_rate": 4.952116303586631e-06, |
|
"loss": 1.4276, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.0632, |
|
"grad_norm": 1.4119787216186523, |
|
"learning_rate": 4.95088470783355e-06, |
|
"loss": 3.9499, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.064, |
|
"grad_norm": 5.931257247924805, |
|
"learning_rate": 4.949637630960618e-06, |
|
"loss": 1.5232, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.0648, |
|
"grad_norm": 1.5532656908035278, |
|
"learning_rate": 4.94837508084505e-06, |
|
"loss": 3.9162, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.0656, |
|
"grad_norm": 5.160223007202148, |
|
"learning_rate": 4.947097065461801e-06, |
|
"loss": 1.7749, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.0664, |
|
"grad_norm": 1.274683952331543, |
|
"learning_rate": 4.945803592883509e-06, |
|
"loss": 3.9429, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.0672, |
|
"grad_norm": 4.50646448135376, |
|
"learning_rate": 4.94449467128045e-06, |
|
"loss": 1.3428, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.068, |
|
"grad_norm": 2.7638394832611084, |
|
"learning_rate": 4.943170308920484e-06, |
|
"loss": 4.0664, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.0688, |
|
"grad_norm": 5.305659770965576, |
|
"learning_rate": 4.9418305141690045e-06, |
|
"loss": 1.6382, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.0696, |
|
"grad_norm": 1.672782301902771, |
|
"learning_rate": 4.940475295488882e-06, |
|
"loss": 3.9736, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.0704, |
|
"grad_norm": 4.357553482055664, |
|
"learning_rate": 4.939104661440415e-06, |
|
"loss": 1.2025, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.0712, |
|
"grad_norm": 1.9459145069122314, |
|
"learning_rate": 4.937718620681273e-06, |
|
"loss": 3.8823, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.072, |
|
"grad_norm": 4.6320085525512695, |
|
"learning_rate": 4.9363171819664434e-06, |
|
"loss": 1.4891, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.0728, |
|
"grad_norm": 1.9804147481918335, |
|
"learning_rate": 4.934900354148173e-06, |
|
"loss": 3.673, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.0736, |
|
"grad_norm": 5.650574684143066, |
|
"learning_rate": 4.933468146175918e-06, |
|
"loss": 1.6462, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.0744, |
|
"grad_norm": 2.002102851867676, |
|
"learning_rate": 4.9320205670962815e-06, |
|
"loss": 3.9996, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.0752, |
|
"grad_norm": 5.602189540863037, |
|
"learning_rate": 4.930557626052961e-06, |
|
"loss": 1.57, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.076, |
|
"grad_norm": 1.618115782737732, |
|
"learning_rate": 4.929079332286685e-06, |
|
"loss": 3.9771, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.0768, |
|
"grad_norm": 4.976815223693848, |
|
"learning_rate": 4.927585695135162e-06, |
|
"loss": 1.3109, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.0776, |
|
"grad_norm": 1.5383416414260864, |
|
"learning_rate": 4.926076724033016e-06, |
|
"loss": 3.943, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.0784, |
|
"grad_norm": 5.538623809814453, |
|
"learning_rate": 4.924552428511727e-06, |
|
"loss": 1.5928, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.0792, |
|
"grad_norm": 1.1636689901351929, |
|
"learning_rate": 4.923012818199576e-06, |
|
"loss": 3.9089, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 5.035048484802246, |
|
"learning_rate": 4.921457902821578e-06, |
|
"loss": 1.709, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.0808, |
|
"grad_norm": 1.3163026571273804, |
|
"learning_rate": 4.919887692199423e-06, |
|
"loss": 3.9234, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.0816, |
|
"grad_norm": 4.93280029296875, |
|
"learning_rate": 4.9183021962514145e-06, |
|
"loss": 1.4215, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.0824, |
|
"grad_norm": 2.1531784534454346, |
|
"learning_rate": 4.9167014249924075e-06, |
|
"loss": 3.8196, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.0832, |
|
"grad_norm": 4.800553798675537, |
|
"learning_rate": 4.915085388533743e-06, |
|
"loss": 1.573, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.084, |
|
"grad_norm": 1.383305311203003, |
|
"learning_rate": 4.913454097083185e-06, |
|
"loss": 3.9708, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.0848, |
|
"grad_norm": 4.389811038970947, |
|
"learning_rate": 4.911807560944858e-06, |
|
"loss": 1.3961, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.0856, |
|
"grad_norm": 1.5299296379089355, |
|
"learning_rate": 4.910145790519177e-06, |
|
"loss": 3.8796, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.0864, |
|
"grad_norm": 5.052987575531006, |
|
"learning_rate": 4.90846879630279e-06, |
|
"loss": 1.3103, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.0872, |
|
"grad_norm": 1.417496919631958, |
|
"learning_rate": 4.906776588888502e-06, |
|
"loss": 3.9388, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.088, |
|
"grad_norm": 4.012498378753662, |
|
"learning_rate": 4.905069178965215e-06, |
|
"loss": 1.1366, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.0888, |
|
"grad_norm": 1.2801809310913086, |
|
"learning_rate": 4.903346577317859e-06, |
|
"loss": 3.872, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.0896, |
|
"grad_norm": 5.76353120803833, |
|
"learning_rate": 4.901608794827321e-06, |
|
"loss": 1.5188, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.0904, |
|
"grad_norm": 1.5510302782058716, |
|
"learning_rate": 4.89985584247038e-06, |
|
"loss": 3.807, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.0912, |
|
"grad_norm": 4.934327125549316, |
|
"learning_rate": 4.898087731319637e-06, |
|
"loss": 1.6052, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.092, |
|
"grad_norm": 1.849161982536316, |
|
"learning_rate": 4.89630447254344e-06, |
|
"loss": 3.8367, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.0928, |
|
"grad_norm": 5.75076150894165, |
|
"learning_rate": 4.894506077405824e-06, |
|
"loss": 1.6729, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.0936, |
|
"grad_norm": 1.3285000324249268, |
|
"learning_rate": 4.892692557266429e-06, |
|
"loss": 3.9178, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.0944, |
|
"grad_norm": 5.176731586456299, |
|
"learning_rate": 4.8908639235804324e-06, |
|
"loss": 1.3498, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.0952, |
|
"grad_norm": 2.258445978164673, |
|
"learning_rate": 4.88902018789848e-06, |
|
"loss": 3.9289, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.096, |
|
"grad_norm": 4.080480575561523, |
|
"learning_rate": 4.887161361866608e-06, |
|
"loss": 1.2727, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.0968, |
|
"grad_norm": 1.3605031967163086, |
|
"learning_rate": 4.8852874572261715e-06, |
|
"loss": 3.8425, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.0976, |
|
"grad_norm": 4.4306135177612305, |
|
"learning_rate": 4.883398485813772e-06, |
|
"loss": 1.4429, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.0984, |
|
"grad_norm": 1.9310946464538574, |
|
"learning_rate": 4.881494459561177e-06, |
|
"loss": 3.7989, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.0992, |
|
"grad_norm": 5.516058444976807, |
|
"learning_rate": 4.879575390495254e-06, |
|
"loss": 1.6466, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 1.665083646774292, |
|
"learning_rate": 4.8776412907378845e-06, |
|
"loss": 3.7725, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.1008, |
|
"grad_norm": 5.122972011566162, |
|
"learning_rate": 4.8756921725058935e-06, |
|
"loss": 1.4164, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.1016, |
|
"grad_norm": 1.7785176038742065, |
|
"learning_rate": 4.873728048110973e-06, |
|
"loss": 3.8428, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.1024, |
|
"grad_norm": 4.19711446762085, |
|
"learning_rate": 4.871748929959598e-06, |
|
"loss": 1.4346, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.1032, |
|
"grad_norm": 1.5167326927185059, |
|
"learning_rate": 4.869754830552956e-06, |
|
"loss": 3.7787, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.104, |
|
"grad_norm": 4.343649387359619, |
|
"learning_rate": 4.867745762486862e-06, |
|
"loss": 1.4161, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.1048, |
|
"grad_norm": 1.7682503461837769, |
|
"learning_rate": 4.86572173845168e-06, |
|
"loss": 3.7656, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.1056, |
|
"grad_norm": 5.387735843658447, |
|
"learning_rate": 4.863682771232249e-06, |
|
"loss": 1.5529, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.1064, |
|
"grad_norm": 1.6323776245117188, |
|
"learning_rate": 4.861628873707792e-06, |
|
"loss": 3.7581, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.1072, |
|
"grad_norm": 4.973332405090332, |
|
"learning_rate": 4.859560058851844e-06, |
|
"loss": 1.3401, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.108, |
|
"grad_norm": 2.288790464401245, |
|
"learning_rate": 4.857476339732162e-06, |
|
"loss": 3.5462, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.1088, |
|
"grad_norm": 4.954509735107422, |
|
"learning_rate": 4.855377729510648e-06, |
|
"loss": 1.4214, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.1096, |
|
"grad_norm": 1.466504693031311, |
|
"learning_rate": 4.8532642414432675e-06, |
|
"loss": 3.7383, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.1104, |
|
"grad_norm": 4.507660865783691, |
|
"learning_rate": 4.851135888879958e-06, |
|
"loss": 1.429, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.1112, |
|
"grad_norm": 1.4335397481918335, |
|
"learning_rate": 4.8489926852645505e-06, |
|
"loss": 3.8185, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.112, |
|
"grad_norm": 5.188979148864746, |
|
"learning_rate": 4.846834644134686e-06, |
|
"loss": 1.288, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.1128, |
|
"grad_norm": 1.4267185926437378, |
|
"learning_rate": 4.844661779121723e-06, |
|
"loss": 3.7755, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.1136, |
|
"grad_norm": 4.5999555587768555, |
|
"learning_rate": 4.842474103950658e-06, |
|
"loss": 1.4337, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.1144, |
|
"grad_norm": 1.5960358381271362, |
|
"learning_rate": 4.8402716324400375e-06, |
|
"loss": 3.8674, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.1152, |
|
"grad_norm": 4.50584077835083, |
|
"learning_rate": 4.838054378501868e-06, |
|
"loss": 1.4054, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.116, |
|
"grad_norm": 2.3714451789855957, |
|
"learning_rate": 4.8358223561415304e-06, |
|
"loss": 3.6878, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.1168, |
|
"grad_norm": 4.409125328063965, |
|
"learning_rate": 4.833575579457691e-06, |
|
"loss": 1.4443, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.1176, |
|
"grad_norm": 1.876566767692566, |
|
"learning_rate": 4.831314062642213e-06, |
|
"loss": 3.9204, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.1184, |
|
"grad_norm": 4.678242206573486, |
|
"learning_rate": 4.829037819980065e-06, |
|
"loss": 1.3475, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.1192, |
|
"grad_norm": 1.5604186058044434, |
|
"learning_rate": 4.8267468658492335e-06, |
|
"loss": 3.8065, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 4.738994598388672, |
|
"learning_rate": 4.824441214720629e-06, |
|
"loss": 1.2868, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.1208, |
|
"grad_norm": 1.2587168216705322, |
|
"learning_rate": 4.822120881157998e-06, |
|
"loss": 3.8178, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.1216, |
|
"grad_norm": 4.9535298347473145, |
|
"learning_rate": 4.819785879817827e-06, |
|
"loss": 1.4865, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.1224, |
|
"grad_norm": 1.3460506200790405, |
|
"learning_rate": 4.8174362254492555e-06, |
|
"loss": 3.7509, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.1232, |
|
"grad_norm": 6.2948832511901855, |
|
"learning_rate": 4.815071932893976e-06, |
|
"loss": 1.6562, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.124, |
|
"grad_norm": 1.2623156309127808, |
|
"learning_rate": 4.812693017086145e-06, |
|
"loss": 3.7352, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.1248, |
|
"grad_norm": 4.746945858001709, |
|
"learning_rate": 4.810299493052289e-06, |
|
"loss": 1.4701, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.1256, |
|
"grad_norm": 1.41659414768219, |
|
"learning_rate": 4.807891375911207e-06, |
|
"loss": 3.7158, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.1264, |
|
"grad_norm": 5.151709079742432, |
|
"learning_rate": 4.805468680873874e-06, |
|
"loss": 1.5235, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.1272, |
|
"grad_norm": 1.1390382051467896, |
|
"learning_rate": 4.803031423243349e-06, |
|
"loss": 3.7685, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.128, |
|
"grad_norm": 4.6451802253723145, |
|
"learning_rate": 4.800579618414677e-06, |
|
"loss": 1.3374, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.1288, |
|
"grad_norm": 2.0730605125427246, |
|
"learning_rate": 4.798113281874788e-06, |
|
"loss": 3.7551, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.1296, |
|
"grad_norm": 4.244422435760498, |
|
"learning_rate": 4.7956324292024045e-06, |
|
"loss": 1.4507, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.1304, |
|
"grad_norm": 1.437325119972229, |
|
"learning_rate": 4.7931370760679415e-06, |
|
"loss": 3.8459, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.1312, |
|
"grad_norm": 4.308803558349609, |
|
"learning_rate": 4.790627238233405e-06, |
|
"loss": 1.4397, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.132, |
|
"grad_norm": 1.3514691591262817, |
|
"learning_rate": 4.788102931552294e-06, |
|
"loss": 3.7826, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.1328, |
|
"grad_norm": 4.431159973144531, |
|
"learning_rate": 4.785564171969503e-06, |
|
"loss": 1.3688, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.1336, |
|
"grad_norm": 1.9444341659545898, |
|
"learning_rate": 4.783010975521216e-06, |
|
"loss": 3.786, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.1344, |
|
"grad_norm": 4.421632289886475, |
|
"learning_rate": 4.78044335833481e-06, |
|
"loss": 1.3799, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.1352, |
|
"grad_norm": 1.30320143699646, |
|
"learning_rate": 4.777861336628751e-06, |
|
"loss": 3.7414, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.136, |
|
"grad_norm": 4.836937427520752, |
|
"learning_rate": 4.775264926712489e-06, |
|
"loss": 1.3762, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.1368, |
|
"grad_norm": 1.720489501953125, |
|
"learning_rate": 4.772654144986364e-06, |
|
"loss": 3.7693, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.1376, |
|
"grad_norm": 4.573201656341553, |
|
"learning_rate": 4.77002900794149e-06, |
|
"loss": 1.4831, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.1384, |
|
"grad_norm": 1.4767590761184692, |
|
"learning_rate": 4.767389532159659e-06, |
|
"loss": 3.7936, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.1392, |
|
"grad_norm": 4.3813090324401855, |
|
"learning_rate": 4.764735734313236e-06, |
|
"loss": 1.3468, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 1.5614203214645386, |
|
"learning_rate": 4.762067631165049e-06, |
|
"loss": 3.8268, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.1408, |
|
"grad_norm": 4.7881317138671875, |
|
"learning_rate": 4.75938523956829e-06, |
|
"loss": 1.6201, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.1416, |
|
"grad_norm": 1.2957278490066528, |
|
"learning_rate": 4.756688576466398e-06, |
|
"loss": 3.7073, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.1424, |
|
"grad_norm": 4.188969612121582, |
|
"learning_rate": 4.753977658892967e-06, |
|
"loss": 1.4572, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.1432, |
|
"grad_norm": 2.046276330947876, |
|
"learning_rate": 4.751252503971624e-06, |
|
"loss": 3.6809, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.144, |
|
"grad_norm": 4.05677604675293, |
|
"learning_rate": 4.748513128915928e-06, |
|
"loss": 1.3311, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.1448, |
|
"grad_norm": 1.2244303226470947, |
|
"learning_rate": 4.7457595510292615e-06, |
|
"loss": 3.8316, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.1456, |
|
"grad_norm": 4.775726795196533, |
|
"learning_rate": 4.74299178770472e-06, |
|
"loss": 1.5603, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.1464, |
|
"grad_norm": 1.41436767578125, |
|
"learning_rate": 4.740209856424998e-06, |
|
"loss": 3.7105, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.1472, |
|
"grad_norm": 5.448317527770996, |
|
"learning_rate": 4.737413774762287e-06, |
|
"loss": 1.2361, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.148, |
|
"grad_norm": 1.222730040550232, |
|
"learning_rate": 4.73460356037816e-06, |
|
"loss": 3.8072, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.1488, |
|
"grad_norm": 4.413971900939941, |
|
"learning_rate": 4.731779231023456e-06, |
|
"loss": 1.6303, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.1496, |
|
"grad_norm": 1.4510987997055054, |
|
"learning_rate": 4.728940804538176e-06, |
|
"loss": 3.6988, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.1504, |
|
"grad_norm": 4.780493259429932, |
|
"learning_rate": 4.726088298851362e-06, |
|
"loss": 1.1804, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.1512, |
|
"grad_norm": 1.5533583164215088, |
|
"learning_rate": 4.723221731980993e-06, |
|
"loss": 3.6128, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.152, |
|
"grad_norm": 4.775524616241455, |
|
"learning_rate": 4.720341122033862e-06, |
|
"loss": 1.5147, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.1528, |
|
"grad_norm": 1.6876249313354492, |
|
"learning_rate": 4.717446487205466e-06, |
|
"loss": 3.7315, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.1536, |
|
"grad_norm": 3.9606497287750244, |
|
"learning_rate": 4.714537845779894e-06, |
|
"loss": 1.3284, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.1544, |
|
"grad_norm": 1.2425357103347778, |
|
"learning_rate": 4.7116152161297045e-06, |
|
"loss": 3.7983, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.1552, |
|
"grad_norm": 3.9687187671661377, |
|
"learning_rate": 4.708678616715815e-06, |
|
"loss": 1.3479, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.156, |
|
"grad_norm": 1.5664615631103516, |
|
"learning_rate": 4.705728066087384e-06, |
|
"loss": 3.7247, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.1568, |
|
"grad_norm": 4.444562911987305, |
|
"learning_rate": 4.702763582881692e-06, |
|
"loss": 1.2835, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.1576, |
|
"grad_norm": 1.8698633909225464, |
|
"learning_rate": 4.699785185824026e-06, |
|
"loss": 3.8091, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.1584, |
|
"grad_norm": 4.637014389038086, |
|
"learning_rate": 4.696792893727562e-06, |
|
"loss": 1.3871, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.1592, |
|
"grad_norm": 1.3571611642837524, |
|
"learning_rate": 4.693786725493242e-06, |
|
"loss": 3.7813, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 4.458593368530273, |
|
"learning_rate": 4.690766700109659e-06, |
|
"loss": 1.4933, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.1608, |
|
"grad_norm": 1.5887341499328613, |
|
"learning_rate": 4.687732836652935e-06, |
|
"loss": 3.6873, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.1616, |
|
"grad_norm": 6.06688928604126, |
|
"learning_rate": 4.684685154286599e-06, |
|
"loss": 1.312, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.1624, |
|
"grad_norm": 1.5234293937683105, |
|
"learning_rate": 4.6816236722614694e-06, |
|
"loss": 3.7146, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.1632, |
|
"grad_norm": 4.001331806182861, |
|
"learning_rate": 4.6785484099155324e-06, |
|
"loss": 1.4507, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.164, |
|
"grad_norm": 1.5702141523361206, |
|
"learning_rate": 4.675459386673815e-06, |
|
"loss": 3.6801, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.1648, |
|
"grad_norm": 3.6314635276794434, |
|
"learning_rate": 4.672356622048266e-06, |
|
"loss": 1.2263, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.1656, |
|
"grad_norm": 1.422735571861267, |
|
"learning_rate": 4.669240135637635e-06, |
|
"loss": 3.6963, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.1664, |
|
"grad_norm": 4.454765796661377, |
|
"learning_rate": 4.666109947127343e-06, |
|
"loss": 1.1784, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.1672, |
|
"grad_norm": 2.0289947986602783, |
|
"learning_rate": 4.662966076289363e-06, |
|
"loss": 3.8096, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.168, |
|
"grad_norm": 4.10106086730957, |
|
"learning_rate": 4.659808542982089e-06, |
|
"loss": 1.3621, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.1688, |
|
"grad_norm": 1.7755879163742065, |
|
"learning_rate": 4.65663736715022e-06, |
|
"loss": 3.6229, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.1696, |
|
"grad_norm": 3.9878623485565186, |
|
"learning_rate": 4.653452568824625e-06, |
|
"loss": 1.3814, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.1704, |
|
"grad_norm": 1.2768726348876953, |
|
"learning_rate": 4.650254168122222e-06, |
|
"loss": 3.7008, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.1712, |
|
"grad_norm": 3.8291852474212646, |
|
"learning_rate": 4.647042185245848e-06, |
|
"loss": 1.3145, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.172, |
|
"grad_norm": 1.5507771968841553, |
|
"learning_rate": 4.6438166404841316e-06, |
|
"loss": 3.6915, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.1728, |
|
"grad_norm": 4.554000377655029, |
|
"learning_rate": 4.640577554211366e-06, |
|
"loss": 1.2244, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.1736, |
|
"grad_norm": 1.2744420766830444, |
|
"learning_rate": 4.637324946887384e-06, |
|
"loss": 3.7756, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.1744, |
|
"grad_norm": 5.061426162719727, |
|
"learning_rate": 4.634058839057417e-06, |
|
"loss": 1.479, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.1752, |
|
"grad_norm": 1.7611600160598755, |
|
"learning_rate": 4.63077925135198e-06, |
|
"loss": 3.7824, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.176, |
|
"grad_norm": 5.889009952545166, |
|
"learning_rate": 4.62748620448673e-06, |
|
"loss": 1.4081, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.1768, |
|
"grad_norm": 1.560341238975525, |
|
"learning_rate": 4.624179719262342e-06, |
|
"loss": 3.7535, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.1776, |
|
"grad_norm": 4.9289231300354, |
|
"learning_rate": 4.620859816564371e-06, |
|
"loss": 1.4075, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.1784, |
|
"grad_norm": 1.3027839660644531, |
|
"learning_rate": 4.6175265173631304e-06, |
|
"loss": 3.7511, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.1792, |
|
"grad_norm": 4.20517635345459, |
|
"learning_rate": 4.6141798427135475e-06, |
|
"loss": 1.2056, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 1.9253166913986206, |
|
"learning_rate": 4.610819813755038e-06, |
|
"loss": 3.5762, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.1808, |
|
"grad_norm": 4.654662609100342, |
|
"learning_rate": 4.607446451711372e-06, |
|
"loss": 1.4106, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.1816, |
|
"grad_norm": 1.6170463562011719, |
|
"learning_rate": 4.604059777890537e-06, |
|
"loss": 3.5927, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.1824, |
|
"grad_norm": 4.272345066070557, |
|
"learning_rate": 4.6006598136846056e-06, |
|
"loss": 1.3751, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.1832, |
|
"grad_norm": 1.1468439102172852, |
|
"learning_rate": 4.5972465805696e-06, |
|
"loss": 3.7235, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.184, |
|
"grad_norm": 4.337528705596924, |
|
"learning_rate": 4.593820100105355e-06, |
|
"loss": 1.212, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.1848, |
|
"grad_norm": 1.6321645975112915, |
|
"learning_rate": 4.590380393935383e-06, |
|
"loss": 3.7544, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.1856, |
|
"grad_norm": 4.132114410400391, |
|
"learning_rate": 4.586927483786739e-06, |
|
"loss": 1.4566, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.1864, |
|
"grad_norm": 1.6077178716659546, |
|
"learning_rate": 4.583461391469879e-06, |
|
"loss": 3.6934, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.1872, |
|
"grad_norm": 4.226905345916748, |
|
"learning_rate": 4.579982138878527e-06, |
|
"loss": 1.5507, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.188, |
|
"grad_norm": 1.280689001083374, |
|
"learning_rate": 4.576489747989532e-06, |
|
"loss": 3.77, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.1888, |
|
"grad_norm": 3.9274861812591553, |
|
"learning_rate": 4.572984240862733e-06, |
|
"loss": 1.5939, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.1896, |
|
"grad_norm": 1.420904278755188, |
|
"learning_rate": 4.56946563964082e-06, |
|
"loss": 3.5977, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.1904, |
|
"grad_norm": 4.135627746582031, |
|
"learning_rate": 4.5659339665491894e-06, |
|
"loss": 1.2989, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.1912, |
|
"grad_norm": 1.301414966583252, |
|
"learning_rate": 4.562389243895807e-06, |
|
"loss": 3.6786, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.192, |
|
"grad_norm": 4.637629508972168, |
|
"learning_rate": 4.558831494071069e-06, |
|
"loss": 1.4187, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.1928, |
|
"grad_norm": 1.2166482210159302, |
|
"learning_rate": 4.555260739547657e-06, |
|
"loss": 3.6755, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.1936, |
|
"grad_norm": 3.494554281234741, |
|
"learning_rate": 4.551677002880395e-06, |
|
"loss": 1.0023, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.1944, |
|
"grad_norm": 1.2456482648849487, |
|
"learning_rate": 4.548080306706114e-06, |
|
"loss": 3.7268, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.1952, |
|
"grad_norm": 3.789717674255371, |
|
"learning_rate": 4.544470673743502e-06, |
|
"loss": 1.1345, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.196, |
|
"grad_norm": 1.615335464477539, |
|
"learning_rate": 4.54084812679296e-06, |
|
"loss": 3.5679, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.1968, |
|
"grad_norm": 4.087082862854004, |
|
"learning_rate": 4.537212688736466e-06, |
|
"loss": 1.5294, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.1976, |
|
"grad_norm": 1.3239346742630005, |
|
"learning_rate": 4.533564382537421e-06, |
|
"loss": 3.8232, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.1984, |
|
"grad_norm": 3.6679818630218506, |
|
"learning_rate": 4.529903231240511e-06, |
|
"loss": 1.1619, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.1992, |
|
"grad_norm": 1.6263890266418457, |
|
"learning_rate": 4.526229257971556e-06, |
|
"loss": 3.7185, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 4.270927429199219, |
|
"learning_rate": 4.522542485937369e-06, |
|
"loss": 1.4918, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.2008, |
|
"grad_norm": 1.6562573909759521, |
|
"learning_rate": 4.518842938425606e-06, |
|
"loss": 3.7609, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.2016, |
|
"grad_norm": 4.229763031005859, |
|
"learning_rate": 4.5151306388046175e-06, |
|
"loss": 1.1358, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.2024, |
|
"grad_norm": 1.3031507730484009, |
|
"learning_rate": 4.511405610523309e-06, |
|
"loss": 3.6721, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.2032, |
|
"grad_norm": 4.729180335998535, |
|
"learning_rate": 4.507667877110982e-06, |
|
"loss": 1.5732, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.204, |
|
"grad_norm": 1.4898425340652466, |
|
"learning_rate": 4.503917462177192e-06, |
|
"loss": 3.6121, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.2048, |
|
"grad_norm": 4.497402667999268, |
|
"learning_rate": 4.500154389411598e-06, |
|
"loss": 1.3272, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.2056, |
|
"grad_norm": 1.141797423362732, |
|
"learning_rate": 4.496378682583813e-06, |
|
"loss": 3.6704, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.2064, |
|
"grad_norm": 4.572139739990234, |
|
"learning_rate": 4.492590365543253e-06, |
|
"loss": 1.4076, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.2072, |
|
"grad_norm": 1.6577672958374023, |
|
"learning_rate": 4.488789462218988e-06, |
|
"loss": 3.6953, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.208, |
|
"grad_norm": 4.384160041809082, |
|
"learning_rate": 4.4849759966195885e-06, |
|
"loss": 1.2979, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.2088, |
|
"grad_norm": 1.2096525430679321, |
|
"learning_rate": 4.4811499928329775e-06, |
|
"loss": 3.7744, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.2096, |
|
"grad_norm": 4.4223246574401855, |
|
"learning_rate": 4.477311475026271e-06, |
|
"loss": 1.3639, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.2104, |
|
"grad_norm": 1.2359306812286377, |
|
"learning_rate": 4.473460467445637e-06, |
|
"loss": 3.6689, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.2112, |
|
"grad_norm": 4.513794898986816, |
|
"learning_rate": 4.469596994416131e-06, |
|
"loss": 1.2571, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.212, |
|
"grad_norm": 1.4100075960159302, |
|
"learning_rate": 4.465721080341547e-06, |
|
"loss": 3.669, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.2128, |
|
"grad_norm": 4.375431537628174, |
|
"learning_rate": 4.4618327497042676e-06, |
|
"loss": 1.3244, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.2136, |
|
"grad_norm": 1.1597020626068115, |
|
"learning_rate": 4.457932027065102e-06, |
|
"loss": 3.7463, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.2144, |
|
"grad_norm": 4.304786682128906, |
|
"learning_rate": 4.4540189370631315e-06, |
|
"loss": 1.2498, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.2152, |
|
"grad_norm": 1.5611578226089478, |
|
"learning_rate": 4.450093504415562e-06, |
|
"loss": 3.7, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.216, |
|
"grad_norm": 4.710305213928223, |
|
"learning_rate": 4.446155753917559e-06, |
|
"loss": 1.4829, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.2168, |
|
"grad_norm": 1.0595712661743164, |
|
"learning_rate": 4.442205710442095e-06, |
|
"loss": 3.7709, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.2176, |
|
"grad_norm": 4.113396644592285, |
|
"learning_rate": 4.43824339893979e-06, |
|
"loss": 1.4732, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.2184, |
|
"grad_norm": 1.346928358078003, |
|
"learning_rate": 4.434268844438758e-06, |
|
"loss": 3.6034, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.2192, |
|
"grad_norm": 4.2482452392578125, |
|
"learning_rate": 4.4302820720444454e-06, |
|
"loss": 1.3669, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 1.1629118919372559, |
|
"learning_rate": 4.426283106939474e-06, |
|
"loss": 3.7432, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.2208, |
|
"grad_norm": 3.7786972522735596, |
|
"learning_rate": 4.422271974383479e-06, |
|
"loss": 1.3379, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.2216, |
|
"grad_norm": 1.7842165231704712, |
|
"learning_rate": 4.418248699712955e-06, |
|
"loss": 3.6675, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.2224, |
|
"grad_norm": 3.950294017791748, |
|
"learning_rate": 4.414213308341092e-06, |
|
"loss": 1.5301, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.2232, |
|
"grad_norm": 1.4630101919174194, |
|
"learning_rate": 4.410165825757613e-06, |
|
"loss": 3.571, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.224, |
|
"grad_norm": 4.155986309051514, |
|
"learning_rate": 4.40610627752862e-06, |
|
"loss": 1.3453, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.2248, |
|
"grad_norm": 1.698153018951416, |
|
"learning_rate": 4.402034689296425e-06, |
|
"loss": 3.6699, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.2256, |
|
"grad_norm": 4.893118858337402, |
|
"learning_rate": 4.397951086779392e-06, |
|
"loss": 1.6296, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.2264, |
|
"grad_norm": 1.9244930744171143, |
|
"learning_rate": 4.393855495771774e-06, |
|
"loss": 3.728, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.2272, |
|
"grad_norm": 4.7193827629089355, |
|
"learning_rate": 4.389747942143549e-06, |
|
"loss": 1.3797, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.228, |
|
"grad_norm": 1.3077738285064697, |
|
"learning_rate": 4.38562845184026e-06, |
|
"loss": 3.7899, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.2288, |
|
"grad_norm": 4.431347370147705, |
|
"learning_rate": 4.381497050882845e-06, |
|
"loss": 1.6555, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.2296, |
|
"grad_norm": 1.5692718029022217, |
|
"learning_rate": 4.377353765367479e-06, |
|
"loss": 3.6771, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.2304, |
|
"grad_norm": 3.9838104248046875, |
|
"learning_rate": 4.373198621465405e-06, |
|
"loss": 1.1383, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.2312, |
|
"grad_norm": 1.101969838142395, |
|
"learning_rate": 4.369031645422768e-06, |
|
"loss": 3.6786, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.232, |
|
"grad_norm": 4.563289165496826, |
|
"learning_rate": 4.364852863560456e-06, |
|
"loss": 1.2641, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.2328, |
|
"grad_norm": 1.3112094402313232, |
|
"learning_rate": 4.360662302273926e-06, |
|
"loss": 3.7925, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.2336, |
|
"grad_norm": 4.193509578704834, |
|
"learning_rate": 4.356459988033039e-06, |
|
"loss": 1.1937, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.2344, |
|
"grad_norm": 1.167222499847412, |
|
"learning_rate": 4.352245947381897e-06, |
|
"loss": 3.6606, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.2352, |
|
"grad_norm": 5.211182117462158, |
|
"learning_rate": 4.348020206938672e-06, |
|
"loss": 1.5236, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.236, |
|
"grad_norm": 1.5906448364257812, |
|
"learning_rate": 4.343782793395435e-06, |
|
"loss": 3.6172, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.2368, |
|
"grad_norm": 4.557344913482666, |
|
"learning_rate": 4.3395337335179945e-06, |
|
"loss": 1.2071, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.2376, |
|
"grad_norm": 1.5080584287643433, |
|
"learning_rate": 4.3352730541457215e-06, |
|
"loss": 3.5182, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.2384, |
|
"grad_norm": 4.691150665283203, |
|
"learning_rate": 4.331000782191384e-06, |
|
"loss": 1.4428, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.2392, |
|
"grad_norm": 1.2369650602340698, |
|
"learning_rate": 4.32671694464097e-06, |
|
"loss": 3.6389, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 5.130438327789307, |
|
"learning_rate": 4.322421568553529e-06, |
|
"loss": 1.4164, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.2408, |
|
"grad_norm": 1.76595938205719, |
|
"learning_rate": 4.318114681060989e-06, |
|
"loss": 3.5655, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.2416, |
|
"grad_norm": 4.4846954345703125, |
|
"learning_rate": 4.3137963093679945e-06, |
|
"loss": 1.4369, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.2424, |
|
"grad_norm": 1.5124865770339966, |
|
"learning_rate": 4.309466480751726e-06, |
|
"loss": 3.5159, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 0.2432, |
|
"grad_norm": 4.232130527496338, |
|
"learning_rate": 4.305125222561736e-06, |
|
"loss": 1.5252, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.244, |
|
"grad_norm": 1.544097900390625, |
|
"learning_rate": 4.3007725622197675e-06, |
|
"loss": 3.7571, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.2448, |
|
"grad_norm": 3.7335703372955322, |
|
"learning_rate": 4.296408527219592e-06, |
|
"loss": 1.2674, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.2456, |
|
"grad_norm": 1.2222108840942383, |
|
"learning_rate": 4.2920331451268246e-06, |
|
"loss": 3.6799, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 0.2464, |
|
"grad_norm": 4.682336807250977, |
|
"learning_rate": 4.2876464435787576e-06, |
|
"loss": 1.3907, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.2472, |
|
"grad_norm": 1.7839024066925049, |
|
"learning_rate": 4.283248450284182e-06, |
|
"loss": 3.4632, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 0.248, |
|
"grad_norm": 4.441279411315918, |
|
"learning_rate": 4.278839193023214e-06, |
|
"loss": 1.4755, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.2488, |
|
"grad_norm": 1.5365478992462158, |
|
"learning_rate": 4.274418699647117e-06, |
|
"loss": 3.5074, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 0.2496, |
|
"grad_norm": 4.5583062171936035, |
|
"learning_rate": 4.269986998078132e-06, |
|
"loss": 1.681, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.2504, |
|
"grad_norm": 1.4559458494186401, |
|
"learning_rate": 4.265544116309294e-06, |
|
"loss": 3.5942, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.2512, |
|
"grad_norm": 4.114186763763428, |
|
"learning_rate": 4.2610900824042575e-06, |
|
"loss": 1.6586, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.252, |
|
"grad_norm": 1.3927795886993408, |
|
"learning_rate": 4.256624924497124e-06, |
|
"loss": 3.6604, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.2528, |
|
"grad_norm": 3.7071781158447266, |
|
"learning_rate": 4.2521486707922545e-06, |
|
"loss": 1.3165, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.2536, |
|
"grad_norm": 1.5977774858474731, |
|
"learning_rate": 4.247661349564103e-06, |
|
"loss": 3.71, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 0.2544, |
|
"grad_norm": 4.849422931671143, |
|
"learning_rate": 4.243162989157027e-06, |
|
"loss": 1.4173, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.2552, |
|
"grad_norm": 1.525455355644226, |
|
"learning_rate": 4.2386536179851175e-06, |
|
"loss": 3.5833, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 0.256, |
|
"grad_norm": 4.420166969299316, |
|
"learning_rate": 4.234133264532012e-06, |
|
"loss": 1.2962, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.2568, |
|
"grad_norm": 1.18903386592865, |
|
"learning_rate": 4.229601957350722e-06, |
|
"loss": 3.6984, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 0.2576, |
|
"grad_norm": 3.8449833393096924, |
|
"learning_rate": 4.225059725063444e-06, |
|
"loss": 1.3112, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.2584, |
|
"grad_norm": 1.7980787754058838, |
|
"learning_rate": 4.220506596361387e-06, |
|
"loss": 3.5587, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 0.2592, |
|
"grad_norm": 3.5607681274414062, |
|
"learning_rate": 4.215942600004586e-06, |
|
"loss": 1.2554, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 1.572067379951477, |
|
"learning_rate": 4.211367764821722e-06, |
|
"loss": 3.7133, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.2608, |
|
"grad_norm": 10.11608600616455, |
|
"learning_rate": 4.206782119709942e-06, |
|
"loss": 1.5166, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.2616, |
|
"grad_norm": 1.5986098051071167, |
|
"learning_rate": 4.202185693634671e-06, |
|
"loss": 3.6253, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 0.2624, |
|
"grad_norm": 3.9274239540100098, |
|
"learning_rate": 4.197578515629435e-06, |
|
"loss": 1.311, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.2632, |
|
"grad_norm": 1.2195369005203247, |
|
"learning_rate": 4.192960614795676e-06, |
|
"loss": 3.7322, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 0.264, |
|
"grad_norm": 4.052531719207764, |
|
"learning_rate": 4.188332020302561e-06, |
|
"loss": 1.3612, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.2648, |
|
"grad_norm": 1.4489315748214722, |
|
"learning_rate": 4.183692761386813e-06, |
|
"loss": 3.534, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 0.2656, |
|
"grad_norm": 5.4260053634643555, |
|
"learning_rate": 4.1790428673525104e-06, |
|
"loss": 1.523, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.2664, |
|
"grad_norm": 1.6070371866226196, |
|
"learning_rate": 4.1743823675709115e-06, |
|
"loss": 3.4917, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 0.2672, |
|
"grad_norm": 4.363175392150879, |
|
"learning_rate": 4.1697112914802665e-06, |
|
"loss": 1.6258, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.268, |
|
"grad_norm": 1.6007026433944702, |
|
"learning_rate": 4.16502966858563e-06, |
|
"loss": 3.575, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.2688, |
|
"grad_norm": 4.8055419921875, |
|
"learning_rate": 4.160337528458676e-06, |
|
"loss": 1.7682, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.2696, |
|
"grad_norm": 1.2397737503051758, |
|
"learning_rate": 4.155634900737513e-06, |
|
"loss": 3.6629, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 0.2704, |
|
"grad_norm": 4.131043910980225, |
|
"learning_rate": 4.150921815126493e-06, |
|
"loss": 1.5988, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.2712, |
|
"grad_norm": 1.2639617919921875, |
|
"learning_rate": 4.146198301396025e-06, |
|
"loss": 3.5698, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 0.272, |
|
"grad_norm": 4.381173610687256, |
|
"learning_rate": 4.141464389382392e-06, |
|
"loss": 1.3198, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.2728, |
|
"grad_norm": 1.440491795539856, |
|
"learning_rate": 4.136720108987552e-06, |
|
"loss": 3.6658, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 0.2736, |
|
"grad_norm": 8.941045761108398, |
|
"learning_rate": 4.13196549017896e-06, |
|
"loss": 1.2674, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.2744, |
|
"grad_norm": 1.5544283390045166, |
|
"learning_rate": 4.127200562989372e-06, |
|
"loss": 3.5196, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 0.2752, |
|
"grad_norm": 4.094554424285889, |
|
"learning_rate": 4.122425357516658e-06, |
|
"loss": 1.2112, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.276, |
|
"grad_norm": 1.1563968658447266, |
|
"learning_rate": 4.117639903923611e-06, |
|
"loss": 3.6399, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.2768, |
|
"grad_norm": 4.3765482902526855, |
|
"learning_rate": 4.112844232437757e-06, |
|
"loss": 1.3016, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.2776, |
|
"grad_norm": 1.073043704032898, |
|
"learning_rate": 4.108038373351163e-06, |
|
"loss": 3.6758, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 0.2784, |
|
"grad_norm": 4.243771553039551, |
|
"learning_rate": 4.103222357020248e-06, |
|
"loss": 1.4512, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.2792, |
|
"grad_norm": 1.4195610284805298, |
|
"learning_rate": 4.098396213865587e-06, |
|
"loss": 3.6391, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 4.04062032699585, |
|
"learning_rate": 4.093559974371725e-06, |
|
"loss": 1.2876, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.2808, |
|
"grad_norm": 1.384352207183838, |
|
"learning_rate": 4.0887136690869774e-06, |
|
"loss": 3.6527, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 0.2816, |
|
"grad_norm": 4.134579181671143, |
|
"learning_rate": 4.083857328623243e-06, |
|
"loss": 1.3498, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.2824, |
|
"grad_norm": 1.8394545316696167, |
|
"learning_rate": 4.078990983655807e-06, |
|
"loss": 3.5694, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 0.2832, |
|
"grad_norm": 4.24132776260376, |
|
"learning_rate": 4.07411466492315e-06, |
|
"loss": 1.6123, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 0.284, |
|
"grad_norm": 1.1497430801391602, |
|
"learning_rate": 4.069228403226751e-06, |
|
"loss": 3.6655, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.2848, |
|
"grad_norm": 3.8187551498413086, |
|
"learning_rate": 4.064332229430895e-06, |
|
"loss": 1.4159, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.2856, |
|
"grad_norm": 1.5703147649765015, |
|
"learning_rate": 4.059426174462476e-06, |
|
"loss": 3.5892, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 0.2864, |
|
"grad_norm": 4.054878234863281, |
|
"learning_rate": 4.054510269310803e-06, |
|
"loss": 1.3898, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 0.2872, |
|
"grad_norm": 1.7447679042816162, |
|
"learning_rate": 4.049584545027406e-06, |
|
"loss": 3.5291, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 0.288, |
|
"grad_norm": 3.6220648288726807, |
|
"learning_rate": 4.044649032725836e-06, |
|
"loss": 1.1255, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.2888, |
|
"grad_norm": 1.4866344928741455, |
|
"learning_rate": 4.039703763581472e-06, |
|
"loss": 3.647, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 0.2896, |
|
"grad_norm": 4.575165271759033, |
|
"learning_rate": 4.034748768831319e-06, |
|
"loss": 1.3781, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 0.2904, |
|
"grad_norm": 1.0558618307113647, |
|
"learning_rate": 4.02978407977382e-06, |
|
"loss": 3.6163, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 0.2912, |
|
"grad_norm": 4.454329490661621, |
|
"learning_rate": 4.024809727768648e-06, |
|
"loss": 1.3233, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.292, |
|
"grad_norm": 1.3956743478775024, |
|
"learning_rate": 4.019825744236514e-06, |
|
"loss": 3.5997, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.2928, |
|
"grad_norm": 4.550688743591309, |
|
"learning_rate": 4.014832160658966e-06, |
|
"loss": 1.4364, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 0.2936, |
|
"grad_norm": 1.2573503255844116, |
|
"learning_rate": 4.009829008578192e-06, |
|
"loss": 3.6729, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 0.2944, |
|
"grad_norm": 4.038947582244873, |
|
"learning_rate": 4.004816319596822e-06, |
|
"loss": 1.2911, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.2952, |
|
"grad_norm": 1.9488675594329834, |
|
"learning_rate": 3.999794125377721e-06, |
|
"loss": 3.5393, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.296, |
|
"grad_norm": 4.447761535644531, |
|
"learning_rate": 3.9947624576437975e-06, |
|
"loss": 1.5997, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.2968, |
|
"grad_norm": 1.2472996711730957, |
|
"learning_rate": 3.989721348177801e-06, |
|
"loss": 3.6067, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.2976, |
|
"grad_norm": 4.081388473510742, |
|
"learning_rate": 3.984670828822118e-06, |
|
"loss": 1.4171, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.2984, |
|
"grad_norm": 1.7100144624710083, |
|
"learning_rate": 3.979610931478574e-06, |
|
"loss": 3.7103, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.2992, |
|
"grad_norm": 4.408793926239014, |
|
"learning_rate": 3.97454168810823e-06, |
|
"loss": 1.3243, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 1.326974868774414, |
|
"learning_rate": 3.969463130731183e-06, |
|
"loss": 3.6149, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.3008, |
|
"grad_norm": 4.624994277954102, |
|
"learning_rate": 3.964375291426361e-06, |
|
"loss": 1.5994, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.3016, |
|
"grad_norm": 1.3679853677749634, |
|
"learning_rate": 3.959278202331323e-06, |
|
"loss": 3.5478, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.3024, |
|
"grad_norm": 4.432180881500244, |
|
"learning_rate": 3.954171895642052e-06, |
|
"loss": 1.4198, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.3032, |
|
"grad_norm": 1.0665056705474854, |
|
"learning_rate": 3.949056403612758e-06, |
|
"loss": 3.7173, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.304, |
|
"grad_norm": 3.6534807682037354, |
|
"learning_rate": 3.943931758555669e-06, |
|
"loss": 1.2773, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.3048, |
|
"grad_norm": 1.4018532037734985, |
|
"learning_rate": 3.938797992840828e-06, |
|
"loss": 3.5796, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.3056, |
|
"grad_norm": 4.3174357414245605, |
|
"learning_rate": 3.933655138895889e-06, |
|
"loss": 1.0747, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.3064, |
|
"grad_norm": 1.893721342086792, |
|
"learning_rate": 3.928503229205913e-06, |
|
"loss": 3.5452, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.3072, |
|
"grad_norm": 4.509764194488525, |
|
"learning_rate": 3.923342296313162e-06, |
|
"loss": 1.4684, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.308, |
|
"grad_norm": 1.2628504037857056, |
|
"learning_rate": 3.918172372816892e-06, |
|
"loss": 3.5872, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.3088, |
|
"grad_norm": 3.868783712387085, |
|
"learning_rate": 3.91299349137315e-06, |
|
"loss": 1.316, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.3096, |
|
"grad_norm": 1.3258881568908691, |
|
"learning_rate": 3.907805684694567e-06, |
|
"loss": 3.6877, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.3104, |
|
"grad_norm": 3.9455106258392334, |
|
"learning_rate": 3.9026089855501475e-06, |
|
"loss": 1.2362, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.3112, |
|
"grad_norm": 1.0947574377059937, |
|
"learning_rate": 3.8974034267650695e-06, |
|
"loss": 3.735, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.312, |
|
"grad_norm": 4.135454177856445, |
|
"learning_rate": 3.89218904122047e-06, |
|
"loss": 1.3921, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.3128, |
|
"grad_norm": 1.3168636560440063, |
|
"learning_rate": 3.886965861853243e-06, |
|
"loss": 3.5585, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.3136, |
|
"grad_norm": 3.532658100128174, |
|
"learning_rate": 3.881733921655829e-06, |
|
"loss": 1.2495, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.3144, |
|
"grad_norm": 1.3559529781341553, |
|
"learning_rate": 3.876493253676004e-06, |
|
"loss": 3.561, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.3152, |
|
"grad_norm": 4.4542036056518555, |
|
"learning_rate": 3.871243891016676e-06, |
|
"loss": 1.3177, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.316, |
|
"grad_norm": 1.6158586740493774, |
|
"learning_rate": 3.8659858668356735e-06, |
|
"loss": 3.623, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.3168, |
|
"grad_norm": 4.352112293243408, |
|
"learning_rate": 3.8607192143455325e-06, |
|
"loss": 1.3388, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.3176, |
|
"grad_norm": 1.2379918098449707, |
|
"learning_rate": 3.855443966813295e-06, |
|
"loss": 3.6086, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.3184, |
|
"grad_norm": 4.482300758361816, |
|
"learning_rate": 3.85016015756029e-06, |
|
"loss": 1.3943, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.3192, |
|
"grad_norm": 1.632942795753479, |
|
"learning_rate": 3.844867819961928e-06, |
|
"loss": 3.5682, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 4.9489521980285645, |
|
"learning_rate": 3.839566987447492e-06, |
|
"loss": 1.1445, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.3208, |
|
"grad_norm": 1.3084850311279297, |
|
"learning_rate": 3.8342576934999184e-06, |
|
"loss": 3.7127, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.3216, |
|
"grad_norm": 3.8171467781066895, |
|
"learning_rate": 3.828939971655595e-06, |
|
"loss": 1.302, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.3224, |
|
"grad_norm": 1.2390443086624146, |
|
"learning_rate": 3.823613855504144e-06, |
|
"loss": 3.5798, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.3232, |
|
"grad_norm": 4.057291507720947, |
|
"learning_rate": 3.8182793786882065e-06, |
|
"loss": 1.3189, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.324, |
|
"grad_norm": 1.3859179019927979, |
|
"learning_rate": 3.8129365749032398e-06, |
|
"loss": 3.6643, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.3248, |
|
"grad_norm": 4.864846706390381, |
|
"learning_rate": 3.807585477897296e-06, |
|
"loss": 1.4575, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.3256, |
|
"grad_norm": 1.0886560678482056, |
|
"learning_rate": 3.802226121470811e-06, |
|
"loss": 3.7321, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.3264, |
|
"grad_norm": 3.940027952194214, |
|
"learning_rate": 3.796858539476394e-06, |
|
"loss": 1.2742, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.3272, |
|
"grad_norm": 1.2309926748275757, |
|
"learning_rate": 3.7914827658186104e-06, |
|
"loss": 3.5766, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.328, |
|
"grad_norm": 4.414444446563721, |
|
"learning_rate": 3.7860988344537664e-06, |
|
"loss": 1.2858, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.3288, |
|
"grad_norm": 1.0498713254928589, |
|
"learning_rate": 3.7807067793897006e-06, |
|
"loss": 3.6743, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.3296, |
|
"grad_norm": 4.1902313232421875, |
|
"learning_rate": 3.775306634685562e-06, |
|
"loss": 1.4446, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.3304, |
|
"grad_norm": 1.1650660037994385, |
|
"learning_rate": 3.7698984344516e-06, |
|
"loss": 3.6178, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.3312, |
|
"grad_norm": 4.5790910720825195, |
|
"learning_rate": 3.7644822128489476e-06, |
|
"loss": 1.5761, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.332, |
|
"grad_norm": 1.0688635110855103, |
|
"learning_rate": 3.7590580040894025e-06, |
|
"loss": 3.689, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.3328, |
|
"grad_norm": 4.05617094039917, |
|
"learning_rate": 3.7536258424352164e-06, |
|
"loss": 1.6174, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.3336, |
|
"grad_norm": 1.236042857170105, |
|
"learning_rate": 3.7481857621988734e-06, |
|
"loss": 3.6902, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.3344, |
|
"grad_norm": 4.205336093902588, |
|
"learning_rate": 3.742737797742878e-06, |
|
"loss": 1.3125, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.3352, |
|
"grad_norm": 1.460862159729004, |
|
"learning_rate": 3.737281983479534e-06, |
|
"loss": 3.503, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.336, |
|
"grad_norm": 4.190709114074707, |
|
"learning_rate": 3.731818353870729e-06, |
|
"loss": 1.2207, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.3368, |
|
"grad_norm": 2.0372729301452637, |
|
"learning_rate": 3.726346943427719e-06, |
|
"loss": 3.5128, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.3376, |
|
"grad_norm": 4.000549793243408, |
|
"learning_rate": 3.7208677867109042e-06, |
|
"loss": 1.244, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.3384, |
|
"grad_norm": 1.509992003440857, |
|
"learning_rate": 3.7153809183296174e-06, |
|
"loss": 3.6028, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.3392, |
|
"grad_norm": 3.7690091133117676, |
|
"learning_rate": 3.7098863729418997e-06, |
|
"loss": 1.1382, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 1.0848690271377563, |
|
"learning_rate": 3.7043841852542884e-06, |
|
"loss": 3.7097, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.3408, |
|
"grad_norm": 4.2273359298706055, |
|
"learning_rate": 3.6988743900215895e-06, |
|
"loss": 1.3459, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.3416, |
|
"grad_norm": 1.30433189868927, |
|
"learning_rate": 3.6933570220466654e-06, |
|
"loss": 3.5762, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.3424, |
|
"grad_norm": 3.894927740097046, |
|
"learning_rate": 3.6878321161802106e-06, |
|
"loss": 1.411, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.3432, |
|
"grad_norm": 1.23166024684906, |
|
"learning_rate": 3.682299707320532e-06, |
|
"loss": 3.7625, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.344, |
|
"grad_norm": 4.281452655792236, |
|
"learning_rate": 3.6767598304133325e-06, |
|
"loss": 1.2892, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.3448, |
|
"grad_norm": 1.510961890220642, |
|
"learning_rate": 3.6712125204514836e-06, |
|
"loss": 3.5778, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.3456, |
|
"grad_norm": 3.6072661876678467, |
|
"learning_rate": 3.665657812474812e-06, |
|
"loss": 1.2145, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.3464, |
|
"grad_norm": 1.6257572174072266, |
|
"learning_rate": 3.660095741569871e-06, |
|
"loss": 3.7148, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.3472, |
|
"grad_norm": 4.151918411254883, |
|
"learning_rate": 3.654526342869724e-06, |
|
"loss": 1.3151, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.348, |
|
"grad_norm": 1.7173959016799927, |
|
"learning_rate": 3.6489496515537204e-06, |
|
"loss": 3.5563, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.3488, |
|
"grad_norm": 3.5843987464904785, |
|
"learning_rate": 3.643365702847272e-06, |
|
"loss": 1.1541, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.3496, |
|
"grad_norm": 1.2119823694229126, |
|
"learning_rate": 3.6377745320216346e-06, |
|
"loss": 3.6086, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.3504, |
|
"grad_norm": 4.704022407531738, |
|
"learning_rate": 3.632176174393682e-06, |
|
"loss": 1.5989, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.3512, |
|
"grad_norm": 1.3486601114273071, |
|
"learning_rate": 3.6265706653256837e-06, |
|
"loss": 3.6383, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 0.352, |
|
"grad_norm": 4.133458614349365, |
|
"learning_rate": 3.6209580402250816e-06, |
|
"loss": 1.2559, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.3528, |
|
"grad_norm": 1.3388392925262451, |
|
"learning_rate": 3.615338334544265e-06, |
|
"loss": 3.6902, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.3536, |
|
"grad_norm": 4.311944961547852, |
|
"learning_rate": 3.6097115837803504e-06, |
|
"loss": 1.1318, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.3544, |
|
"grad_norm": 1.4599226713180542, |
|
"learning_rate": 3.604077823474954e-06, |
|
"loss": 3.6407, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 0.3552, |
|
"grad_norm": 4.284412384033203, |
|
"learning_rate": 3.5984370892139663e-06, |
|
"loss": 1.4261, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.356, |
|
"grad_norm": 1.4893653392791748, |
|
"learning_rate": 3.5927894166273324e-06, |
|
"loss": 3.6037, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.3568, |
|
"grad_norm": 3.953293800354004, |
|
"learning_rate": 3.5871348413888207e-06, |
|
"loss": 1.2646, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.3576, |
|
"grad_norm": 1.2986643314361572, |
|
"learning_rate": 3.5814733992158025e-06, |
|
"loss": 3.5551, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 0.3584, |
|
"grad_norm": 4.767986297607422, |
|
"learning_rate": 3.5758051258690223e-06, |
|
"loss": 1.6051, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.3592, |
|
"grad_norm": 1.4707053899765015, |
|
"learning_rate": 3.5701300571523757e-06, |
|
"loss": 3.4898, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 4.075262546539307, |
|
"learning_rate": 3.564448228912682e-06, |
|
"loss": 1.0939, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.3608, |
|
"grad_norm": 1.6893370151519775, |
|
"learning_rate": 3.558759677039455e-06, |
|
"loss": 3.524, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 0.3616, |
|
"grad_norm": 4.155539035797119, |
|
"learning_rate": 3.553064437464682e-06, |
|
"loss": 1.3009, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.3624, |
|
"grad_norm": 1.3253870010375977, |
|
"learning_rate": 3.5473625461625884e-06, |
|
"loss": 3.5764, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 0.3632, |
|
"grad_norm": 4.075945854187012, |
|
"learning_rate": 3.54165403914942e-06, |
|
"loss": 1.2607, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.364, |
|
"grad_norm": 1.059866189956665, |
|
"learning_rate": 3.535938952483211e-06, |
|
"loss": 3.6742, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.3648, |
|
"grad_norm": 4.110774993896484, |
|
"learning_rate": 3.5302173222635526e-06, |
|
"loss": 1.4106, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.3656, |
|
"grad_norm": 1.3632076978683472, |
|
"learning_rate": 3.5244891846313733e-06, |
|
"loss": 3.6836, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 0.3664, |
|
"grad_norm": 3.705369472503662, |
|
"learning_rate": 3.518754575768702e-06, |
|
"loss": 1.3081, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.3672, |
|
"grad_norm": 1.1472023725509644, |
|
"learning_rate": 3.5130135318984454e-06, |
|
"loss": 3.6175, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 0.368, |
|
"grad_norm": 3.85665225982666, |
|
"learning_rate": 3.507266089284157e-06, |
|
"loss": 1.3936, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.3688, |
|
"grad_norm": 1.0957272052764893, |
|
"learning_rate": 3.501512284229807e-06, |
|
"loss": 3.6699, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 0.3696, |
|
"grad_norm": 5.635092735290527, |
|
"learning_rate": 3.4957521530795576e-06, |
|
"loss": 1.5143, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.3704, |
|
"grad_norm": 1.2065218687057495, |
|
"learning_rate": 3.4899857322175252e-06, |
|
"loss": 3.6554, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 0.3712, |
|
"grad_norm": 3.687448263168335, |
|
"learning_rate": 3.484213058067559e-06, |
|
"loss": 1.3567, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.372, |
|
"grad_norm": 1.4137887954711914, |
|
"learning_rate": 3.4784341670930067e-06, |
|
"loss": 3.5039, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.3728, |
|
"grad_norm": 3.735736131668091, |
|
"learning_rate": 3.4726490957964836e-06, |
|
"loss": 1.1562, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.3736, |
|
"grad_norm": 1.429471731185913, |
|
"learning_rate": 3.466857880719645e-06, |
|
"loss": 3.4816, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 0.3744, |
|
"grad_norm": 3.8104074001312256, |
|
"learning_rate": 3.4610605584429526e-06, |
|
"loss": 1.2771, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.3752, |
|
"grad_norm": 1.0887689590454102, |
|
"learning_rate": 3.455257165585444e-06, |
|
"loss": 3.6168, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 0.376, |
|
"grad_norm": 4.246683120727539, |
|
"learning_rate": 3.4494477388045035e-06, |
|
"loss": 1.4563, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.3768, |
|
"grad_norm": 1.181482195854187, |
|
"learning_rate": 3.443632314795627e-06, |
|
"loss": 3.5803, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 0.3776, |
|
"grad_norm": 4.463985443115234, |
|
"learning_rate": 3.4378109302921946e-06, |
|
"loss": 1.3947, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.3784, |
|
"grad_norm": 2.0847549438476562, |
|
"learning_rate": 3.4319836220652334e-06, |
|
"loss": 3.5447, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 0.3792, |
|
"grad_norm": 3.957758903503418, |
|
"learning_rate": 3.4261504269231904e-06, |
|
"loss": 1.3876, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 1.2002718448638916, |
|
"learning_rate": 3.4203113817116955e-06, |
|
"loss": 3.6171, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.3808, |
|
"grad_norm": 3.7537636756896973, |
|
"learning_rate": 3.4144665233133318e-06, |
|
"loss": 1.3785, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.3816, |
|
"grad_norm": 1.081315517425537, |
|
"learning_rate": 3.408615888647402e-06, |
|
"loss": 3.6535, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 0.3824, |
|
"grad_norm": 4.511240005493164, |
|
"learning_rate": 3.402759514669694e-06, |
|
"loss": 1.5004, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 0.3832, |
|
"grad_norm": 1.60770845413208, |
|
"learning_rate": 3.3968974383722497e-06, |
|
"loss": 3.6355, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 0.384, |
|
"grad_norm": 4.516547679901123, |
|
"learning_rate": 3.391029696783127e-06, |
|
"loss": 1.2093, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.3848, |
|
"grad_norm": 1.8860230445861816, |
|
"learning_rate": 3.385156326966173e-06, |
|
"loss": 3.5089, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 0.3856, |
|
"grad_norm": 4.554468631744385, |
|
"learning_rate": 3.379277366020782e-06, |
|
"loss": 1.477, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 0.3864, |
|
"grad_norm": 1.258987307548523, |
|
"learning_rate": 3.3733928510816677e-06, |
|
"loss": 3.583, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 0.3872, |
|
"grad_norm": 4.783546447753906, |
|
"learning_rate": 3.3675028193186243e-06, |
|
"loss": 1.5192, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.388, |
|
"grad_norm": 1.0193849802017212, |
|
"learning_rate": 3.3616073079362925e-06, |
|
"loss": 3.629, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.3888, |
|
"grad_norm": 4.146661758422852, |
|
"learning_rate": 3.3557063541739283e-06, |
|
"loss": 1.2621, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 0.3896, |
|
"grad_norm": 1.25571608543396, |
|
"learning_rate": 3.349799995305162e-06, |
|
"loss": 3.5985, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 0.3904, |
|
"grad_norm": 4.230064868927002, |
|
"learning_rate": 3.343888268637765e-06, |
|
"loss": 1.232, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.3912, |
|
"grad_norm": 1.292047142982483, |
|
"learning_rate": 3.337971211513417e-06, |
|
"loss": 3.587, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 0.392, |
|
"grad_norm": 4.458502769470215, |
|
"learning_rate": 3.332048861307467e-06, |
|
"loss": 1.5272, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.3928, |
|
"grad_norm": 1.4470558166503906, |
|
"learning_rate": 3.3261212554286977e-06, |
|
"loss": 3.617, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 0.3936, |
|
"grad_norm": 3.8012030124664307, |
|
"learning_rate": 3.320188431319088e-06, |
|
"loss": 1.2316, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 0.3944, |
|
"grad_norm": 1.446913242340088, |
|
"learning_rate": 3.3142504264535808e-06, |
|
"loss": 3.6562, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 0.3952, |
|
"grad_norm": 4.147583961486816, |
|
"learning_rate": 3.308307278339842e-06, |
|
"loss": 1.3471, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 0.396, |
|
"grad_norm": 1.4276149272918701, |
|
"learning_rate": 3.3023590245180237e-06, |
|
"loss": 3.5495, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.3968, |
|
"grad_norm": 3.8174455165863037, |
|
"learning_rate": 3.296405702560532e-06, |
|
"loss": 1.0808, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.3976, |
|
"grad_norm": 1.4224337339401245, |
|
"learning_rate": 3.2904473500717826e-06, |
|
"loss": 3.5136, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 0.3984, |
|
"grad_norm": 4.157987117767334, |
|
"learning_rate": 3.284484004687969e-06, |
|
"loss": 1.3679, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 0.3992, |
|
"grad_norm": 1.2928471565246582, |
|
"learning_rate": 3.278515704076821e-06, |
|
"loss": 3.6342, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 4.097792625427246, |
|
"learning_rate": 3.272542485937369e-06, |
|
"loss": 1.3664, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.4008, |
|
"grad_norm": 1.1602492332458496, |
|
"learning_rate": 3.2665643879997054e-06, |
|
"loss": 3.6839, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 0.4016, |
|
"grad_norm": 3.862520456314087, |
|
"learning_rate": 3.2605814480247454e-06, |
|
"loss": 1.4261, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 0.4024, |
|
"grad_norm": 1.335418462753296, |
|
"learning_rate": 3.2545937038039904e-06, |
|
"loss": 3.599, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 0.4032, |
|
"grad_norm": 4.205375671386719, |
|
"learning_rate": 3.2486011931592863e-06, |
|
"loss": 1.5577, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 0.404, |
|
"grad_norm": 1.6254982948303223, |
|
"learning_rate": 3.2426039539425875e-06, |
|
"loss": 3.4938, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.4048, |
|
"grad_norm": 4.060510158538818, |
|
"learning_rate": 3.2366020240357166e-06, |
|
"loss": 1.3317, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 0.4056, |
|
"grad_norm": 1.3750642538070679, |
|
"learning_rate": 3.2305954413501252e-06, |
|
"loss": 3.5692, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 0.4064, |
|
"grad_norm": 4.146080017089844, |
|
"learning_rate": 3.2245842438266526e-06, |
|
"loss": 1.1754, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 0.4072, |
|
"grad_norm": 1.4431229829788208, |
|
"learning_rate": 3.2185684694352913e-06, |
|
"loss": 3.4761, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 0.408, |
|
"grad_norm": 3.423323392868042, |
|
"learning_rate": 3.2125481561749406e-06, |
|
"loss": 1.2221, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.4088, |
|
"grad_norm": 1.5508882999420166, |
|
"learning_rate": 3.2065233420731717e-06, |
|
"loss": 3.6483, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 0.4096, |
|
"grad_norm": 3.5361711978912354, |
|
"learning_rate": 3.2004940651859844e-06, |
|
"loss": 1.1119, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.4104, |
|
"grad_norm": 1.326869010925293, |
|
"learning_rate": 3.194460363597569e-06, |
|
"loss": 3.5423, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 0.4112, |
|
"grad_norm": 4.03769588470459, |
|
"learning_rate": 3.188422275420063e-06, |
|
"loss": 1.4117, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 0.412, |
|
"grad_norm": 1.3623450994491577, |
|
"learning_rate": 3.1823798387933134e-06, |
|
"loss": 3.498, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.4128, |
|
"grad_norm": 4.137259483337402, |
|
"learning_rate": 3.1763330918846347e-06, |
|
"loss": 1.2982, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 0.4136, |
|
"grad_norm": 1.067256212234497, |
|
"learning_rate": 3.1702820728885657e-06, |
|
"loss": 3.7067, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 0.4144, |
|
"grad_norm": 4.063728332519531, |
|
"learning_rate": 3.164226820026632e-06, |
|
"loss": 1.3187, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 0.4152, |
|
"grad_norm": 1.2824773788452148, |
|
"learning_rate": 3.1581673715471007e-06, |
|
"loss": 3.5527, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 0.416, |
|
"grad_norm": 3.7093420028686523, |
|
"learning_rate": 3.152103765724743e-06, |
|
"loss": 1.1281, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.4168, |
|
"grad_norm": 1.288455843925476, |
|
"learning_rate": 3.1460360408605866e-06, |
|
"loss": 3.5115, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 0.4176, |
|
"grad_norm": 4.3098063468933105, |
|
"learning_rate": 3.1399642352816825e-06, |
|
"loss": 1.3113, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 0.4184, |
|
"grad_norm": 1.1683874130249023, |
|
"learning_rate": 3.1338883873408517e-06, |
|
"loss": 3.6437, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 0.4192, |
|
"grad_norm": 4.025966167449951, |
|
"learning_rate": 3.127808535416454e-06, |
|
"loss": 1.2751, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 1.7916266918182373, |
|
"learning_rate": 3.121724717912138e-06, |
|
"loss": 3.5067, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.4208, |
|
"grad_norm": 4.328076362609863, |
|
"learning_rate": 3.1156369732566006e-06, |
|
"loss": 1.6473, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 0.4216, |
|
"grad_norm": 1.400840163230896, |
|
"learning_rate": 3.109545339903347e-06, |
|
"loss": 3.5727, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 0.4224, |
|
"grad_norm": 3.689484119415283, |
|
"learning_rate": 3.1034498563304435e-06, |
|
"loss": 1.3867, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 0.4232, |
|
"grad_norm": 1.0594552755355835, |
|
"learning_rate": 3.0973505610402767e-06, |
|
"loss": 3.7167, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 0.424, |
|
"grad_norm": 4.328317642211914, |
|
"learning_rate": 3.0912474925593124e-06, |
|
"loss": 1.5036, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.4248, |
|
"grad_norm": 1.1060447692871094, |
|
"learning_rate": 3.085140689437846e-06, |
|
"loss": 3.6933, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 0.4256, |
|
"grad_norm": 4.118087291717529, |
|
"learning_rate": 3.0790301902497664e-06, |
|
"loss": 1.3451, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 0.4264, |
|
"grad_norm": 1.254740595817566, |
|
"learning_rate": 3.072916033592307e-06, |
|
"loss": 3.5871, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 0.4272, |
|
"grad_norm": 4.144657611846924, |
|
"learning_rate": 3.0667982580858047e-06, |
|
"loss": 1.4215, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 0.428, |
|
"grad_norm": 1.1598517894744873, |
|
"learning_rate": 3.0606769023734535e-06, |
|
"loss": 3.6583, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.4288, |
|
"grad_norm": 4.24267578125, |
|
"learning_rate": 3.0545520051210637e-06, |
|
"loss": 1.2563, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 0.4296, |
|
"grad_norm": 1.5326381921768188, |
|
"learning_rate": 3.048423605016815e-06, |
|
"loss": 3.5047, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 0.4304, |
|
"grad_norm": 4.730625629425049, |
|
"learning_rate": 3.042291740771014e-06, |
|
"loss": 1.3603, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 0.4312, |
|
"grad_norm": 1.132880687713623, |
|
"learning_rate": 3.036156451115846e-06, |
|
"loss": 3.6709, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 0.432, |
|
"grad_norm": 3.7942590713500977, |
|
"learning_rate": 3.0300177748051375e-06, |
|
"loss": 1.3794, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.4328, |
|
"grad_norm": 1.4315778017044067, |
|
"learning_rate": 3.0238757506141013e-06, |
|
"loss": 3.5769, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 0.4336, |
|
"grad_norm": 3.5602166652679443, |
|
"learning_rate": 3.0177304173391038e-06, |
|
"loss": 1.2704, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 0.4344, |
|
"grad_norm": 1.1675716638565063, |
|
"learning_rate": 3.0115818137974066e-06, |
|
"loss": 3.5886, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 0.4352, |
|
"grad_norm": 4.345582962036133, |
|
"learning_rate": 3.0054299788269343e-06, |
|
"loss": 1.4216, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 0.436, |
|
"grad_norm": 1.762725591659546, |
|
"learning_rate": 2.9992749512860177e-06, |
|
"loss": 3.4446, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.4368, |
|
"grad_norm": 3.993100643157959, |
|
"learning_rate": 2.9931167700531575e-06, |
|
"loss": 1.343, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 0.4376, |
|
"grad_norm": 1.2319386005401611, |
|
"learning_rate": 2.9869554740267726e-06, |
|
"loss": 3.603, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 0.4384, |
|
"grad_norm": 4.317058086395264, |
|
"learning_rate": 2.9807911021249573e-06, |
|
"loss": 1.4564, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 0.4392, |
|
"grad_norm": 1.6317486763000488, |
|
"learning_rate": 2.9746236932852355e-06, |
|
"loss": 3.5411, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 3.8238189220428467, |
|
"learning_rate": 2.9684532864643123e-06, |
|
"loss": 1.1421, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.4408, |
|
"grad_norm": 1.9044779539108276, |
|
"learning_rate": 2.9622799206378306e-06, |
|
"loss": 3.6848, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 0.4416, |
|
"grad_norm": 3.827505588531494, |
|
"learning_rate": 2.956103634800126e-06, |
|
"loss": 1.3386, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 0.4424, |
|
"grad_norm": 1.3661056756973267, |
|
"learning_rate": 2.949924467963975e-06, |
|
"loss": 3.4422, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 0.4432, |
|
"grad_norm": 4.082735538482666, |
|
"learning_rate": 2.943742459160354e-06, |
|
"loss": 1.3541, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 0.444, |
|
"grad_norm": 1.28450345993042, |
|
"learning_rate": 2.9375576474381907e-06, |
|
"loss": 3.5994, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.4448, |
|
"grad_norm": 3.4685943126678467, |
|
"learning_rate": 2.9313700718641167e-06, |
|
"loss": 1.4483, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 0.4456, |
|
"grad_norm": 1.7730368375778198, |
|
"learning_rate": 2.925179771522223e-06, |
|
"loss": 3.6276, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 0.4464, |
|
"grad_norm": 3.9150004386901855, |
|
"learning_rate": 2.9189867855138103e-06, |
|
"loss": 1.3486, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 0.4472, |
|
"grad_norm": 1.5707478523254395, |
|
"learning_rate": 2.912791152957145e-06, |
|
"loss": 3.5531, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 0.448, |
|
"grad_norm": 4.4283766746521, |
|
"learning_rate": 2.9065929129872097e-06, |
|
"loss": 1.4254, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.4488, |
|
"grad_norm": 1.5481115579605103, |
|
"learning_rate": 2.900392104755455e-06, |
|
"loss": 3.4633, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 0.4496, |
|
"grad_norm": 3.5355985164642334, |
|
"learning_rate": 2.8941887674295573e-06, |
|
"loss": 1.3703, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 0.4504, |
|
"grad_norm": 1.2419151067733765, |
|
"learning_rate": 2.887982940193165e-06, |
|
"loss": 3.6656, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 0.4512, |
|
"grad_norm": 4.397960186004639, |
|
"learning_rate": 2.8817746622456585e-06, |
|
"loss": 1.338, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 0.452, |
|
"grad_norm": 1.4676947593688965, |
|
"learning_rate": 2.875563972801893e-06, |
|
"loss": 3.6548, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.4528, |
|
"grad_norm": 4.111155033111572, |
|
"learning_rate": 2.8693509110919597e-06, |
|
"loss": 1.3694, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 0.4536, |
|
"grad_norm": 1.7541122436523438, |
|
"learning_rate": 2.863135516360932e-06, |
|
"loss": 3.4508, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 0.4544, |
|
"grad_norm": 4.085772514343262, |
|
"learning_rate": 2.8569178278686222e-06, |
|
"loss": 1.3314, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 0.4552, |
|
"grad_norm": 1.2001174688339233, |
|
"learning_rate": 2.85069788488933e-06, |
|
"loss": 3.5885, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 0.456, |
|
"grad_norm": 4.38803768157959, |
|
"learning_rate": 2.844475726711595e-06, |
|
"loss": 1.1816, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.4568, |
|
"grad_norm": 1.2394533157348633, |
|
"learning_rate": 2.8382513926379508e-06, |
|
"loss": 3.6019, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 0.4576, |
|
"grad_norm": 4.420421600341797, |
|
"learning_rate": 2.832024921984674e-06, |
|
"loss": 1.4351, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 0.4584, |
|
"grad_norm": 1.2522428035736084, |
|
"learning_rate": 2.825796354081537e-06, |
|
"loss": 3.6141, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 0.4592, |
|
"grad_norm": 4.002085208892822, |
|
"learning_rate": 2.8195657282715595e-06, |
|
"loss": 1.1009, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 1.433961272239685, |
|
"learning_rate": 2.813333083910761e-06, |
|
"loss": 3.6517, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.4608, |
|
"grad_norm": 4.165874004364014, |
|
"learning_rate": 2.807098460367911e-06, |
|
"loss": 1.3473, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 0.4616, |
|
"grad_norm": 1.468865990638733, |
|
"learning_rate": 2.800861897024279e-06, |
|
"loss": 3.6747, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 0.4624, |
|
"grad_norm": 4.306812286376953, |
|
"learning_rate": 2.79462343327339e-06, |
|
"loss": 1.416, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 0.4632, |
|
"grad_norm": 1.0383753776550293, |
|
"learning_rate": 2.7883831085207707e-06, |
|
"loss": 3.575, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 0.464, |
|
"grad_norm": 4.186305999755859, |
|
"learning_rate": 2.7821409621837042e-06, |
|
"loss": 1.5874, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.4648, |
|
"grad_norm": 1.3052856922149658, |
|
"learning_rate": 2.7758970336909795e-06, |
|
"loss": 3.6154, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 0.4656, |
|
"grad_norm": 3.598694324493408, |
|
"learning_rate": 2.7696513624826422e-06, |
|
"loss": 1.2231, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 0.4664, |
|
"grad_norm": 1.3978124856948853, |
|
"learning_rate": 2.763403988009746e-06, |
|
"loss": 3.5403, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 0.4672, |
|
"grad_norm": 3.618967056274414, |
|
"learning_rate": 2.7571549497341044e-06, |
|
"loss": 1.29, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 0.468, |
|
"grad_norm": 1.4016177654266357, |
|
"learning_rate": 2.7509042871280373e-06, |
|
"loss": 3.6256, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.4688, |
|
"grad_norm": 3.9204423427581787, |
|
"learning_rate": 2.7446520396741293e-06, |
|
"loss": 1.4597, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 0.4696, |
|
"grad_norm": 1.4617024660110474, |
|
"learning_rate": 2.7383982468649715e-06, |
|
"loss": 3.482, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 0.4704, |
|
"grad_norm": 4.012588024139404, |
|
"learning_rate": 2.73214294820292e-06, |
|
"loss": 1.2928, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 0.4712, |
|
"grad_norm": 1.4617540836334229, |
|
"learning_rate": 2.725886183199839e-06, |
|
"loss": 3.626, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 0.472, |
|
"grad_norm": 3.5914876461029053, |
|
"learning_rate": 2.7196279913768587e-06, |
|
"loss": 1.3148, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.4728, |
|
"grad_norm": 1.4136903285980225, |
|
"learning_rate": 2.713368412264118e-06, |
|
"loss": 3.5289, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 0.4736, |
|
"grad_norm": 3.7139124870300293, |
|
"learning_rate": 2.7071074854005206e-06, |
|
"loss": 1.3292, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 0.4744, |
|
"grad_norm": 1.2121789455413818, |
|
"learning_rate": 2.700845250333486e-06, |
|
"loss": 3.6458, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 0.4752, |
|
"grad_norm": 4.53924036026001, |
|
"learning_rate": 2.694581746618691e-06, |
|
"loss": 1.3469, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 0.476, |
|
"grad_norm": 1.2464954853057861, |
|
"learning_rate": 2.688317013819832e-06, |
|
"loss": 3.5712, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.4768, |
|
"grad_norm": 3.8551762104034424, |
|
"learning_rate": 2.682051091508365e-06, |
|
"loss": 1.3476, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 0.4776, |
|
"grad_norm": 1.2209997177124023, |
|
"learning_rate": 2.67578401926326e-06, |
|
"loss": 3.6444, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 0.4784, |
|
"grad_norm": 4.334421634674072, |
|
"learning_rate": 2.6695158366707526e-06, |
|
"loss": 1.4771, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 0.4792, |
|
"grad_norm": 1.5928137302398682, |
|
"learning_rate": 2.6632465833240895e-06, |
|
"loss": 3.4254, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 3.963142156600952, |
|
"learning_rate": 2.6569762988232838e-06, |
|
"loss": 1.3901, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.4808, |
|
"grad_norm": 1.2593353986740112, |
|
"learning_rate": 2.6507050227748595e-06, |
|
"loss": 3.5619, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 0.4816, |
|
"grad_norm": 4.629072189331055, |
|
"learning_rate": 2.6444327947916037e-06, |
|
"loss": 1.5413, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 0.4824, |
|
"grad_norm": 1.2204415798187256, |
|
"learning_rate": 2.6381596544923184e-06, |
|
"loss": 3.6041, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 0.4832, |
|
"grad_norm": 4.39404821395874, |
|
"learning_rate": 2.6318856415015664e-06, |
|
"loss": 1.1507, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 0.484, |
|
"grad_norm": 1.2167773246765137, |
|
"learning_rate": 2.625610795449424e-06, |
|
"loss": 3.5377, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.4848, |
|
"grad_norm": 4.067314624786377, |
|
"learning_rate": 2.6193351559712294e-06, |
|
"loss": 1.3543, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 0.4856, |
|
"grad_norm": 1.054069995880127, |
|
"learning_rate": 2.6130587627073315e-06, |
|
"loss": 3.678, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 0.4864, |
|
"grad_norm": 4.561433792114258, |
|
"learning_rate": 2.606781655302843e-06, |
|
"loss": 1.5264, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 0.4872, |
|
"grad_norm": 1.6582963466644287, |
|
"learning_rate": 2.6005038734073833e-06, |
|
"loss": 3.4737, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 0.488, |
|
"grad_norm": 4.4807233810424805, |
|
"learning_rate": 2.594225456674837e-06, |
|
"loss": 1.5468, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.4888, |
|
"grad_norm": 1.4780353307724, |
|
"learning_rate": 2.5879464447630947e-06, |
|
"loss": 3.6692, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 0.4896, |
|
"grad_norm": 4.209949493408203, |
|
"learning_rate": 2.58166687733381e-06, |
|
"loss": 1.2275, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 0.4904, |
|
"grad_norm": 1.4267958402633667, |
|
"learning_rate": 2.575386794052142e-06, |
|
"loss": 3.4531, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 0.4912, |
|
"grad_norm": 3.8919217586517334, |
|
"learning_rate": 2.569106234586511e-06, |
|
"loss": 1.3178, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 0.492, |
|
"grad_norm": 1.4168897867202759, |
|
"learning_rate": 2.5628252386083443e-06, |
|
"loss": 3.4955, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.4928, |
|
"grad_norm": 3.9594831466674805, |
|
"learning_rate": 2.5565438457918247e-06, |
|
"loss": 1.3968, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 0.4936, |
|
"grad_norm": 1.1420923471450806, |
|
"learning_rate": 2.5502620958136444e-06, |
|
"loss": 3.6264, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 0.4944, |
|
"grad_norm": 4.060093402862549, |
|
"learning_rate": 2.5439800283527495e-06, |
|
"loss": 1.3898, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 0.4952, |
|
"grad_norm": 1.4885039329528809, |
|
"learning_rate": 2.537697683090093e-06, |
|
"loss": 3.492, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 0.496, |
|
"grad_norm": 4.163914203643799, |
|
"learning_rate": 2.531415099708382e-06, |
|
"loss": 1.1859, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.4968, |
|
"grad_norm": 1.1269545555114746, |
|
"learning_rate": 2.525132317891827e-06, |
|
"loss": 3.5954, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 0.4976, |
|
"grad_norm": 4.090238571166992, |
|
"learning_rate": 2.518849377325893e-06, |
|
"loss": 1.3966, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 0.4984, |
|
"grad_norm": 1.5226904153823853, |
|
"learning_rate": 2.5125663176970475e-06, |
|
"loss": 3.6323, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 0.4992, |
|
"grad_norm": 3.7972140312194824, |
|
"learning_rate": 2.5062831786925102e-06, |
|
"loss": 1.39, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 1.4045028686523438, |
|
"learning_rate": 2.5e-06, |
|
"loss": 3.5625, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.5008, |
|
"grad_norm": 3.8131749629974365, |
|
"learning_rate": 2.4937168213074906e-06, |
|
"loss": 1.2028, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 0.5016, |
|
"grad_norm": 2.0082039833068848, |
|
"learning_rate": 2.487433682302953e-06, |
|
"loss": 3.3618, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 0.5024, |
|
"grad_norm": 4.199687957763672, |
|
"learning_rate": 2.4811506226741077e-06, |
|
"loss": 1.2716, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 0.5032, |
|
"grad_norm": 1.1121747493743896, |
|
"learning_rate": 2.474867682108174e-06, |
|
"loss": 3.5795, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 0.504, |
|
"grad_norm": 3.8342783451080322, |
|
"learning_rate": 2.4685849002916184e-06, |
|
"loss": 1.2034, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.5048, |
|
"grad_norm": 1.6797664165496826, |
|
"learning_rate": 2.4623023169099074e-06, |
|
"loss": 3.5073, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 0.5056, |
|
"grad_norm": 5.292508125305176, |
|
"learning_rate": 2.456019971647251e-06, |
|
"loss": 1.3187, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 0.5064, |
|
"grad_norm": 1.0428590774536133, |
|
"learning_rate": 2.449737904186357e-06, |
|
"loss": 3.6168, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 0.5072, |
|
"grad_norm": 3.81816029548645, |
|
"learning_rate": 2.4434561542081765e-06, |
|
"loss": 1.3212, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 0.508, |
|
"grad_norm": 1.0982403755187988, |
|
"learning_rate": 2.4371747613916566e-06, |
|
"loss": 3.6012, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.5088, |
|
"grad_norm": 4.740167617797852, |
|
"learning_rate": 2.4308937654134893e-06, |
|
"loss": 1.3399, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 0.5096, |
|
"grad_norm": 1.26600980758667, |
|
"learning_rate": 2.4246132059478582e-06, |
|
"loss": 3.5275, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 0.5104, |
|
"grad_norm": 4.418180465698242, |
|
"learning_rate": 2.4183331226661913e-06, |
|
"loss": 1.4019, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 0.5112, |
|
"grad_norm": 2.0348660945892334, |
|
"learning_rate": 2.4120535552369057e-06, |
|
"loss": 3.5616, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 0.512, |
|
"grad_norm": 3.7417869567871094, |
|
"learning_rate": 2.4057745433251637e-06, |
|
"loss": 1.3269, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.5128, |
|
"grad_norm": 1.818655252456665, |
|
"learning_rate": 2.3994961265926166e-06, |
|
"loss": 3.5734, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 0.5136, |
|
"grad_norm": 3.8714828491210938, |
|
"learning_rate": 2.3932183446971584e-06, |
|
"loss": 1.3336, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 0.5144, |
|
"grad_norm": 1.1985024213790894, |
|
"learning_rate": 2.386941237292669e-06, |
|
"loss": 3.5905, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 0.5152, |
|
"grad_norm": 3.901711940765381, |
|
"learning_rate": 2.3806648440287715e-06, |
|
"loss": 1.1541, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 0.516, |
|
"grad_norm": 1.3076053857803345, |
|
"learning_rate": 2.3743892045505764e-06, |
|
"loss": 3.6319, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.5168, |
|
"grad_norm": 3.9768855571746826, |
|
"learning_rate": 2.368114358498434e-06, |
|
"loss": 1.5297, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 0.5176, |
|
"grad_norm": 1.135161280632019, |
|
"learning_rate": 2.361840345507683e-06, |
|
"loss": 3.6021, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 0.5184, |
|
"grad_norm": 3.6397156715393066, |
|
"learning_rate": 2.355567205208397e-06, |
|
"loss": 1.3282, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 0.5192, |
|
"grad_norm": 1.3913445472717285, |
|
"learning_rate": 2.3492949772251418e-06, |
|
"loss": 3.4597, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 3.9108190536499023, |
|
"learning_rate": 2.3430237011767166e-06, |
|
"loss": 1.0836, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.5208, |
|
"grad_norm": 1.6176162958145142, |
|
"learning_rate": 2.3367534166759105e-06, |
|
"loss": 3.5934, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 0.5216, |
|
"grad_norm": 3.639057159423828, |
|
"learning_rate": 2.3304841633292487e-06, |
|
"loss": 1.2418, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 0.5224, |
|
"grad_norm": 1.5021276473999023, |
|
"learning_rate": 2.324215980736741e-06, |
|
"loss": 3.4284, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 0.5232, |
|
"grad_norm": 5.434640407562256, |
|
"learning_rate": 2.317948908491636e-06, |
|
"loss": 1.3802, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 0.524, |
|
"grad_norm": 1.7329832315444946, |
|
"learning_rate": 2.3116829861801687e-06, |
|
"loss": 3.4577, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.5248, |
|
"grad_norm": 3.633262872695923, |
|
"learning_rate": 2.305418253381309e-06, |
|
"loss": 1.1311, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 0.5256, |
|
"grad_norm": 1.2898222208023071, |
|
"learning_rate": 2.299154749666515e-06, |
|
"loss": 3.5833, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 0.5264, |
|
"grad_norm": 3.3343076705932617, |
|
"learning_rate": 2.2928925145994798e-06, |
|
"loss": 1.2565, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 0.5272, |
|
"grad_norm": 1.1492732763290405, |
|
"learning_rate": 2.286631587735883e-06, |
|
"loss": 3.6572, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 0.528, |
|
"grad_norm": 4.284005165100098, |
|
"learning_rate": 2.280372008623142e-06, |
|
"loss": 1.4464, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.5288, |
|
"grad_norm": 1.7030223608016968, |
|
"learning_rate": 2.274113816800161e-06, |
|
"loss": 3.4687, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 0.5296, |
|
"grad_norm": 4.307010650634766, |
|
"learning_rate": 2.267857051797081e-06, |
|
"loss": 1.3294, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 0.5304, |
|
"grad_norm": 1.5467772483825684, |
|
"learning_rate": 2.261601753135029e-06, |
|
"loss": 3.5568, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 0.5312, |
|
"grad_norm": 3.650076150894165, |
|
"learning_rate": 2.255347960325871e-06, |
|
"loss": 1.3358, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 0.532, |
|
"grad_norm": 1.5734375715255737, |
|
"learning_rate": 2.2490957128719627e-06, |
|
"loss": 3.4565, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.5328, |
|
"grad_norm": 3.6878743171691895, |
|
"learning_rate": 2.2428450502658964e-06, |
|
"loss": 1.1379, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 0.5336, |
|
"grad_norm": 1.115048885345459, |
|
"learning_rate": 2.2365960119902543e-06, |
|
"loss": 3.6159, |
|
"step": 667 |
|
}, |
|
{ |
|
"epoch": 0.5344, |
|
"grad_norm": 4.451643943786621, |
|
"learning_rate": 2.2303486375173586e-06, |
|
"loss": 1.3798, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 0.5352, |
|
"grad_norm": 1.2209587097167969, |
|
"learning_rate": 2.224102966309021e-06, |
|
"loss": 3.5913, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 0.536, |
|
"grad_norm": 3.687743663787842, |
|
"learning_rate": 2.2178590378162957e-06, |
|
"loss": 1.2116, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.5368, |
|
"grad_norm": 1.4728742837905884, |
|
"learning_rate": 2.2116168914792293e-06, |
|
"loss": 3.5415, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 0.5376, |
|
"grad_norm": 3.96630859375, |
|
"learning_rate": 2.205376566726611e-06, |
|
"loss": 1.3889, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 0.5384, |
|
"grad_norm": 1.215154767036438, |
|
"learning_rate": 2.1991381029757216e-06, |
|
"loss": 3.5867, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 0.5392, |
|
"grad_norm": 3.8956687450408936, |
|
"learning_rate": 2.19290153963209e-06, |
|
"loss": 1.5616, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 1.183532476425171, |
|
"learning_rate": 2.186666916089239e-06, |
|
"loss": 3.5136, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.5408, |
|
"grad_norm": 3.5824153423309326, |
|
"learning_rate": 2.1804342717284414e-06, |
|
"loss": 1.2544, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 0.5416, |
|
"grad_norm": 1.325810432434082, |
|
"learning_rate": 2.174203645918464e-06, |
|
"loss": 3.5406, |
|
"step": 677 |
|
}, |
|
{ |
|
"epoch": 0.5424, |
|
"grad_norm": 3.4541144371032715, |
|
"learning_rate": 2.1679750780153265e-06, |
|
"loss": 1.3576, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 0.5432, |
|
"grad_norm": 1.5813454389572144, |
|
"learning_rate": 2.1617486073620496e-06, |
|
"loss": 3.4813, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 0.544, |
|
"grad_norm": 3.9602949619293213, |
|
"learning_rate": 2.155524273288405e-06, |
|
"loss": 1.426, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.5448, |
|
"grad_norm": 1.4534196853637695, |
|
"learning_rate": 2.1493021151106704e-06, |
|
"loss": 3.5585, |
|
"step": 681 |
|
}, |
|
{ |
|
"epoch": 0.5456, |
|
"grad_norm": 3.9135422706604004, |
|
"learning_rate": 2.143082172131378e-06, |
|
"loss": 1.3641, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 0.5464, |
|
"grad_norm": 1.6020511388778687, |
|
"learning_rate": 2.1368644836390684e-06, |
|
"loss": 3.5024, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 0.5472, |
|
"grad_norm": 4.677028179168701, |
|
"learning_rate": 2.130649088908041e-06, |
|
"loss": 1.366, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 0.548, |
|
"grad_norm": 1.4928466081619263, |
|
"learning_rate": 2.1244360271981073e-06, |
|
"loss": 3.5495, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.5488, |
|
"grad_norm": 4.278928279876709, |
|
"learning_rate": 2.1182253377543428e-06, |
|
"loss": 1.3534, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 0.5496, |
|
"grad_norm": 1.3462296724319458, |
|
"learning_rate": 2.1120170598068353e-06, |
|
"loss": 3.6396, |
|
"step": 687 |
|
}, |
|
{ |
|
"epoch": 0.5504, |
|
"grad_norm": 5.2212653160095215, |
|
"learning_rate": 2.1058112325704436e-06, |
|
"loss": 1.3357, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 0.5512, |
|
"grad_norm": 1.1819498538970947, |
|
"learning_rate": 2.0996078952445453e-06, |
|
"loss": 3.6596, |
|
"step": 689 |
|
}, |
|
{ |
|
"epoch": 0.552, |
|
"grad_norm": 3.7068729400634766, |
|
"learning_rate": 2.093407087012791e-06, |
|
"loss": 1.3518, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.5528, |
|
"grad_norm": 1.0458273887634277, |
|
"learning_rate": 2.0872088470428553e-06, |
|
"loss": 3.607, |
|
"step": 691 |
|
}, |
|
{ |
|
"epoch": 0.5536, |
|
"grad_norm": 4.25509786605835, |
|
"learning_rate": 2.08101321448619e-06, |
|
"loss": 1.4629, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 0.5544, |
|
"grad_norm": 1.1481705904006958, |
|
"learning_rate": 2.0748202284777775e-06, |
|
"loss": 3.6161, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 0.5552, |
|
"grad_norm": 3.934365749359131, |
|
"learning_rate": 2.0686299281358837e-06, |
|
"loss": 1.4318, |
|
"step": 694 |
|
}, |
|
{ |
|
"epoch": 0.556, |
|
"grad_norm": 1.4977188110351562, |
|
"learning_rate": 2.0624423525618097e-06, |
|
"loss": 3.6224, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.5568, |
|
"grad_norm": 3.6773321628570557, |
|
"learning_rate": 2.0562575408396475e-06, |
|
"loss": 1.1651, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 0.5576, |
|
"grad_norm": 1.449863314628601, |
|
"learning_rate": 2.0500755320360263e-06, |
|
"loss": 3.6073, |
|
"step": 697 |
|
}, |
|
{ |
|
"epoch": 0.5584, |
|
"grad_norm": 3.81058406829834, |
|
"learning_rate": 2.0438963651998747e-06, |
|
"loss": 1.2255, |
|
"step": 698 |
|
}, |
|
{ |
|
"epoch": 0.5592, |
|
"grad_norm": 1.1542376279830933, |
|
"learning_rate": 2.0377200793621694e-06, |
|
"loss": 3.6066, |
|
"step": 699 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 4.023213863372803, |
|
"learning_rate": 2.031546713535688e-06, |
|
"loss": 1.3477, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.5608, |
|
"grad_norm": 1.3673769235610962, |
|
"learning_rate": 2.0253763067147657e-06, |
|
"loss": 3.5453, |
|
"step": 701 |
|
}, |
|
{ |
|
"epoch": 0.5616, |
|
"grad_norm": 4.080592155456543, |
|
"learning_rate": 2.019208897875043e-06, |
|
"loss": 1.4669, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 0.5624, |
|
"grad_norm": 1.4954679012298584, |
|
"learning_rate": 2.0130445259732282e-06, |
|
"loss": 3.4227, |
|
"step": 703 |
|
}, |
|
{ |
|
"epoch": 0.5632, |
|
"grad_norm": 4.1900248527526855, |
|
"learning_rate": 2.006883229946843e-06, |
|
"loss": 1.4427, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 0.564, |
|
"grad_norm": 1.4168885946273804, |
|
"learning_rate": 2.0007250487139827e-06, |
|
"loss": 3.6209, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.5648, |
|
"grad_norm": 3.834075450897217, |
|
"learning_rate": 1.994570021173067e-06, |
|
"loss": 1.2146, |
|
"step": 706 |
|
}, |
|
{ |
|
"epoch": 0.5656, |
|
"grad_norm": 1.18809974193573, |
|
"learning_rate": 1.9884181862025938e-06, |
|
"loss": 3.5612, |
|
"step": 707 |
|
}, |
|
{ |
|
"epoch": 0.5664, |
|
"grad_norm": 3.8719165325164795, |
|
"learning_rate": 1.9822695826608975e-06, |
|
"loss": 1.4709, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 0.5672, |
|
"grad_norm": 1.2471320629119873, |
|
"learning_rate": 1.9761242493858987e-06, |
|
"loss": 3.5347, |
|
"step": 709 |
|
}, |
|
{ |
|
"epoch": 0.568, |
|
"grad_norm": 3.889285087585449, |
|
"learning_rate": 1.969982225194864e-06, |
|
"loss": 1.1893, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.5688, |
|
"grad_norm": 1.6830719709396362, |
|
"learning_rate": 1.9638435488841543e-06, |
|
"loss": 3.3654, |
|
"step": 711 |
|
}, |
|
{ |
|
"epoch": 0.5696, |
|
"grad_norm": 3.806553363800049, |
|
"learning_rate": 1.957708259228987e-06, |
|
"loss": 1.179, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 0.5704, |
|
"grad_norm": 1.273412823677063, |
|
"learning_rate": 1.9515763949831852e-06, |
|
"loss": 3.5977, |
|
"step": 713 |
|
}, |
|
{ |
|
"epoch": 0.5712, |
|
"grad_norm": 3.846447229385376, |
|
"learning_rate": 1.945447994878937e-06, |
|
"loss": 1.559, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 0.572, |
|
"grad_norm": 1.3436466455459595, |
|
"learning_rate": 1.9393230976265478e-06, |
|
"loss": 3.6578, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.5728, |
|
"grad_norm": 3.7785065174102783, |
|
"learning_rate": 1.933201741914196e-06, |
|
"loss": 1.4349, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 0.5736, |
|
"grad_norm": 1.8797110319137573, |
|
"learning_rate": 1.9270839664076937e-06, |
|
"loss": 3.545, |
|
"step": 717 |
|
}, |
|
{ |
|
"epoch": 0.5744, |
|
"grad_norm": 4.088225841522217, |
|
"learning_rate": 1.920969809750234e-06, |
|
"loss": 1.31, |
|
"step": 718 |
|
}, |
|
{ |
|
"epoch": 0.5752, |
|
"grad_norm": 1.348626732826233, |
|
"learning_rate": 1.9148593105621542e-06, |
|
"loss": 3.5437, |
|
"step": 719 |
|
}, |
|
{ |
|
"epoch": 0.576, |
|
"grad_norm": 3.5283923149108887, |
|
"learning_rate": 1.908752507440689e-06, |
|
"loss": 1.179, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.5768, |
|
"grad_norm": 1.4678329229354858, |
|
"learning_rate": 1.9026494389597239e-06, |
|
"loss": 3.5683, |
|
"step": 721 |
|
}, |
|
{ |
|
"epoch": 0.5776, |
|
"grad_norm": 4.486749172210693, |
|
"learning_rate": 1.8965501436695578e-06, |
|
"loss": 1.2648, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 0.5784, |
|
"grad_norm": 1.4773081541061401, |
|
"learning_rate": 1.8904546600966539e-06, |
|
"loss": 3.5973, |
|
"step": 723 |
|
}, |
|
{ |
|
"epoch": 0.5792, |
|
"grad_norm": 4.043974876403809, |
|
"learning_rate": 1.8843630267434e-06, |
|
"loss": 1.425, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 1.2826696634292603, |
|
"learning_rate": 1.8782752820878636e-06, |
|
"loss": 3.5147, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.5808, |
|
"grad_norm": 3.6155593395233154, |
|
"learning_rate": 1.872191464583547e-06, |
|
"loss": 1.4485, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 0.5816, |
|
"grad_norm": 1.2381564378738403, |
|
"learning_rate": 1.8661116126591492e-06, |
|
"loss": 3.64, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 0.5824, |
|
"grad_norm": 4.1232380867004395, |
|
"learning_rate": 1.8600357647183188e-06, |
|
"loss": 1.3699, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 0.5832, |
|
"grad_norm": 1.070135474205017, |
|
"learning_rate": 1.8539639591394131e-06, |
|
"loss": 3.5735, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 0.584, |
|
"grad_norm": 3.9993014335632324, |
|
"learning_rate": 1.8478962342752584e-06, |
|
"loss": 1.46, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.5848, |
|
"grad_norm": 1.5479552745819092, |
|
"learning_rate": 1.8418326284528997e-06, |
|
"loss": 3.431, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 0.5856, |
|
"grad_norm": 4.261895656585693, |
|
"learning_rate": 1.8357731799733686e-06, |
|
"loss": 1.5391, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 0.5864, |
|
"grad_norm": 0.9864424467086792, |
|
"learning_rate": 1.8297179271114345e-06, |
|
"loss": 3.6108, |
|
"step": 733 |
|
}, |
|
{ |
|
"epoch": 0.5872, |
|
"grad_norm": 4.133561134338379, |
|
"learning_rate": 1.8236669081153657e-06, |
|
"loss": 1.3051, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 0.588, |
|
"grad_norm": 1.7257312536239624, |
|
"learning_rate": 1.8176201612066874e-06, |
|
"loss": 3.5698, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.5888, |
|
"grad_norm": 3.8284997940063477, |
|
"learning_rate": 1.8115777245799383e-06, |
|
"loss": 1.1011, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 0.5896, |
|
"grad_norm": 1.4894834756851196, |
|
"learning_rate": 1.8055396364024318e-06, |
|
"loss": 3.5975, |
|
"step": 737 |
|
}, |
|
{ |
|
"epoch": 0.5904, |
|
"grad_norm": 4.291233539581299, |
|
"learning_rate": 1.7995059348140165e-06, |
|
"loss": 1.4558, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 0.5912, |
|
"grad_norm": 1.2095164060592651, |
|
"learning_rate": 1.7934766579268292e-06, |
|
"loss": 3.5745, |
|
"step": 739 |
|
}, |
|
{ |
|
"epoch": 0.592, |
|
"grad_norm": 4.15226936340332, |
|
"learning_rate": 1.7874518438250598e-06, |
|
"loss": 1.4725, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.5928, |
|
"grad_norm": 1.2965120077133179, |
|
"learning_rate": 1.7814315305647095e-06, |
|
"loss": 3.5479, |
|
"step": 741 |
|
}, |
|
{ |
|
"epoch": 0.5936, |
|
"grad_norm": 3.704596519470215, |
|
"learning_rate": 1.7754157561733476e-06, |
|
"loss": 1.2924, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 0.5944, |
|
"grad_norm": 1.8090176582336426, |
|
"learning_rate": 1.7694045586498754e-06, |
|
"loss": 3.418, |
|
"step": 743 |
|
}, |
|
{ |
|
"epoch": 0.5952, |
|
"grad_norm": 3.9790186882019043, |
|
"learning_rate": 1.7633979759642844e-06, |
|
"loss": 1.4173, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 0.596, |
|
"grad_norm": 1.8232885599136353, |
|
"learning_rate": 1.7573960460574133e-06, |
|
"loss": 3.5081, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 0.5968, |
|
"grad_norm": 3.6959445476531982, |
|
"learning_rate": 1.7513988068407145e-06, |
|
"loss": 1.2422, |
|
"step": 746 |
|
}, |
|
{ |
|
"epoch": 0.5976, |
|
"grad_norm": 1.4322175979614258, |
|
"learning_rate": 1.7454062961960102e-06, |
|
"loss": 3.5851, |
|
"step": 747 |
|
}, |
|
{ |
|
"epoch": 0.5984, |
|
"grad_norm": 3.444291591644287, |
|
"learning_rate": 1.7394185519752546e-06, |
|
"loss": 1.2407, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 0.5992, |
|
"grad_norm": 1.024861454963684, |
|
"learning_rate": 1.7334356120002956e-06, |
|
"loss": 3.6587, |
|
"step": 749 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 4.007371425628662, |
|
"learning_rate": 1.7274575140626318e-06, |
|
"loss": 1.3341, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.6008, |
|
"grad_norm": 1.387477159500122, |
|
"learning_rate": 1.7214842959231796e-06, |
|
"loss": 3.5696, |
|
"step": 751 |
|
}, |
|
{ |
|
"epoch": 0.6016, |
|
"grad_norm": 3.6198816299438477, |
|
"learning_rate": 1.7155159953120315e-06, |
|
"loss": 1.1709, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 0.6024, |
|
"grad_norm": 1.5271052122116089, |
|
"learning_rate": 1.7095526499282172e-06, |
|
"loss": 3.5466, |
|
"step": 753 |
|
}, |
|
{ |
|
"epoch": 0.6032, |
|
"grad_norm": 4.3780317306518555, |
|
"learning_rate": 1.703594297439469e-06, |
|
"loss": 1.4056, |
|
"step": 754 |
|
}, |
|
{ |
|
"epoch": 0.604, |
|
"grad_norm": 1.0889999866485596, |
|
"learning_rate": 1.6976409754819767e-06, |
|
"loss": 3.6382, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 0.6048, |
|
"grad_norm": 4.148120403289795, |
|
"learning_rate": 1.6916927216601593e-06, |
|
"loss": 1.3061, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 0.6056, |
|
"grad_norm": 1.0028917789459229, |
|
"learning_rate": 1.6857495735464196e-06, |
|
"loss": 3.6111, |
|
"step": 757 |
|
}, |
|
{ |
|
"epoch": 0.6064, |
|
"grad_norm": 3.956118583679199, |
|
"learning_rate": 1.6798115686809125e-06, |
|
"loss": 1.4431, |
|
"step": 758 |
|
}, |
|
{ |
|
"epoch": 0.6072, |
|
"grad_norm": 1.1292115449905396, |
|
"learning_rate": 1.673878744571304e-06, |
|
"loss": 3.6654, |
|
"step": 759 |
|
}, |
|
{ |
|
"epoch": 0.608, |
|
"grad_norm": 3.675584554672241, |
|
"learning_rate": 1.6679511386925337e-06, |
|
"loss": 1.2957, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.6088, |
|
"grad_norm": 1.6884305477142334, |
|
"learning_rate": 1.6620287884865831e-06, |
|
"loss": 3.471, |
|
"step": 761 |
|
}, |
|
{ |
|
"epoch": 0.6096, |
|
"grad_norm": 3.8323042392730713, |
|
"learning_rate": 1.656111731362236e-06, |
|
"loss": 1.1559, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 0.6104, |
|
"grad_norm": 1.2776001691818237, |
|
"learning_rate": 1.650200004694839e-06, |
|
"loss": 3.5601, |
|
"step": 763 |
|
}, |
|
{ |
|
"epoch": 0.6112, |
|
"grad_norm": 3.951807737350464, |
|
"learning_rate": 1.6442936458260723e-06, |
|
"loss": 1.2963, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 0.612, |
|
"grad_norm": 1.0104762315750122, |
|
"learning_rate": 1.6383926920637077e-06, |
|
"loss": 3.6454, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 0.6128, |
|
"grad_norm": 3.8364481925964355, |
|
"learning_rate": 1.6324971806813766e-06, |
|
"loss": 1.2477, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 0.6136, |
|
"grad_norm": 1.404075264930725, |
|
"learning_rate": 1.6266071489183327e-06, |
|
"loss": 3.5319, |
|
"step": 767 |
|
}, |
|
{ |
|
"epoch": 0.6144, |
|
"grad_norm": 3.647761583328247, |
|
"learning_rate": 1.620722633979219e-06, |
|
"loss": 1.3192, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 0.6152, |
|
"grad_norm": 1.2602980136871338, |
|
"learning_rate": 1.6148436730338279e-06, |
|
"loss": 3.5468, |
|
"step": 769 |
|
}, |
|
{ |
|
"epoch": 0.616, |
|
"grad_norm": 4.292653560638428, |
|
"learning_rate": 1.6089703032168736e-06, |
|
"loss": 1.1626, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.6168, |
|
"grad_norm": 1.8109797239303589, |
|
"learning_rate": 1.6031025616277512e-06, |
|
"loss": 3.5154, |
|
"step": 771 |
|
}, |
|
{ |
|
"epoch": 0.6176, |
|
"grad_norm": 4.427074909210205, |
|
"learning_rate": 1.5972404853303061e-06, |
|
"loss": 1.1841, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 0.6184, |
|
"grad_norm": 1.114534854888916, |
|
"learning_rate": 1.591384111352599e-06, |
|
"loss": 3.5374, |
|
"step": 773 |
|
}, |
|
{ |
|
"epoch": 0.6192, |
|
"grad_norm": 3.930265426635742, |
|
"learning_rate": 1.585533476686669e-06, |
|
"loss": 1.203, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 1.7864525318145752, |
|
"learning_rate": 1.5796886182883053e-06, |
|
"loss": 3.4942, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.6208, |
|
"grad_norm": 4.248049259185791, |
|
"learning_rate": 1.5738495730768104e-06, |
|
"loss": 1.5361, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 0.6216, |
|
"grad_norm": 1.1578404903411865, |
|
"learning_rate": 1.5680163779347668e-06, |
|
"loss": 3.5659, |
|
"step": 777 |
|
}, |
|
{ |
|
"epoch": 0.6224, |
|
"grad_norm": 4.111908435821533, |
|
"learning_rate": 1.5621890697078069e-06, |
|
"loss": 1.582, |
|
"step": 778 |
|
}, |
|
{ |
|
"epoch": 0.6232, |
|
"grad_norm": 1.2350143194198608, |
|
"learning_rate": 1.5563676852043738e-06, |
|
"loss": 3.5397, |
|
"step": 779 |
|
}, |
|
{ |
|
"epoch": 0.624, |
|
"grad_norm": 4.6647562980651855, |
|
"learning_rate": 1.5505522611954977e-06, |
|
"loss": 1.5677, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.6248, |
|
"grad_norm": 1.5898746252059937, |
|
"learning_rate": 1.5447428344145565e-06, |
|
"loss": 3.4637, |
|
"step": 781 |
|
}, |
|
{ |
|
"epoch": 0.6256, |
|
"grad_norm": 4.031108856201172, |
|
"learning_rate": 1.538939441557048e-06, |
|
"loss": 1.5085, |
|
"step": 782 |
|
}, |
|
{ |
|
"epoch": 0.6264, |
|
"grad_norm": 1.1129035949707031, |
|
"learning_rate": 1.5331421192803565e-06, |
|
"loss": 3.7525, |
|
"step": 783 |
|
}, |
|
{ |
|
"epoch": 0.6272, |
|
"grad_norm": 3.7480621337890625, |
|
"learning_rate": 1.5273509042035172e-06, |
|
"loss": 1.3526, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 0.628, |
|
"grad_norm": 1.4506335258483887, |
|
"learning_rate": 1.521565832906994e-06, |
|
"loss": 3.4543, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 0.6288, |
|
"grad_norm": 4.091665267944336, |
|
"learning_rate": 1.515786941932441e-06, |
|
"loss": 1.3925, |
|
"step": 786 |
|
}, |
|
{ |
|
"epoch": 0.6296, |
|
"grad_norm": 1.7259176969528198, |
|
"learning_rate": 1.5100142677824752e-06, |
|
"loss": 3.5212, |
|
"step": 787 |
|
}, |
|
{ |
|
"epoch": 0.6304, |
|
"grad_norm": 3.6364309787750244, |
|
"learning_rate": 1.5042478469204437e-06, |
|
"loss": 1.486, |
|
"step": 788 |
|
}, |
|
{ |
|
"epoch": 0.6312, |
|
"grad_norm": 1.0510691404342651, |
|
"learning_rate": 1.4984877157701932e-06, |
|
"loss": 3.5759, |
|
"step": 789 |
|
}, |
|
{ |
|
"epoch": 0.632, |
|
"grad_norm": 3.974539041519165, |
|
"learning_rate": 1.4927339107158437e-06, |
|
"loss": 1.3787, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.6328, |
|
"grad_norm": 1.5087684392929077, |
|
"learning_rate": 1.486986468101555e-06, |
|
"loss": 3.547, |
|
"step": 791 |
|
}, |
|
{ |
|
"epoch": 0.6336, |
|
"grad_norm": 3.6339049339294434, |
|
"learning_rate": 1.481245424231298e-06, |
|
"loss": 1.321, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 0.6344, |
|
"grad_norm": 1.1450809240341187, |
|
"learning_rate": 1.4755108153686275e-06, |
|
"loss": 3.6239, |
|
"step": 793 |
|
}, |
|
{ |
|
"epoch": 0.6352, |
|
"grad_norm": 3.5662426948547363, |
|
"learning_rate": 1.4697826777364478e-06, |
|
"loss": 1.2403, |
|
"step": 794 |
|
}, |
|
{ |
|
"epoch": 0.636, |
|
"grad_norm": 1.2532669305801392, |
|
"learning_rate": 1.46406104751679e-06, |
|
"loss": 3.5814, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 0.6368, |
|
"grad_norm": 3.5871071815490723, |
|
"learning_rate": 1.45834596085058e-06, |
|
"loss": 1.2413, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 0.6376, |
|
"grad_norm": 1.7455424070358276, |
|
"learning_rate": 1.4526374538374133e-06, |
|
"loss": 3.5806, |
|
"step": 797 |
|
}, |
|
{ |
|
"epoch": 0.6384, |
|
"grad_norm": 4.081576824188232, |
|
"learning_rate": 1.4469355625353199e-06, |
|
"loss": 1.314, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 0.6392, |
|
"grad_norm": 1.2774088382720947, |
|
"learning_rate": 1.4412403229605453e-06, |
|
"loss": 3.5766, |
|
"step": 799 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 4.024228572845459, |
|
"learning_rate": 1.4355517710873184e-06, |
|
"loss": 1.3179, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.6408, |
|
"grad_norm": 1.5069676637649536, |
|
"learning_rate": 1.4298699428476236e-06, |
|
"loss": 3.4628, |
|
"step": 801 |
|
}, |
|
{ |
|
"epoch": 0.6416, |
|
"grad_norm": 3.8722047805786133, |
|
"learning_rate": 1.4241948741309783e-06, |
|
"loss": 1.2991, |
|
"step": 802 |
|
}, |
|
{ |
|
"epoch": 0.6424, |
|
"grad_norm": 1.4869807958602905, |
|
"learning_rate": 1.418526600784198e-06, |
|
"loss": 3.5303, |
|
"step": 803 |
|
}, |
|
{ |
|
"epoch": 0.6432, |
|
"grad_norm": 4.096463680267334, |
|
"learning_rate": 1.412865158611179e-06, |
|
"loss": 1.4464, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 0.644, |
|
"grad_norm": 1.3232511281967163, |
|
"learning_rate": 1.4072105833726685e-06, |
|
"loss": 3.5599, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 0.6448, |
|
"grad_norm": 3.500465154647827, |
|
"learning_rate": 1.401562910786034e-06, |
|
"loss": 1.3568, |
|
"step": 806 |
|
}, |
|
{ |
|
"epoch": 0.6456, |
|
"grad_norm": 1.6436785459518433, |
|
"learning_rate": 1.395922176525047e-06, |
|
"loss": 3.5835, |
|
"step": 807 |
|
}, |
|
{ |
|
"epoch": 0.6464, |
|
"grad_norm": 3.5307986736297607, |
|
"learning_rate": 1.3902884162196509e-06, |
|
"loss": 1.3578, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 0.6472, |
|
"grad_norm": 1.2310173511505127, |
|
"learning_rate": 1.384661665455736e-06, |
|
"loss": 3.626, |
|
"step": 809 |
|
}, |
|
{ |
|
"epoch": 0.648, |
|
"grad_norm": 5.397148132324219, |
|
"learning_rate": 1.3790419597749198e-06, |
|
"loss": 1.3758, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.6488, |
|
"grad_norm": 1.2223182916641235, |
|
"learning_rate": 1.373429334674317e-06, |
|
"loss": 3.5392, |
|
"step": 811 |
|
}, |
|
{ |
|
"epoch": 0.6496, |
|
"grad_norm": 5.135192394256592, |
|
"learning_rate": 1.3678238256063193e-06, |
|
"loss": 1.27, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 0.6504, |
|
"grad_norm": 1.457159161567688, |
|
"learning_rate": 1.3622254679783665e-06, |
|
"loss": 3.5182, |
|
"step": 813 |
|
}, |
|
{ |
|
"epoch": 0.6512, |
|
"grad_norm": 3.729689359664917, |
|
"learning_rate": 1.356634297152729e-06, |
|
"loss": 1.219, |
|
"step": 814 |
|
}, |
|
{ |
|
"epoch": 0.652, |
|
"grad_norm": 1.7926121950149536, |
|
"learning_rate": 1.3510503484462807e-06, |
|
"loss": 3.4169, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 0.6528, |
|
"grad_norm": 3.46643328666687, |
|
"learning_rate": 1.3454736571302761e-06, |
|
"loss": 1.2486, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 0.6536, |
|
"grad_norm": 1.3711421489715576, |
|
"learning_rate": 1.3399042584301298e-06, |
|
"loss": 3.5197, |
|
"step": 817 |
|
}, |
|
{ |
|
"epoch": 0.6544, |
|
"grad_norm": 4.594119071960449, |
|
"learning_rate": 1.334342187525189e-06, |
|
"loss": 1.2484, |
|
"step": 818 |
|
}, |
|
{ |
|
"epoch": 0.6552, |
|
"grad_norm": 1.1788302659988403, |
|
"learning_rate": 1.3287874795485168e-06, |
|
"loss": 3.574, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 0.656, |
|
"grad_norm": 3.5496530532836914, |
|
"learning_rate": 1.3232401695866686e-06, |
|
"loss": 1.1791, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.6568, |
|
"grad_norm": 1.140120267868042, |
|
"learning_rate": 1.3177002926794685e-06, |
|
"loss": 3.6431, |
|
"step": 821 |
|
}, |
|
{ |
|
"epoch": 0.6576, |
|
"grad_norm": 4.5700554847717285, |
|
"learning_rate": 1.312167883819791e-06, |
|
"loss": 1.3331, |
|
"step": 822 |
|
}, |
|
{ |
|
"epoch": 0.6584, |
|
"grad_norm": 1.6417975425720215, |
|
"learning_rate": 1.3066429779533352e-06, |
|
"loss": 3.4451, |
|
"step": 823 |
|
}, |
|
{ |
|
"epoch": 0.6592, |
|
"grad_norm": 3.6675314903259277, |
|
"learning_rate": 1.3011256099784103e-06, |
|
"loss": 1.1985, |
|
"step": 824 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 0.9253246784210205, |
|
"learning_rate": 1.2956158147457116e-06, |
|
"loss": 3.6082, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.6608, |
|
"grad_norm": 4.173038482666016, |
|
"learning_rate": 1.2901136270580994e-06, |
|
"loss": 1.2908, |
|
"step": 826 |
|
}, |
|
{ |
|
"epoch": 0.6616, |
|
"grad_norm": 1.7744218111038208, |
|
"learning_rate": 1.2846190816703836e-06, |
|
"loss": 3.4493, |
|
"step": 827 |
|
}, |
|
{ |
|
"epoch": 0.6624, |
|
"grad_norm": 3.8822882175445557, |
|
"learning_rate": 1.279132213289096e-06, |
|
"loss": 1.5025, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 0.6632, |
|
"grad_norm": 1.4533785581588745, |
|
"learning_rate": 1.273653056572282e-06, |
|
"loss": 3.5351, |
|
"step": 829 |
|
}, |
|
{ |
|
"epoch": 0.664, |
|
"grad_norm": 3.9480459690093994, |
|
"learning_rate": 1.2681816461292715e-06, |
|
"loss": 1.3216, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.6648, |
|
"grad_norm": 1.3655693531036377, |
|
"learning_rate": 1.2627180165204671e-06, |
|
"loss": 3.5135, |
|
"step": 831 |
|
}, |
|
{ |
|
"epoch": 0.6656, |
|
"grad_norm": 3.7476413249969482, |
|
"learning_rate": 1.257262202257124e-06, |
|
"loss": 1.4918, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 0.6664, |
|
"grad_norm": 1.7849209308624268, |
|
"learning_rate": 1.251814237801128e-06, |
|
"loss": 3.4437, |
|
"step": 833 |
|
}, |
|
{ |
|
"epoch": 0.6672, |
|
"grad_norm": 4.042788982391357, |
|
"learning_rate": 1.246374157564785e-06, |
|
"loss": 1.1764, |
|
"step": 834 |
|
}, |
|
{ |
|
"epoch": 0.668, |
|
"grad_norm": 1.2156387567520142, |
|
"learning_rate": 1.2409419959105981e-06, |
|
"loss": 3.565, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 0.6688, |
|
"grad_norm": 3.900473117828369, |
|
"learning_rate": 1.2355177871510538e-06, |
|
"loss": 1.2951, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 0.6696, |
|
"grad_norm": 1.0474777221679688, |
|
"learning_rate": 1.2301015655484006e-06, |
|
"loss": 3.6051, |
|
"step": 837 |
|
}, |
|
{ |
|
"epoch": 0.6704, |
|
"grad_norm": 3.8230295181274414, |
|
"learning_rate": 1.2246933653144386e-06, |
|
"loss": 1.4542, |
|
"step": 838 |
|
}, |
|
{ |
|
"epoch": 0.6712, |
|
"grad_norm": 1.6013360023498535, |
|
"learning_rate": 1.2192932206103e-06, |
|
"loss": 3.4223, |
|
"step": 839 |
|
}, |
|
{ |
|
"epoch": 0.672, |
|
"grad_norm": 3.603398084640503, |
|
"learning_rate": 1.2139011655462338e-06, |
|
"loss": 1.1428, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.6728, |
|
"grad_norm": 0.9630873203277588, |
|
"learning_rate": 1.208517234181391e-06, |
|
"loss": 3.63, |
|
"step": 841 |
|
}, |
|
{ |
|
"epoch": 0.6736, |
|
"grad_norm": 3.746964931488037, |
|
"learning_rate": 1.2031414605236066e-06, |
|
"loss": 1.2324, |
|
"step": 842 |
|
}, |
|
{ |
|
"epoch": 0.6744, |
|
"grad_norm": 1.1261411905288696, |
|
"learning_rate": 1.1977738785291894e-06, |
|
"loss": 3.5977, |
|
"step": 843 |
|
}, |
|
{ |
|
"epoch": 0.6752, |
|
"grad_norm": 3.895467519760132, |
|
"learning_rate": 1.1924145221027048e-06, |
|
"loss": 1.1571, |
|
"step": 844 |
|
}, |
|
{ |
|
"epoch": 0.676, |
|
"grad_norm": 1.2304555177688599, |
|
"learning_rate": 1.1870634250967606e-06, |
|
"loss": 3.613, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 0.6768, |
|
"grad_norm": 3.7354040145874023, |
|
"learning_rate": 1.1817206213117943e-06, |
|
"loss": 1.4115, |
|
"step": 846 |
|
}, |
|
{ |
|
"epoch": 0.6776, |
|
"grad_norm": 1.3557534217834473, |
|
"learning_rate": 1.1763861444958573e-06, |
|
"loss": 3.5227, |
|
"step": 847 |
|
}, |
|
{ |
|
"epoch": 0.6784, |
|
"grad_norm": 8.678403854370117, |
|
"learning_rate": 1.1710600283444048e-06, |
|
"loss": 1.3812, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 0.6792, |
|
"grad_norm": 1.2234259843826294, |
|
"learning_rate": 1.1657423065000811e-06, |
|
"loss": 3.5525, |
|
"step": 849 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 4.474430084228516, |
|
"learning_rate": 1.160433012552508e-06, |
|
"loss": 1.5074, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.6808, |
|
"grad_norm": 1.9095535278320312, |
|
"learning_rate": 1.1551321800380722e-06, |
|
"loss": 3.3455, |
|
"step": 851 |
|
}, |
|
{ |
|
"epoch": 0.6816, |
|
"grad_norm": 4.141076564788818, |
|
"learning_rate": 1.1498398424397106e-06, |
|
"loss": 1.2947, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 0.6824, |
|
"grad_norm": 1.9714593887329102, |
|
"learning_rate": 1.1445560331867054e-06, |
|
"loss": 3.455, |
|
"step": 853 |
|
}, |
|
{ |
|
"epoch": 0.6832, |
|
"grad_norm": 4.287348747253418, |
|
"learning_rate": 1.1392807856544682e-06, |
|
"loss": 1.3707, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 0.684, |
|
"grad_norm": 1.3626141548156738, |
|
"learning_rate": 1.1340141331643276e-06, |
|
"loss": 3.4847, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 0.6848, |
|
"grad_norm": 4.172240734100342, |
|
"learning_rate": 1.128756108983325e-06, |
|
"loss": 1.1837, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 0.6856, |
|
"grad_norm": 1.6149402856826782, |
|
"learning_rate": 1.123506746323997e-06, |
|
"loss": 3.3876, |
|
"step": 857 |
|
}, |
|
{ |
|
"epoch": 0.6864, |
|
"grad_norm": 4.046041011810303, |
|
"learning_rate": 1.1182660783441719e-06, |
|
"loss": 1.199, |
|
"step": 858 |
|
}, |
|
{ |
|
"epoch": 0.6872, |
|
"grad_norm": 1.2951021194458008, |
|
"learning_rate": 1.1130341381467569e-06, |
|
"loss": 3.546, |
|
"step": 859 |
|
}, |
|
{ |
|
"epoch": 0.688, |
|
"grad_norm": 3.817901611328125, |
|
"learning_rate": 1.1078109587795311e-06, |
|
"loss": 1.2792, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.6888, |
|
"grad_norm": 1.45967435836792, |
|
"learning_rate": 1.1025965732349318e-06, |
|
"loss": 3.5619, |
|
"step": 861 |
|
}, |
|
{ |
|
"epoch": 0.6896, |
|
"grad_norm": 3.8560800552368164, |
|
"learning_rate": 1.0973910144498534e-06, |
|
"loss": 1.3367, |
|
"step": 862 |
|
}, |
|
{ |
|
"epoch": 0.6904, |
|
"grad_norm": 1.186650037765503, |
|
"learning_rate": 1.0921943153054343e-06, |
|
"loss": 3.5638, |
|
"step": 863 |
|
}, |
|
{ |
|
"epoch": 0.6912, |
|
"grad_norm": 3.8473381996154785, |
|
"learning_rate": 1.0870065086268506e-06, |
|
"loss": 1.3076, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 0.692, |
|
"grad_norm": 1.6394022703170776, |
|
"learning_rate": 1.0818276271831094e-06, |
|
"loss": 3.5127, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 0.6928, |
|
"grad_norm": 4.1624016761779785, |
|
"learning_rate": 1.0766577036868395e-06, |
|
"loss": 1.3827, |
|
"step": 866 |
|
}, |
|
{ |
|
"epoch": 0.6936, |
|
"grad_norm": 1.134089469909668, |
|
"learning_rate": 1.0714967707940876e-06, |
|
"loss": 3.5572, |
|
"step": 867 |
|
}, |
|
{ |
|
"epoch": 0.6944, |
|
"grad_norm": 4.057480335235596, |
|
"learning_rate": 1.0663448611041114e-06, |
|
"loss": 1.4129, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 0.6952, |
|
"grad_norm": 1.2894881963729858, |
|
"learning_rate": 1.0612020071591722e-06, |
|
"loss": 3.5994, |
|
"step": 869 |
|
}, |
|
{ |
|
"epoch": 0.696, |
|
"grad_norm": 3.5933890342712402, |
|
"learning_rate": 1.0560682414443315e-06, |
|
"loss": 1.1426, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.6968, |
|
"grad_norm": 1.4715263843536377, |
|
"learning_rate": 1.0509435963872422e-06, |
|
"loss": 3.5776, |
|
"step": 871 |
|
}, |
|
{ |
|
"epoch": 0.6976, |
|
"grad_norm": 3.6835391521453857, |
|
"learning_rate": 1.0458281043579482e-06, |
|
"loss": 1.3991, |
|
"step": 872 |
|
}, |
|
{ |
|
"epoch": 0.6984, |
|
"grad_norm": 1.2193199396133423, |
|
"learning_rate": 1.0407217976686777e-06, |
|
"loss": 3.5754, |
|
"step": 873 |
|
}, |
|
{ |
|
"epoch": 0.6992, |
|
"grad_norm": 3.6208441257476807, |
|
"learning_rate": 1.0356247085736388e-06, |
|
"loss": 1.2799, |
|
"step": 874 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 1.3012170791625977, |
|
"learning_rate": 1.0305368692688175e-06, |
|
"loss": 3.5576, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.7008, |
|
"grad_norm": 3.988499879837036, |
|
"learning_rate": 1.0254583118917699e-06, |
|
"loss": 1.4413, |
|
"step": 876 |
|
}, |
|
{ |
|
"epoch": 0.7016, |
|
"grad_norm": 1.3237192630767822, |
|
"learning_rate": 1.020389068521426e-06, |
|
"loss": 3.5586, |
|
"step": 877 |
|
}, |
|
{ |
|
"epoch": 0.7024, |
|
"grad_norm": 4.113298416137695, |
|
"learning_rate": 1.0153291711778825e-06, |
|
"loss": 1.4436, |
|
"step": 878 |
|
}, |
|
{ |
|
"epoch": 0.7032, |
|
"grad_norm": 1.1641186475753784, |
|
"learning_rate": 1.0102786518221997e-06, |
|
"loss": 3.5658, |
|
"step": 879 |
|
}, |
|
{ |
|
"epoch": 0.704, |
|
"grad_norm": 4.27529239654541, |
|
"learning_rate": 1.0052375423562038e-06, |
|
"loss": 1.3145, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.7048, |
|
"grad_norm": 1.370846152305603, |
|
"learning_rate": 1.0002058746222807e-06, |
|
"loss": 3.5536, |
|
"step": 881 |
|
}, |
|
{ |
|
"epoch": 0.7056, |
|
"grad_norm": 4.043067932128906, |
|
"learning_rate": 9.951836804031795e-07, |
|
"loss": 1.2685, |
|
"step": 882 |
|
}, |
|
{ |
|
"epoch": 0.7064, |
|
"grad_norm": 1.643572211265564, |
|
"learning_rate": 9.90170991421808e-07, |
|
"loss": 3.5677, |
|
"step": 883 |
|
}, |
|
{ |
|
"epoch": 0.7072, |
|
"grad_norm": 4.03674840927124, |
|
"learning_rate": 9.851678393410343e-07, |
|
"loss": 1.3122, |
|
"step": 884 |
|
}, |
|
{ |
|
"epoch": 0.708, |
|
"grad_norm": 1.0866400003433228, |
|
"learning_rate": 9.801742557634872e-07, |
|
"loss": 3.5932, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 0.7088, |
|
"grad_norm": 3.896414279937744, |
|
"learning_rate": 9.751902722313527e-07, |
|
"loss": 1.2974, |
|
"step": 886 |
|
}, |
|
{ |
|
"epoch": 0.7096, |
|
"grad_norm": 1.1581923961639404, |
|
"learning_rate": 9.702159202261802e-07, |
|
"loss": 3.5641, |
|
"step": 887 |
|
}, |
|
{ |
|
"epoch": 0.7104, |
|
"grad_norm": 3.8378193378448486, |
|
"learning_rate": 9.65251231168681e-07, |
|
"loss": 1.2477, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 0.7112, |
|
"grad_norm": 1.1178447008132935, |
|
"learning_rate": 9.602962364185286e-07, |
|
"loss": 3.5832, |
|
"step": 889 |
|
}, |
|
{ |
|
"epoch": 0.712, |
|
"grad_norm": 3.76153302192688, |
|
"learning_rate": 9.553509672741646e-07, |
|
"loss": 1.5284, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.7128, |
|
"grad_norm": 1.6611312627792358, |
|
"learning_rate": 9.504154549725944e-07, |
|
"loss": 3.4278, |
|
"step": 891 |
|
}, |
|
{ |
|
"epoch": 0.7136, |
|
"grad_norm": 3.821173906326294, |
|
"learning_rate": 9.454897306891972e-07, |
|
"loss": 1.3952, |
|
"step": 892 |
|
}, |
|
{ |
|
"epoch": 0.7144, |
|
"grad_norm": 0.9451780915260315, |
|
"learning_rate": 9.405738255375243e-07, |
|
"loss": 3.5839, |
|
"step": 893 |
|
}, |
|
{ |
|
"epoch": 0.7152, |
|
"grad_norm": 5.367844104766846, |
|
"learning_rate": 9.356677705691058e-07, |
|
"loss": 1.3163, |
|
"step": 894 |
|
}, |
|
{ |
|
"epoch": 0.716, |
|
"grad_norm": 1.4917246103286743, |
|
"learning_rate": 9.307715967732492e-07, |
|
"loss": 3.3808, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 0.7168, |
|
"grad_norm": 4.245250225067139, |
|
"learning_rate": 9.258853350768499e-07, |
|
"loss": 1.3849, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 0.7176, |
|
"grad_norm": 1.8379777669906616, |
|
"learning_rate": 9.210090163441928e-07, |
|
"loss": 3.5479, |
|
"step": 897 |
|
}, |
|
{ |
|
"epoch": 0.7184, |
|
"grad_norm": 3.840579032897949, |
|
"learning_rate": 9.161426713767574e-07, |
|
"loss": 1.3287, |
|
"step": 898 |
|
}, |
|
{ |
|
"epoch": 0.7192, |
|
"grad_norm": 1.2158552408218384, |
|
"learning_rate": 9.112863309130235e-07, |
|
"loss": 3.5524, |
|
"step": 899 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 4.019105434417725, |
|
"learning_rate": 9.064400256282757e-07, |
|
"loss": 1.2645, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.7208, |
|
"grad_norm": 1.4201416969299316, |
|
"learning_rate": 9.01603786134413e-07, |
|
"loss": 3.5722, |
|
"step": 901 |
|
}, |
|
{ |
|
"epoch": 0.7216, |
|
"grad_norm": 3.683457851409912, |
|
"learning_rate": 8.967776429797529e-07, |
|
"loss": 1.2652, |
|
"step": 902 |
|
}, |
|
{ |
|
"epoch": 0.7224, |
|
"grad_norm": 1.3120098114013672, |
|
"learning_rate": 8.919616266488373e-07, |
|
"loss": 3.5835, |
|
"step": 903 |
|
}, |
|
{ |
|
"epoch": 0.7232, |
|
"grad_norm": 3.85827898979187, |
|
"learning_rate": 8.871557675622442e-07, |
|
"loss": 1.407, |
|
"step": 904 |
|
}, |
|
{ |
|
"epoch": 0.724, |
|
"grad_norm": 1.2667253017425537, |
|
"learning_rate": 8.823600960763901e-07, |
|
"loss": 3.5396, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 0.7248, |
|
"grad_norm": 3.5598056316375732, |
|
"learning_rate": 8.775746424833428e-07, |
|
"loss": 1.1467, |
|
"step": 906 |
|
}, |
|
{ |
|
"epoch": 0.7256, |
|
"grad_norm": 1.2805604934692383, |
|
"learning_rate": 8.727994370106288e-07, |
|
"loss": 3.5316, |
|
"step": 907 |
|
}, |
|
{ |
|
"epoch": 0.7264, |
|
"grad_norm": 4.258754253387451, |
|
"learning_rate": 8.680345098210408e-07, |
|
"loss": 1.312, |
|
"step": 908 |
|
}, |
|
{ |
|
"epoch": 0.7272, |
|
"grad_norm": 1.3038127422332764, |
|
"learning_rate": 8.632798910124493e-07, |
|
"loss": 3.5995, |
|
"step": 909 |
|
}, |
|
{ |
|
"epoch": 0.728, |
|
"grad_norm": 3.3651838302612305, |
|
"learning_rate": 8.585356106176093e-07, |
|
"loss": 1.12, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.7288, |
|
"grad_norm": 1.9212744235992432, |
|
"learning_rate": 8.538016986039751e-07, |
|
"loss": 3.5292, |
|
"step": 911 |
|
}, |
|
{ |
|
"epoch": 0.7296, |
|
"grad_norm": 4.390267848968506, |
|
"learning_rate": 8.49078184873508e-07, |
|
"loss": 1.2082, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 0.7304, |
|
"grad_norm": 1.133646845817566, |
|
"learning_rate": 8.443650992624877e-07, |
|
"loss": 3.6091, |
|
"step": 913 |
|
}, |
|
{ |
|
"epoch": 0.7312, |
|
"grad_norm": 3.671508550643921, |
|
"learning_rate": 8.396624715413251e-07, |
|
"loss": 1.2595, |
|
"step": 914 |
|
}, |
|
{ |
|
"epoch": 0.732, |
|
"grad_norm": 1.238884687423706, |
|
"learning_rate": 8.349703314143712e-07, |
|
"loss": 3.516, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 0.7328, |
|
"grad_norm": 4.374630451202393, |
|
"learning_rate": 8.302887085197342e-07, |
|
"loss": 1.2724, |
|
"step": 916 |
|
}, |
|
{ |
|
"epoch": 0.7336, |
|
"grad_norm": 1.0681443214416504, |
|
"learning_rate": 8.256176324290885e-07, |
|
"loss": 3.5777, |
|
"step": 917 |
|
}, |
|
{ |
|
"epoch": 0.7344, |
|
"grad_norm": 4.399445056915283, |
|
"learning_rate": 8.209571326474897e-07, |
|
"loss": 1.5055, |
|
"step": 918 |
|
}, |
|
{ |
|
"epoch": 0.7352, |
|
"grad_norm": 1.302098035812378, |
|
"learning_rate": 8.163072386131876e-07, |
|
"loss": 3.5391, |
|
"step": 919 |
|
}, |
|
{ |
|
"epoch": 0.736, |
|
"grad_norm": 4.033039093017578, |
|
"learning_rate": 8.116679796974389e-07, |
|
"loss": 1.4171, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.7368, |
|
"grad_norm": 1.2380177974700928, |
|
"learning_rate": 8.070393852043251e-07, |
|
"loss": 3.5787, |
|
"step": 921 |
|
}, |
|
{ |
|
"epoch": 0.7376, |
|
"grad_norm": 4.127280235290527, |
|
"learning_rate": 8.024214843705647e-07, |
|
"loss": 1.4362, |
|
"step": 922 |
|
}, |
|
{ |
|
"epoch": 0.7384, |
|
"grad_norm": 1.448819875717163, |
|
"learning_rate": 7.978143063653296e-07, |
|
"loss": 3.5109, |
|
"step": 923 |
|
}, |
|
{ |
|
"epoch": 0.7392, |
|
"grad_norm": 4.252338886260986, |
|
"learning_rate": 7.93217880290059e-07, |
|
"loss": 1.2241, |
|
"step": 924 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 1.3917127847671509, |
|
"learning_rate": 7.886322351782782e-07, |
|
"loss": 3.5236, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.7408, |
|
"grad_norm": 3.9095723628997803, |
|
"learning_rate": 7.840573999954154e-07, |
|
"loss": 1.3039, |
|
"step": 926 |
|
}, |
|
{ |
|
"epoch": 0.7416, |
|
"grad_norm": 1.6759053468704224, |
|
"learning_rate": 7.794934036386139e-07, |
|
"loss": 3.5408, |
|
"step": 927 |
|
}, |
|
{ |
|
"epoch": 0.7424, |
|
"grad_norm": 3.9729490280151367, |
|
"learning_rate": 7.749402749365573e-07, |
|
"loss": 1.2951, |
|
"step": 928 |
|
}, |
|
{ |
|
"epoch": 0.7432, |
|
"grad_norm": 1.7310004234313965, |
|
"learning_rate": 7.703980426492791e-07, |
|
"loss": 3.4605, |
|
"step": 929 |
|
}, |
|
{ |
|
"epoch": 0.744, |
|
"grad_norm": 4.3605523109436035, |
|
"learning_rate": 7.65866735467988e-07, |
|
"loss": 1.2495, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.7448, |
|
"grad_norm": 1.055009365081787, |
|
"learning_rate": 7.613463820148831e-07, |
|
"loss": 3.5749, |
|
"step": 931 |
|
}, |
|
{ |
|
"epoch": 0.7456, |
|
"grad_norm": 4.379756450653076, |
|
"learning_rate": 7.568370108429732e-07, |
|
"loss": 1.3678, |
|
"step": 932 |
|
}, |
|
{ |
|
"epoch": 0.7464, |
|
"grad_norm": 1.133419156074524, |
|
"learning_rate": 7.523386504358984e-07, |
|
"loss": 3.6624, |
|
"step": 933 |
|
}, |
|
{ |
|
"epoch": 0.7472, |
|
"grad_norm": 3.2285141944885254, |
|
"learning_rate": 7.478513292077463e-07, |
|
"loss": 1.2785, |
|
"step": 934 |
|
}, |
|
{ |
|
"epoch": 0.748, |
|
"grad_norm": 1.2085245847702026, |
|
"learning_rate": 7.433750755028774e-07, |
|
"loss": 3.6372, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 0.7488, |
|
"grad_norm": 3.985098123550415, |
|
"learning_rate": 7.389099175957426e-07, |
|
"loss": 1.3853, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 0.7496, |
|
"grad_norm": 1.3521220684051514, |
|
"learning_rate": 7.344558836907067e-07, |
|
"loss": 3.4587, |
|
"step": 937 |
|
}, |
|
{ |
|
"epoch": 0.7504, |
|
"grad_norm": 3.7972023487091064, |
|
"learning_rate": 7.300130019218688e-07, |
|
"loss": 1.4041, |
|
"step": 938 |
|
}, |
|
{ |
|
"epoch": 0.7512, |
|
"grad_norm": 1.1607991456985474, |
|
"learning_rate": 7.255813003528834e-07, |
|
"loss": 3.5921, |
|
"step": 939 |
|
}, |
|
{ |
|
"epoch": 0.752, |
|
"grad_norm": 4.701716423034668, |
|
"learning_rate": 7.211608069767867e-07, |
|
"loss": 1.1838, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.7528, |
|
"grad_norm": 1.6962052583694458, |
|
"learning_rate": 7.167515497158179e-07, |
|
"loss": 3.4455, |
|
"step": 941 |
|
}, |
|
{ |
|
"epoch": 0.7536, |
|
"grad_norm": 3.769155502319336, |
|
"learning_rate": 7.123535564212419e-07, |
|
"loss": 1.417, |
|
"step": 942 |
|
}, |
|
{ |
|
"epoch": 0.7544, |
|
"grad_norm": 1.5282889604568481, |
|
"learning_rate": 7.079668548731757e-07, |
|
"loss": 3.4607, |
|
"step": 943 |
|
}, |
|
{ |
|
"epoch": 0.7552, |
|
"grad_norm": 4.213266372680664, |
|
"learning_rate": 7.035914727804085e-07, |
|
"loss": 1.1793, |
|
"step": 944 |
|
}, |
|
{ |
|
"epoch": 0.756, |
|
"grad_norm": 1.5362334251403809, |
|
"learning_rate": 6.992274377802328e-07, |
|
"loss": 3.5102, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 0.7568, |
|
"grad_norm": 3.7498528957366943, |
|
"learning_rate": 6.94874777438265e-07, |
|
"loss": 1.2506, |
|
"step": 946 |
|
}, |
|
{ |
|
"epoch": 0.7576, |
|
"grad_norm": 1.2717052698135376, |
|
"learning_rate": 6.905335192482734e-07, |
|
"loss": 3.5799, |
|
"step": 947 |
|
}, |
|
{ |
|
"epoch": 0.7584, |
|
"grad_norm": 4.157364368438721, |
|
"learning_rate": 6.862036906320055e-07, |
|
"loss": 1.3018, |
|
"step": 948 |
|
}, |
|
{ |
|
"epoch": 0.7592, |
|
"grad_norm": 1.7433124780654907, |
|
"learning_rate": 6.818853189390104e-07, |
|
"loss": 3.4984, |
|
"step": 949 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 4.441183567047119, |
|
"learning_rate": 6.775784314464717e-07, |
|
"loss": 1.4515, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.7608, |
|
"grad_norm": 1.48224675655365, |
|
"learning_rate": 6.732830553590305e-07, |
|
"loss": 3.5688, |
|
"step": 951 |
|
}, |
|
{ |
|
"epoch": 0.7616, |
|
"grad_norm": 3.9499704837799072, |
|
"learning_rate": 6.689992178086174e-07, |
|
"loss": 1.2271, |
|
"step": 952 |
|
}, |
|
{ |
|
"epoch": 0.7624, |
|
"grad_norm": 1.458235263824463, |
|
"learning_rate": 6.647269458542793e-07, |
|
"loss": 3.5244, |
|
"step": 953 |
|
}, |
|
{ |
|
"epoch": 0.7632, |
|
"grad_norm": 3.810727596282959, |
|
"learning_rate": 6.604662664820063e-07, |
|
"loss": 1.2276, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 0.764, |
|
"grad_norm": 1.6759514808654785, |
|
"learning_rate": 6.562172066045655e-07, |
|
"loss": 3.4945, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 0.7648, |
|
"grad_norm": 4.024814128875732, |
|
"learning_rate": 6.519797930613289e-07, |
|
"loss": 1.3065, |
|
"step": 956 |
|
}, |
|
{ |
|
"epoch": 0.7656, |
|
"grad_norm": 1.238553524017334, |
|
"learning_rate": 6.477540526181036e-07, |
|
"loss": 3.5006, |
|
"step": 957 |
|
}, |
|
{ |
|
"epoch": 0.7664, |
|
"grad_norm": 3.444575786590576, |
|
"learning_rate": 6.435400119669618e-07, |
|
"loss": 1.3996, |
|
"step": 958 |
|
}, |
|
{ |
|
"epoch": 0.7672, |
|
"grad_norm": 1.3021897077560425, |
|
"learning_rate": 6.393376977260754e-07, |
|
"loss": 3.5961, |
|
"step": 959 |
|
}, |
|
{ |
|
"epoch": 0.768, |
|
"grad_norm": 4.322812080383301, |
|
"learning_rate": 6.351471364395448e-07, |
|
"loss": 1.5874, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.7688, |
|
"grad_norm": 1.3130619525909424, |
|
"learning_rate": 6.309683545772327e-07, |
|
"loss": 3.5893, |
|
"step": 961 |
|
}, |
|
{ |
|
"epoch": 0.7696, |
|
"grad_norm": 4.154742240905762, |
|
"learning_rate": 6.268013785345969e-07, |
|
"loss": 1.5529, |
|
"step": 962 |
|
}, |
|
{ |
|
"epoch": 0.7704, |
|
"grad_norm": 1.2372699975967407, |
|
"learning_rate": 6.226462346325221e-07, |
|
"loss": 3.5887, |
|
"step": 963 |
|
}, |
|
{ |
|
"epoch": 0.7712, |
|
"grad_norm": 3.7366716861724854, |
|
"learning_rate": 6.185029491171554e-07, |
|
"loss": 1.3078, |
|
"step": 964 |
|
}, |
|
{ |
|
"epoch": 0.772, |
|
"grad_norm": 1.2591793537139893, |
|
"learning_rate": 6.143715481597404e-07, |
|
"loss": 3.5405, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 0.7728, |
|
"grad_norm": 3.966529369354248, |
|
"learning_rate": 6.102520578564508e-07, |
|
"loss": 1.2979, |
|
"step": 966 |
|
}, |
|
{ |
|
"epoch": 0.7736, |
|
"grad_norm": 1.7405962944030762, |
|
"learning_rate": 6.061445042282271e-07, |
|
"loss": 3.4681, |
|
"step": 967 |
|
}, |
|
{ |
|
"epoch": 0.7744, |
|
"grad_norm": 4.989678859710693, |
|
"learning_rate": 6.02048913220609e-07, |
|
"loss": 1.6273, |
|
"step": 968 |
|
}, |
|
{ |
|
"epoch": 0.7752, |
|
"grad_norm": 1.1819043159484863, |
|
"learning_rate": 5.979653107035754e-07, |
|
"loss": 3.5553, |
|
"step": 969 |
|
}, |
|
{ |
|
"epoch": 0.776, |
|
"grad_norm": 4.24968957901001, |
|
"learning_rate": 5.9389372247138e-07, |
|
"loss": 1.7848, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.7768, |
|
"grad_norm": 1.146349549293518, |
|
"learning_rate": 5.898341742423866e-07, |
|
"loss": 3.5557, |
|
"step": 971 |
|
}, |
|
{ |
|
"epoch": 0.7776, |
|
"grad_norm": 3.359968423843384, |
|
"learning_rate": 5.857866916589089e-07, |
|
"loss": 1.1097, |
|
"step": 972 |
|
}, |
|
{ |
|
"epoch": 0.7784, |
|
"grad_norm": 1.3294552564620972, |
|
"learning_rate": 5.817513002870451e-07, |
|
"loss": 3.5291, |
|
"step": 973 |
|
}, |
|
{ |
|
"epoch": 0.7792, |
|
"grad_norm": 3.7747585773468018, |
|
"learning_rate": 5.777280256165218e-07, |
|
"loss": 1.1422, |
|
"step": 974 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 1.3020869493484497, |
|
"learning_rate": 5.737168930605272e-07, |
|
"loss": 3.5797, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.7808, |
|
"grad_norm": 4.284913063049316, |
|
"learning_rate": 5.697179279555551e-07, |
|
"loss": 1.2182, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 0.7816, |
|
"grad_norm": 1.17784583568573, |
|
"learning_rate": 5.657311555612433e-07, |
|
"loss": 3.5849, |
|
"step": 977 |
|
}, |
|
{ |
|
"epoch": 0.7824, |
|
"grad_norm": 3.8503072261810303, |
|
"learning_rate": 5.617566010602113e-07, |
|
"loss": 1.1606, |
|
"step": 978 |
|
}, |
|
{ |
|
"epoch": 0.7832, |
|
"grad_norm": 1.4357177019119263, |
|
"learning_rate": 5.577942895579064e-07, |
|
"loss": 3.4606, |
|
"step": 979 |
|
}, |
|
{ |
|
"epoch": 0.784, |
|
"grad_norm": 4.020089626312256, |
|
"learning_rate": 5.538442460824417e-07, |
|
"loss": 1.2557, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.7848, |
|
"grad_norm": 1.3439040184020996, |
|
"learning_rate": 5.499064955844383e-07, |
|
"loss": 3.5545, |
|
"step": 981 |
|
}, |
|
{ |
|
"epoch": 0.7856, |
|
"grad_norm": 3.5121538639068604, |
|
"learning_rate": 5.459810629368692e-07, |
|
"loss": 1.1383, |
|
"step": 982 |
|
}, |
|
{ |
|
"epoch": 0.7864, |
|
"grad_norm": 1.4466603994369507, |
|
"learning_rate": 5.420679729348993e-07, |
|
"loss": 3.4426, |
|
"step": 983 |
|
}, |
|
{ |
|
"epoch": 0.7872, |
|
"grad_norm": 4.1092047691345215, |
|
"learning_rate": 5.381672502957324e-07, |
|
"loss": 1.3047, |
|
"step": 984 |
|
}, |
|
{ |
|
"epoch": 0.788, |
|
"grad_norm": 1.4652632474899292, |
|
"learning_rate": 5.342789196584527e-07, |
|
"loss": 3.4522, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 0.7888, |
|
"grad_norm": 4.341894626617432, |
|
"learning_rate": 5.304030055838704e-07, |
|
"loss": 1.5886, |
|
"step": 986 |
|
}, |
|
{ |
|
"epoch": 0.7896, |
|
"grad_norm": 1.5312821865081787, |
|
"learning_rate": 5.26539532554364e-07, |
|
"loss": 3.4746, |
|
"step": 987 |
|
}, |
|
{ |
|
"epoch": 0.7904, |
|
"grad_norm": 3.956395149230957, |
|
"learning_rate": 5.226885249737292e-07, |
|
"loss": 1.3278, |
|
"step": 988 |
|
}, |
|
{ |
|
"epoch": 0.7912, |
|
"grad_norm": 1.5505242347717285, |
|
"learning_rate": 5.188500071670235e-07, |
|
"loss": 3.4367, |
|
"step": 989 |
|
}, |
|
{ |
|
"epoch": 0.792, |
|
"grad_norm": 3.910429000854492, |
|
"learning_rate": 5.150240033804116e-07, |
|
"loss": 1.0932, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.7928, |
|
"grad_norm": 1.518563985824585, |
|
"learning_rate": 5.112105377810128e-07, |
|
"loss": 3.412, |
|
"step": 991 |
|
}, |
|
{ |
|
"epoch": 0.7936, |
|
"grad_norm": 3.3202965259552, |
|
"learning_rate": 5.074096344567475e-07, |
|
"loss": 1.1174, |
|
"step": 992 |
|
}, |
|
{ |
|
"epoch": 0.7944, |
|
"grad_norm": 1.5806505680084229, |
|
"learning_rate": 5.036213174161877e-07, |
|
"loss": 3.47, |
|
"step": 993 |
|
}, |
|
{ |
|
"epoch": 0.7952, |
|
"grad_norm": 6.9575324058532715, |
|
"learning_rate": 4.998456105884025e-07, |
|
"loss": 1.5321, |
|
"step": 994 |
|
}, |
|
{ |
|
"epoch": 0.796, |
|
"grad_norm": 1.1276708841323853, |
|
"learning_rate": 4.960825378228082e-07, |
|
"loss": 3.6015, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 0.7968, |
|
"grad_norm": 3.954547166824341, |
|
"learning_rate": 4.923321228890184e-07, |
|
"loss": 1.1861, |
|
"step": 996 |
|
}, |
|
{ |
|
"epoch": 0.7976, |
|
"grad_norm": 1.08054780960083, |
|
"learning_rate": 4.885943894766909e-07, |
|
"loss": 3.5029, |
|
"step": 997 |
|
}, |
|
{ |
|
"epoch": 0.7984, |
|
"grad_norm": 3.6978795528411865, |
|
"learning_rate": 4.848693611953825e-07, |
|
"loss": 1.3936, |
|
"step": 998 |
|
}, |
|
{ |
|
"epoch": 0.7992, |
|
"grad_norm": 1.0338634252548218, |
|
"learning_rate": 4.811570615743952e-07, |
|
"loss": 3.6014, |
|
"step": 999 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 4.1188507080078125, |
|
"learning_rate": 4.774575140626317e-07, |
|
"loss": 1.2529, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.8008, |
|
"grad_norm": 1.9042516946792603, |
|
"learning_rate": 4.7377074202844514e-07, |
|
"loss": 3.4267, |
|
"step": 1001 |
|
}, |
|
{ |
|
"epoch": 0.8016, |
|
"grad_norm": 4.330513954162598, |
|
"learning_rate": 4.700967687594901e-07, |
|
"loss": 1.369, |
|
"step": 1002 |
|
}, |
|
{ |
|
"epoch": 0.8024, |
|
"grad_norm": 1.0320863723754883, |
|
"learning_rate": 4.664356174625795e-07, |
|
"loss": 3.5636, |
|
"step": 1003 |
|
}, |
|
{ |
|
"epoch": 0.8032, |
|
"grad_norm": 4.5047287940979, |
|
"learning_rate": 4.6278731126353447e-07, |
|
"loss": 1.3017, |
|
"step": 1004 |
|
}, |
|
{ |
|
"epoch": 0.804, |
|
"grad_norm": 1.59553062915802, |
|
"learning_rate": 4.591518732070402e-07, |
|
"loss": 3.5466, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 0.8048, |
|
"grad_norm": 3.6305763721466064, |
|
"learning_rate": 4.555293262564994e-07, |
|
"loss": 1.3101, |
|
"step": 1006 |
|
}, |
|
{ |
|
"epoch": 0.8056, |
|
"grad_norm": 1.155205488204956, |
|
"learning_rate": 4.5191969329388627e-07, |
|
"loss": 3.5494, |
|
"step": 1007 |
|
}, |
|
{ |
|
"epoch": 0.8064, |
|
"grad_norm": 4.001699924468994, |
|
"learning_rate": 4.483229971196054e-07, |
|
"loss": 1.1268, |
|
"step": 1008 |
|
}, |
|
{ |
|
"epoch": 0.8072, |
|
"grad_norm": 1.1981041431427002, |
|
"learning_rate": 4.447392604523443e-07, |
|
"loss": 3.5732, |
|
"step": 1009 |
|
}, |
|
{ |
|
"epoch": 0.808, |
|
"grad_norm": 3.6024370193481445, |
|
"learning_rate": 4.411685059289314e-07, |
|
"loss": 1.1444, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.8088, |
|
"grad_norm": 1.3383228778839111, |
|
"learning_rate": 4.376107561041937e-07, |
|
"loss": 3.5367, |
|
"step": 1011 |
|
}, |
|
{ |
|
"epoch": 0.8096, |
|
"grad_norm": 3.9421496391296387, |
|
"learning_rate": 4.340660334508115e-07, |
|
"loss": 1.3883, |
|
"step": 1012 |
|
}, |
|
{ |
|
"epoch": 0.8104, |
|
"grad_norm": 1.0924482345581055, |
|
"learning_rate": 4.305343603591802e-07, |
|
"loss": 3.5681, |
|
"step": 1013 |
|
}, |
|
{ |
|
"epoch": 0.8112, |
|
"grad_norm": 3.4752144813537598, |
|
"learning_rate": 4.2701575913726644e-07, |
|
"loss": 1.059, |
|
"step": 1014 |
|
}, |
|
{ |
|
"epoch": 0.812, |
|
"grad_norm": 1.952444314956665, |
|
"learning_rate": 4.235102520104681e-07, |
|
"loss": 3.5588, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 0.8128, |
|
"grad_norm": 4.0423688888549805, |
|
"learning_rate": 4.200178611214736e-07, |
|
"loss": 1.1042, |
|
"step": 1016 |
|
}, |
|
{ |
|
"epoch": 0.8136, |
|
"grad_norm": 1.218482494354248, |
|
"learning_rate": 4.165386085301212e-07, |
|
"loss": 3.5486, |
|
"step": 1017 |
|
}, |
|
{ |
|
"epoch": 0.8144, |
|
"grad_norm": 4.175278663635254, |
|
"learning_rate": 4.1307251621326124e-07, |
|
"loss": 1.4889, |
|
"step": 1018 |
|
}, |
|
{ |
|
"epoch": 0.8152, |
|
"grad_norm": 2.6647427082061768, |
|
"learning_rate": 4.096196060646168e-07, |
|
"loss": 3.5716, |
|
"step": 1019 |
|
}, |
|
{ |
|
"epoch": 0.816, |
|
"grad_norm": 4.009509563446045, |
|
"learning_rate": 4.061798998946459e-07, |
|
"loss": 1.2765, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.8168, |
|
"grad_norm": 1.1483063697814941, |
|
"learning_rate": 4.0275341943040057e-07, |
|
"loss": 3.6826, |
|
"step": 1021 |
|
}, |
|
{ |
|
"epoch": 0.8176, |
|
"grad_norm": 3.944807291030884, |
|
"learning_rate": 3.9934018631539506e-07, |
|
"loss": 1.2861, |
|
"step": 1022 |
|
}, |
|
{ |
|
"epoch": 0.8184, |
|
"grad_norm": 1.6391054391860962, |
|
"learning_rate": 3.9594022210946355e-07, |
|
"loss": 3.3965, |
|
"step": 1023 |
|
}, |
|
{ |
|
"epoch": 0.8192, |
|
"grad_norm": 3.9761102199554443, |
|
"learning_rate": 3.925535482886286e-07, |
|
"loss": 1.2771, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 1.8166158199310303, |
|
"learning_rate": 3.891801862449629e-07, |
|
"loss": 3.481, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.8208, |
|
"grad_norm": 3.909714460372925, |
|
"learning_rate": 3.8582015728645366e-07, |
|
"loss": 1.3296, |
|
"step": 1026 |
|
}, |
|
{ |
|
"epoch": 0.8216, |
|
"grad_norm": 1.1448289155960083, |
|
"learning_rate": 3.8247348263687035e-07, |
|
"loss": 3.5438, |
|
"step": 1027 |
|
}, |
|
{ |
|
"epoch": 0.8224, |
|
"grad_norm": 3.7021570205688477, |
|
"learning_rate": 3.7914018343562896e-07, |
|
"loss": 1.3568, |
|
"step": 1028 |
|
}, |
|
{ |
|
"epoch": 0.8232, |
|
"grad_norm": 1.1746755838394165, |
|
"learning_rate": 3.75820280737659e-07, |
|
"loss": 3.631, |
|
"step": 1029 |
|
}, |
|
{ |
|
"epoch": 0.824, |
|
"grad_norm": 4.372186660766602, |
|
"learning_rate": 3.725137955132707e-07, |
|
"loss": 1.5514, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.8248, |
|
"grad_norm": 1.2693135738372803, |
|
"learning_rate": 3.6922074864802095e-07, |
|
"loss": 3.6151, |
|
"step": 1031 |
|
}, |
|
{ |
|
"epoch": 0.8256, |
|
"grad_norm": 4.060328483581543, |
|
"learning_rate": 3.659411609425834e-07, |
|
"loss": 1.2585, |
|
"step": 1032 |
|
}, |
|
{ |
|
"epoch": 0.8264, |
|
"grad_norm": 1.1194394826889038, |
|
"learning_rate": 3.626750531126169e-07, |
|
"loss": 3.5576, |
|
"step": 1033 |
|
}, |
|
{ |
|
"epoch": 0.8272, |
|
"grad_norm": 4.196378707885742, |
|
"learning_rate": 3.594224457886336e-07, |
|
"loss": 1.1795, |
|
"step": 1034 |
|
}, |
|
{ |
|
"epoch": 0.828, |
|
"grad_norm": 1.4582164287567139, |
|
"learning_rate": 3.561833595158698e-07, |
|
"loss": 3.4901, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 0.8288, |
|
"grad_norm": 3.783414602279663, |
|
"learning_rate": 3.529578147541532e-07, |
|
"loss": 1.1758, |
|
"step": 1036 |
|
}, |
|
{ |
|
"epoch": 0.8296, |
|
"grad_norm": 1.4051135778427124, |
|
"learning_rate": 3.4974583187777853e-07, |
|
"loss": 3.4493, |
|
"step": 1037 |
|
}, |
|
{ |
|
"epoch": 0.8304, |
|
"grad_norm": 3.584596633911133, |
|
"learning_rate": 3.4654743117537525e-07, |
|
"loss": 1.2126, |
|
"step": 1038 |
|
}, |
|
{ |
|
"epoch": 0.8312, |
|
"grad_norm": 1.3267326354980469, |
|
"learning_rate": 3.433626328497805e-07, |
|
"loss": 3.6435, |
|
"step": 1039 |
|
}, |
|
{ |
|
"epoch": 0.832, |
|
"grad_norm": 4.257800579071045, |
|
"learning_rate": 3.4019145701791186e-07, |
|
"loss": 1.4825, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.8328, |
|
"grad_norm": 1.1711785793304443, |
|
"learning_rate": 3.370339237106385e-07, |
|
"loss": 3.5212, |
|
"step": 1041 |
|
}, |
|
{ |
|
"epoch": 0.8336, |
|
"grad_norm": 4.394068717956543, |
|
"learning_rate": 3.3389005287265713e-07, |
|
"loss": 1.1283, |
|
"step": 1042 |
|
}, |
|
{ |
|
"epoch": 0.8344, |
|
"grad_norm": 1.297494888305664, |
|
"learning_rate": 3.3075986436236494e-07, |
|
"loss": 3.5152, |
|
"step": 1043 |
|
}, |
|
{ |
|
"epoch": 0.8352, |
|
"grad_norm": 3.9251017570495605, |
|
"learning_rate": 3.2764337795173433e-07, |
|
"loss": 1.2356, |
|
"step": 1044 |
|
}, |
|
{ |
|
"epoch": 0.836, |
|
"grad_norm": 1.0191597938537598, |
|
"learning_rate": 3.245406133261858e-07, |
|
"loss": 3.6092, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 0.8368, |
|
"grad_norm": 4.02804708480835, |
|
"learning_rate": 3.214515900844681e-07, |
|
"loss": 1.2928, |
|
"step": 1046 |
|
}, |
|
{ |
|
"epoch": 0.8376, |
|
"grad_norm": 1.1345746517181396, |
|
"learning_rate": 3.18376327738531e-07, |
|
"loss": 3.5869, |
|
"step": 1047 |
|
}, |
|
{ |
|
"epoch": 0.8384, |
|
"grad_norm": 4.080638408660889, |
|
"learning_rate": 3.15314845713402e-07, |
|
"loss": 1.3423, |
|
"step": 1048 |
|
}, |
|
{ |
|
"epoch": 0.8392, |
|
"grad_norm": 1.3001468181610107, |
|
"learning_rate": 3.122671633470664e-07, |
|
"loss": 3.4875, |
|
"step": 1049 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 3.684081792831421, |
|
"learning_rate": 3.092332998903416e-07, |
|
"loss": 1.3089, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.8408, |
|
"grad_norm": 1.3111592531204224, |
|
"learning_rate": 3.0621327450675806e-07, |
|
"loss": 3.5502, |
|
"step": 1051 |
|
}, |
|
{ |
|
"epoch": 0.8416, |
|
"grad_norm": 4.330699443817139, |
|
"learning_rate": 3.0320710627243815e-07, |
|
"loss": 1.4276, |
|
"step": 1052 |
|
}, |
|
{ |
|
"epoch": 0.8424, |
|
"grad_norm": 1.4837126731872559, |
|
"learning_rate": 3.002148141759739e-07, |
|
"loss": 3.5433, |
|
"step": 1053 |
|
}, |
|
{ |
|
"epoch": 0.8432, |
|
"grad_norm": 3.8255903720855713, |
|
"learning_rate": 2.9723641711830896e-07, |
|
"loss": 1.3503, |
|
"step": 1054 |
|
}, |
|
{ |
|
"epoch": 0.844, |
|
"grad_norm": 1.0839451551437378, |
|
"learning_rate": 2.942719339126171e-07, |
|
"loss": 3.659, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 0.8448, |
|
"grad_norm": 4.035921573638916, |
|
"learning_rate": 2.913213832841857e-07, |
|
"loss": 1.3085, |
|
"step": 1056 |
|
}, |
|
{ |
|
"epoch": 0.8456, |
|
"grad_norm": 1.2930865287780762, |
|
"learning_rate": 2.8838478387029605e-07, |
|
"loss": 3.4512, |
|
"step": 1057 |
|
}, |
|
{ |
|
"epoch": 0.8464, |
|
"grad_norm": 3.7543997764587402, |
|
"learning_rate": 2.854621542201064e-07, |
|
"loss": 1.1318, |
|
"step": 1058 |
|
}, |
|
{ |
|
"epoch": 0.8472, |
|
"grad_norm": 1.1573505401611328, |
|
"learning_rate": 2.8255351279453446e-07, |
|
"loss": 3.5605, |
|
"step": 1059 |
|
}, |
|
{ |
|
"epoch": 0.848, |
|
"grad_norm": 3.8682708740234375, |
|
"learning_rate": 2.796588779661388e-07, |
|
"loss": 1.3628, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.8488, |
|
"grad_norm": 2.039510726928711, |
|
"learning_rate": 2.767782680190073e-07, |
|
"loss": 3.5517, |
|
"step": 1061 |
|
}, |
|
{ |
|
"epoch": 0.8496, |
|
"grad_norm": 3.9016358852386475, |
|
"learning_rate": 2.739117011486378e-07, |
|
"loss": 1.1586, |
|
"step": 1062 |
|
}, |
|
{ |
|
"epoch": 0.8504, |
|
"grad_norm": 1.1205612421035767, |
|
"learning_rate": 2.710591954618247e-07, |
|
"loss": 3.5143, |
|
"step": 1063 |
|
}, |
|
{ |
|
"epoch": 0.8512, |
|
"grad_norm": 4.346203327178955, |
|
"learning_rate": 2.6822076897654453e-07, |
|
"loss": 1.3599, |
|
"step": 1064 |
|
}, |
|
{ |
|
"epoch": 0.852, |
|
"grad_norm": 1.4595547914505005, |
|
"learning_rate": 2.653964396218406e-07, |
|
"loss": 3.5174, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 0.8528, |
|
"grad_norm": 3.893127918243408, |
|
"learning_rate": 2.625862252377129e-07, |
|
"loss": 1.1346, |
|
"step": 1066 |
|
}, |
|
{ |
|
"epoch": 0.8536, |
|
"grad_norm": 1.3180551528930664, |
|
"learning_rate": 2.597901435750025e-07, |
|
"loss": 3.4543, |
|
"step": 1067 |
|
}, |
|
{ |
|
"epoch": 0.8544, |
|
"grad_norm": 3.9734368324279785, |
|
"learning_rate": 2.5700821229528164e-07, |
|
"loss": 1.2548, |
|
"step": 1068 |
|
}, |
|
{ |
|
"epoch": 0.8552, |
|
"grad_norm": 1.505300521850586, |
|
"learning_rate": 2.5424044897073895e-07, |
|
"loss": 3.5335, |
|
"step": 1069 |
|
}, |
|
{ |
|
"epoch": 0.856, |
|
"grad_norm": 3.921257972717285, |
|
"learning_rate": 2.514868710840723e-07, |
|
"loss": 1.5256, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.8568, |
|
"grad_norm": 1.551336407661438, |
|
"learning_rate": 2.48747496028377e-07, |
|
"loss": 3.3823, |
|
"step": 1071 |
|
}, |
|
{ |
|
"epoch": 0.8576, |
|
"grad_norm": 3.929121494293213, |
|
"learning_rate": 2.460223411070337e-07, |
|
"loss": 1.2628, |
|
"step": 1072 |
|
}, |
|
{ |
|
"epoch": 0.8584, |
|
"grad_norm": 1.1952719688415527, |
|
"learning_rate": 2.4331142353360206e-07, |
|
"loss": 3.4138, |
|
"step": 1073 |
|
}, |
|
{ |
|
"epoch": 0.8592, |
|
"grad_norm": 3.588552713394165, |
|
"learning_rate": 2.406147604317119e-07, |
|
"loss": 1.2508, |
|
"step": 1074 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 1.0674008131027222, |
|
"learning_rate": 2.3793236883495164e-07, |
|
"loss": 3.5885, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.8608, |
|
"grad_norm": 3.9291443824768066, |
|
"learning_rate": 2.3526426568676485e-07, |
|
"loss": 1.5289, |
|
"step": 1076 |
|
}, |
|
{ |
|
"epoch": 0.8616, |
|
"grad_norm": 1.1263163089752197, |
|
"learning_rate": 2.3261046784034154e-07, |
|
"loss": 3.5685, |
|
"step": 1077 |
|
}, |
|
{ |
|
"epoch": 0.8624, |
|
"grad_norm": 3.7272915840148926, |
|
"learning_rate": 2.299709920585108e-07, |
|
"loss": 1.0725, |
|
"step": 1078 |
|
}, |
|
{ |
|
"epoch": 0.8632, |
|
"grad_norm": 1.9841383695602417, |
|
"learning_rate": 2.2734585501363676e-07, |
|
"loss": 3.4305, |
|
"step": 1079 |
|
}, |
|
{ |
|
"epoch": 0.864, |
|
"grad_norm": 3.725369691848755, |
|
"learning_rate": 2.2473507328751086e-07, |
|
"loss": 1.2885, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.8648, |
|
"grad_norm": 1.2514499425888062, |
|
"learning_rate": 2.2213866337125022e-07, |
|
"loss": 3.6041, |
|
"step": 1081 |
|
}, |
|
{ |
|
"epoch": 0.8656, |
|
"grad_norm": 3.798311233520508, |
|
"learning_rate": 2.1955664166519036e-07, |
|
"loss": 1.3569, |
|
"step": 1082 |
|
}, |
|
{ |
|
"epoch": 0.8664, |
|
"grad_norm": 1.05547034740448, |
|
"learning_rate": 2.1698902447878478e-07, |
|
"loss": 3.6443, |
|
"step": 1083 |
|
}, |
|
{ |
|
"epoch": 0.8672, |
|
"grad_norm": 4.112440586090088, |
|
"learning_rate": 2.1443582803049757e-07, |
|
"loss": 1.3431, |
|
"step": 1084 |
|
}, |
|
{ |
|
"epoch": 0.868, |
|
"grad_norm": 1.1724605560302734, |
|
"learning_rate": 2.118970684477062e-07, |
|
"loss": 3.5914, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 0.8688, |
|
"grad_norm": 3.977243423461914, |
|
"learning_rate": 2.0937276176659553e-07, |
|
"loss": 1.4519, |
|
"step": 1086 |
|
}, |
|
{ |
|
"epoch": 0.8696, |
|
"grad_norm": 1.413366436958313, |
|
"learning_rate": 2.068629239320588e-07, |
|
"loss": 3.5239, |
|
"step": 1087 |
|
}, |
|
{ |
|
"epoch": 0.8704, |
|
"grad_norm": 3.696100950241089, |
|
"learning_rate": 2.043675707975959e-07, |
|
"loss": 1.5434, |
|
"step": 1088 |
|
}, |
|
{ |
|
"epoch": 0.8712, |
|
"grad_norm": 1.1970295906066895, |
|
"learning_rate": 2.0188671812521293e-07, |
|
"loss": 3.4977, |
|
"step": 1089 |
|
}, |
|
{ |
|
"epoch": 0.872, |
|
"grad_norm": 4.029970169067383, |
|
"learning_rate": 1.9942038158532407e-07, |
|
"loss": 1.3306, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.8728, |
|
"grad_norm": 1.2960518598556519, |
|
"learning_rate": 1.9696857675665122e-07, |
|
"loss": 3.5162, |
|
"step": 1091 |
|
}, |
|
{ |
|
"epoch": 0.8736, |
|
"grad_norm": 3.725883960723877, |
|
"learning_rate": 1.9453131912612694e-07, |
|
"loss": 1.4022, |
|
"step": 1092 |
|
}, |
|
{ |
|
"epoch": 0.8744, |
|
"grad_norm": 1.3842031955718994, |
|
"learning_rate": 1.9210862408879373e-07, |
|
"loss": 3.5151, |
|
"step": 1093 |
|
}, |
|
{ |
|
"epoch": 0.8752, |
|
"grad_norm": 3.8603460788726807, |
|
"learning_rate": 1.8970050694771064e-07, |
|
"loss": 1.2135, |
|
"step": 1094 |
|
}, |
|
{ |
|
"epoch": 0.876, |
|
"grad_norm": 1.2414811849594116, |
|
"learning_rate": 1.8730698291385518e-07, |
|
"loss": 3.5374, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 0.8768, |
|
"grad_norm": 4.625464916229248, |
|
"learning_rate": 1.8492806710602495e-07, |
|
"loss": 1.3096, |
|
"step": 1096 |
|
}, |
|
{ |
|
"epoch": 0.8776, |
|
"grad_norm": 1.5665608644485474, |
|
"learning_rate": 1.8256377455074526e-07, |
|
"loss": 3.4397, |
|
"step": 1097 |
|
}, |
|
{ |
|
"epoch": 0.8784, |
|
"grad_norm": 3.919268846511841, |
|
"learning_rate": 1.802141201821736e-07, |
|
"loss": 1.376, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 0.8792, |
|
"grad_norm": 1.4185221195220947, |
|
"learning_rate": 1.7787911884200314e-07, |
|
"loss": 3.6158, |
|
"step": 1099 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 4.121542930603027, |
|
"learning_rate": 1.7555878527937164e-07, |
|
"loss": 1.3549, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.8808, |
|
"grad_norm": 1.706099033355713, |
|
"learning_rate": 1.7325313415076705e-07, |
|
"loss": 3.5284, |
|
"step": 1101 |
|
}, |
|
{ |
|
"epoch": 0.8816, |
|
"grad_norm": 4.369479656219482, |
|
"learning_rate": 1.7096218001993514e-07, |
|
"loss": 1.5352, |
|
"step": 1102 |
|
}, |
|
{ |
|
"epoch": 0.8824, |
|
"grad_norm": 1.2528761625289917, |
|
"learning_rate": 1.686859373577876e-07, |
|
"loss": 3.6018, |
|
"step": 1103 |
|
}, |
|
{ |
|
"epoch": 0.8832, |
|
"grad_norm": 3.7873117923736572, |
|
"learning_rate": 1.6642442054230935e-07, |
|
"loss": 1.1694, |
|
"step": 1104 |
|
}, |
|
{ |
|
"epoch": 0.884, |
|
"grad_norm": 1.2879388332366943, |
|
"learning_rate": 1.6417764385846996e-07, |
|
"loss": 3.4757, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 0.8848, |
|
"grad_norm": 3.334120988845825, |
|
"learning_rate": 1.6194562149813241e-07, |
|
"loss": 0.8637, |
|
"step": 1106 |
|
}, |
|
{ |
|
"epoch": 0.8856, |
|
"grad_norm": 1.3120352029800415, |
|
"learning_rate": 1.5972836755996286e-07, |
|
"loss": 3.4815, |
|
"step": 1107 |
|
}, |
|
{ |
|
"epoch": 0.8864, |
|
"grad_norm": 3.6376547813415527, |
|
"learning_rate": 1.5752589604934255e-07, |
|
"loss": 1.2615, |
|
"step": 1108 |
|
}, |
|
{ |
|
"epoch": 0.8872, |
|
"grad_norm": 1.1396851539611816, |
|
"learning_rate": 1.5533822087827805e-07, |
|
"loss": 3.5342, |
|
"step": 1109 |
|
}, |
|
{ |
|
"epoch": 0.888, |
|
"grad_norm": 3.7635209560394287, |
|
"learning_rate": 1.5316535586531483e-07, |
|
"loss": 1.1877, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.8888, |
|
"grad_norm": 1.371699571609497, |
|
"learning_rate": 1.5100731473544932e-07, |
|
"loss": 3.5637, |
|
"step": 1111 |
|
}, |
|
{ |
|
"epoch": 0.8896, |
|
"grad_norm": 3.8787107467651367, |
|
"learning_rate": 1.4886411112004258e-07, |
|
"loss": 1.3821, |
|
"step": 1112 |
|
}, |
|
{ |
|
"epoch": 0.8904, |
|
"grad_norm": 1.8077179193496704, |
|
"learning_rate": 1.4673575855673278e-07, |
|
"loss": 3.4341, |
|
"step": 1113 |
|
}, |
|
{ |
|
"epoch": 0.8912, |
|
"grad_norm": 4.23999547958374, |
|
"learning_rate": 1.4462227048935185e-07, |
|
"loss": 1.5234, |
|
"step": 1114 |
|
}, |
|
{ |
|
"epoch": 0.892, |
|
"grad_norm": 1.4485225677490234, |
|
"learning_rate": 1.425236602678387e-07, |
|
"loss": 3.4551, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 0.8928, |
|
"grad_norm": 3.488999128341675, |
|
"learning_rate": 1.4043994114815663e-07, |
|
"loss": 1.1846, |
|
"step": 1116 |
|
}, |
|
{ |
|
"epoch": 0.8936, |
|
"grad_norm": 1.237518072128296, |
|
"learning_rate": 1.38371126292208e-07, |
|
"loss": 3.5263, |
|
"step": 1117 |
|
}, |
|
{ |
|
"epoch": 0.8944, |
|
"grad_norm": 3.7093005180358887, |
|
"learning_rate": 1.3631722876775137e-07, |
|
"loss": 1.3514, |
|
"step": 1118 |
|
}, |
|
{ |
|
"epoch": 0.8952, |
|
"grad_norm": 1.2599142789840698, |
|
"learning_rate": 1.342782615483204e-07, |
|
"loss": 3.528, |
|
"step": 1119 |
|
}, |
|
{ |
|
"epoch": 0.896, |
|
"grad_norm": 3.7309329509735107, |
|
"learning_rate": 1.3225423751313942e-07, |
|
"loss": 1.5911, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.8968, |
|
"grad_norm": 1.202618956565857, |
|
"learning_rate": 1.3024516944704495e-07, |
|
"loss": 3.4832, |
|
"step": 1121 |
|
}, |
|
{ |
|
"epoch": 0.8976, |
|
"grad_norm": 4.492614269256592, |
|
"learning_rate": 1.2825107004040272e-07, |
|
"loss": 1.2915, |
|
"step": 1122 |
|
}, |
|
{ |
|
"epoch": 0.8984, |
|
"grad_norm": 1.1479798555374146, |
|
"learning_rate": 1.262719518890279e-07, |
|
"loss": 3.5571, |
|
"step": 1123 |
|
}, |
|
{ |
|
"epoch": 0.8992, |
|
"grad_norm": 4.050600528717041, |
|
"learning_rate": 1.2430782749410676e-07, |
|
"loss": 1.388, |
|
"step": 1124 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 1.292321801185608, |
|
"learning_rate": 1.223587092621162e-07, |
|
"loss": 3.5855, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.9008, |
|
"grad_norm": 4.229612350463867, |
|
"learning_rate": 1.204246095047465e-07, |
|
"loss": 1.3577, |
|
"step": 1126 |
|
}, |
|
{ |
|
"epoch": 0.9016, |
|
"grad_norm": 1.274814248085022, |
|
"learning_rate": 1.1850554043882329e-07, |
|
"loss": 3.5057, |
|
"step": 1127 |
|
}, |
|
{ |
|
"epoch": 0.9024, |
|
"grad_norm": 3.170250654220581, |
|
"learning_rate": 1.1660151418622923e-07, |
|
"loss": 0.8845, |
|
"step": 1128 |
|
}, |
|
{ |
|
"epoch": 0.9032, |
|
"grad_norm": 1.3429255485534668, |
|
"learning_rate": 1.1471254277382882e-07, |
|
"loss": 3.5239, |
|
"step": 1129 |
|
}, |
|
{ |
|
"epoch": 0.904, |
|
"grad_norm": 3.8732850551605225, |
|
"learning_rate": 1.1283863813339263e-07, |
|
"loss": 1.4954, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.9048, |
|
"grad_norm": 1.0475130081176758, |
|
"learning_rate": 1.1097981210152042e-07, |
|
"loss": 3.5743, |
|
"step": 1131 |
|
}, |
|
{ |
|
"epoch": 0.9056, |
|
"grad_norm": 4.163371562957764, |
|
"learning_rate": 1.0913607641956842e-07, |
|
"loss": 1.3211, |
|
"step": 1132 |
|
}, |
|
{ |
|
"epoch": 0.9064, |
|
"grad_norm": 1.1388672590255737, |
|
"learning_rate": 1.0730744273357213e-07, |
|
"loss": 3.6136, |
|
"step": 1133 |
|
}, |
|
{ |
|
"epoch": 0.9072, |
|
"grad_norm": 3.882986068725586, |
|
"learning_rate": 1.0549392259417646e-07, |
|
"loss": 1.1432, |
|
"step": 1134 |
|
}, |
|
{ |
|
"epoch": 0.908, |
|
"grad_norm": 1.1615536212921143, |
|
"learning_rate": 1.0369552745656014e-07, |
|
"loss": 3.6521, |
|
"step": 1135 |
|
}, |
|
{ |
|
"epoch": 0.9088, |
|
"grad_norm": 3.6023221015930176, |
|
"learning_rate": 1.0191226868036419e-07, |
|
"loss": 1.3323, |
|
"step": 1136 |
|
}, |
|
{ |
|
"epoch": 0.9096, |
|
"grad_norm": 1.2144973278045654, |
|
"learning_rate": 1.0014415752962081e-07, |
|
"loss": 3.5626, |
|
"step": 1137 |
|
}, |
|
{ |
|
"epoch": 0.9104, |
|
"grad_norm": 3.877840280532837, |
|
"learning_rate": 9.839120517267986e-08, |
|
"loss": 1.3083, |
|
"step": 1138 |
|
}, |
|
{ |
|
"epoch": 0.9112, |
|
"grad_norm": 1.4756907224655151, |
|
"learning_rate": 9.665342268214167e-08, |
|
"loss": 3.4514, |
|
"step": 1139 |
|
}, |
|
{ |
|
"epoch": 0.912, |
|
"grad_norm": 4.363102436065674, |
|
"learning_rate": 9.493082103478519e-08, |
|
"loss": 1.1601, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.9128, |
|
"grad_norm": 1.2879115343093872, |
|
"learning_rate": 9.322341111149852e-08, |
|
"loss": 3.4346, |
|
"step": 1141 |
|
}, |
|
{ |
|
"epoch": 0.9136, |
|
"grad_norm": 4.510580539703369, |
|
"learning_rate": 9.153120369721047e-08, |
|
"loss": 1.3901, |
|
"step": 1142 |
|
}, |
|
{ |
|
"epoch": 0.9144, |
|
"grad_norm": 1.3555859327316284, |
|
"learning_rate": 8.985420948082329e-08, |
|
"loss": 3.4953, |
|
"step": 1143 |
|
}, |
|
{ |
|
"epoch": 0.9152, |
|
"grad_norm": 4.071751594543457, |
|
"learning_rate": 8.819243905514308e-08, |
|
"loss": 1.2933, |
|
"step": 1144 |
|
}, |
|
{ |
|
"epoch": 0.916, |
|
"grad_norm": 1.0624727010726929, |
|
"learning_rate": 8.654590291681531e-08, |
|
"loss": 3.6109, |
|
"step": 1145 |
|
}, |
|
{ |
|
"epoch": 0.9168, |
|
"grad_norm": 4.541050910949707, |
|
"learning_rate": 8.491461146625774e-08, |
|
"loss": 1.5013, |
|
"step": 1146 |
|
}, |
|
{ |
|
"epoch": 0.9176, |
|
"grad_norm": 1.036971926689148, |
|
"learning_rate": 8.329857500759291e-08, |
|
"loss": 3.5826, |
|
"step": 1147 |
|
}, |
|
{ |
|
"epoch": 0.9184, |
|
"grad_norm": 4.1964287757873535, |
|
"learning_rate": 8.169780374858577e-08, |
|
"loss": 1.4736, |
|
"step": 1148 |
|
}, |
|
{ |
|
"epoch": 0.9192, |
|
"grad_norm": 1.3899742364883423, |
|
"learning_rate": 8.011230780057749e-08, |
|
"loss": 3.4604, |
|
"step": 1149 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 3.7320985794067383, |
|
"learning_rate": 7.854209717842231e-08, |
|
"loss": 1.1507, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.9208, |
|
"grad_norm": 1.4710829257965088, |
|
"learning_rate": 7.698718180042392e-08, |
|
"loss": 3.5542, |
|
"step": 1151 |
|
}, |
|
{ |
|
"epoch": 0.9216, |
|
"grad_norm": 3.88554048538208, |
|
"learning_rate": 7.544757148827297e-08, |
|
"loss": 1.0699, |
|
"step": 1152 |
|
}, |
|
{ |
|
"epoch": 0.9224, |
|
"grad_norm": 1.352371096611023, |
|
"learning_rate": 7.392327596698474e-08, |
|
"loss": 3.5077, |
|
"step": 1153 |
|
}, |
|
{ |
|
"epoch": 0.9232, |
|
"grad_norm": 3.7906062602996826, |
|
"learning_rate": 7.24143048648382e-08, |
|
"loss": 1.3162, |
|
"step": 1154 |
|
}, |
|
{ |
|
"epoch": 0.924, |
|
"grad_norm": 1.3275525569915771, |
|
"learning_rate": 7.092066771331507e-08, |
|
"loss": 3.516, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 0.9248, |
|
"grad_norm": 3.684339761734009, |
|
"learning_rate": 6.944237394703985e-08, |
|
"loss": 1.0855, |
|
"step": 1156 |
|
}, |
|
{ |
|
"epoch": 0.9256, |
|
"grad_norm": 1.6030592918395996, |
|
"learning_rate": 6.797943290371839e-08, |
|
"loss": 3.3999, |
|
"step": 1157 |
|
}, |
|
{ |
|
"epoch": 0.9264, |
|
"grad_norm": 3.9943041801452637, |
|
"learning_rate": 6.653185382408195e-08, |
|
"loss": 1.3748, |
|
"step": 1158 |
|
}, |
|
{ |
|
"epoch": 0.9272, |
|
"grad_norm": 2.058311939239502, |
|
"learning_rate": 6.509964585182688e-08, |
|
"loss": 3.4637, |
|
"step": 1159 |
|
}, |
|
{ |
|
"epoch": 0.928, |
|
"grad_norm": 4.087345123291016, |
|
"learning_rate": 6.368281803355692e-08, |
|
"loss": 1.3247, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.9288, |
|
"grad_norm": 1.4231693744659424, |
|
"learning_rate": 6.228137931872713e-08, |
|
"loss": 3.5084, |
|
"step": 1161 |
|
}, |
|
{ |
|
"epoch": 0.9296, |
|
"grad_norm": 3.276982545852661, |
|
"learning_rate": 6.089533855958508e-08, |
|
"loss": 1.0859, |
|
"step": 1162 |
|
}, |
|
{ |
|
"epoch": 0.9304, |
|
"grad_norm": 0.9627519249916077, |
|
"learning_rate": 5.9524704511118305e-08, |
|
"loss": 3.6085, |
|
"step": 1163 |
|
}, |
|
{ |
|
"epoch": 0.9312, |
|
"grad_norm": 4.000705242156982, |
|
"learning_rate": 5.8169485830996134e-08, |
|
"loss": 1.2021, |
|
"step": 1164 |
|
}, |
|
{ |
|
"epoch": 0.932, |
|
"grad_norm": 1.0772417783737183, |
|
"learning_rate": 5.68296910795163e-08, |
|
"loss": 3.5649, |
|
"step": 1165 |
|
}, |
|
{ |
|
"epoch": 0.9328, |
|
"grad_norm": 4.611580848693848, |
|
"learning_rate": 5.550532871955061e-08, |
|
"loss": 1.2716, |
|
"step": 1166 |
|
}, |
|
{ |
|
"epoch": 0.9336, |
|
"grad_norm": 1.6169544458389282, |
|
"learning_rate": 5.419640711649188e-08, |
|
"loss": 3.4921, |
|
"step": 1167 |
|
}, |
|
{ |
|
"epoch": 0.9344, |
|
"grad_norm": 3.6111767292022705, |
|
"learning_rate": 5.290293453819956e-08, |
|
"loss": 1.1447, |
|
"step": 1168 |
|
}, |
|
{ |
|
"epoch": 0.9352, |
|
"grad_norm": 1.527208924293518, |
|
"learning_rate": 5.162491915495005e-08, |
|
"loss": 3.5345, |
|
"step": 1169 |
|
}, |
|
{ |
|
"epoch": 0.936, |
|
"grad_norm": 3.3724429607391357, |
|
"learning_rate": 5.036236903938285e-08, |
|
"loss": 1.1051, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.9368, |
|
"grad_norm": 1.2857189178466797, |
|
"learning_rate": 4.911529216645089e-08, |
|
"loss": 3.5927, |
|
"step": 1171 |
|
}, |
|
{ |
|
"epoch": 0.9376, |
|
"grad_norm": 3.823451519012451, |
|
"learning_rate": 4.788369641336943e-08, |
|
"loss": 1.2766, |
|
"step": 1172 |
|
}, |
|
{ |
|
"epoch": 0.9384, |
|
"grad_norm": 1.3951259851455688, |
|
"learning_rate": 4.6667589559566405e-08, |
|
"loss": 3.5188, |
|
"step": 1173 |
|
}, |
|
{ |
|
"epoch": 0.9392, |
|
"grad_norm": 4.200174331665039, |
|
"learning_rate": 4.546697928663357e-08, |
|
"loss": 1.4409, |
|
"step": 1174 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 1.4412181377410889, |
|
"learning_rate": 4.428187317827848e-08, |
|
"loss": 3.536, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 0.9408, |
|
"grad_norm": 4.055942058563232, |
|
"learning_rate": 4.311227872027479e-08, |
|
"loss": 1.3862, |
|
"step": 1176 |
|
}, |
|
{ |
|
"epoch": 0.9416, |
|
"grad_norm": 1.1776350736618042, |
|
"learning_rate": 4.1958203300417056e-08, |
|
"loss": 3.6454, |
|
"step": 1177 |
|
}, |
|
{ |
|
"epoch": 0.9424, |
|
"grad_norm": 3.8492658138275146, |
|
"learning_rate": 4.0819654208472947e-08, |
|
"loss": 1.2609, |
|
"step": 1178 |
|
}, |
|
{ |
|
"epoch": 0.9432, |
|
"grad_norm": 1.2920982837677002, |
|
"learning_rate": 3.969663863613721e-08, |
|
"loss": 3.4813, |
|
"step": 1179 |
|
}, |
|
{ |
|
"epoch": 0.944, |
|
"grad_norm": 3.726270914077759, |
|
"learning_rate": 3.8589163676986674e-08, |
|
"loss": 1.3, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.9448, |
|
"grad_norm": 1.0104079246520996, |
|
"learning_rate": 3.749723632643476e-08, |
|
"loss": 3.6193, |
|
"step": 1181 |
|
}, |
|
{ |
|
"epoch": 0.9456, |
|
"grad_norm": 3.768679618835449, |
|
"learning_rate": 3.642086348168844e-08, |
|
"loss": 1.2007, |
|
"step": 1182 |
|
}, |
|
{ |
|
"epoch": 0.9464, |
|
"grad_norm": 1.5914446115493774, |
|
"learning_rate": 3.536005194170328e-08, |
|
"loss": 3.4693, |
|
"step": 1183 |
|
}, |
|
{ |
|
"epoch": 0.9472, |
|
"grad_norm": 3.930814743041992, |
|
"learning_rate": 3.431480840714152e-08, |
|
"loss": 1.4124, |
|
"step": 1184 |
|
}, |
|
{ |
|
"epoch": 0.948, |
|
"grad_norm": 1.1689213514328003, |
|
"learning_rate": 3.328513948032991e-08, |
|
"loss": 3.5226, |
|
"step": 1185 |
|
}, |
|
{ |
|
"epoch": 0.9488, |
|
"grad_norm": 3.568666934967041, |
|
"learning_rate": 3.227105166521638e-08, |
|
"loss": 1.3847, |
|
"step": 1186 |
|
}, |
|
{ |
|
"epoch": 0.9496, |
|
"grad_norm": 1.2137675285339355, |
|
"learning_rate": 3.127255136733093e-08, |
|
"loss": 3.5211, |
|
"step": 1187 |
|
}, |
|
{ |
|
"epoch": 0.9504, |
|
"grad_norm": 4.159763336181641, |
|
"learning_rate": 3.028964489374453e-08, |
|
"loss": 1.3348, |
|
"step": 1188 |
|
}, |
|
{ |
|
"epoch": 0.9512, |
|
"grad_norm": 0.9644594788551331, |
|
"learning_rate": 2.9322338453028066e-08, |
|
"loss": 3.5866, |
|
"step": 1189 |
|
}, |
|
{ |
|
"epoch": 0.952, |
|
"grad_norm": 3.9226300716400146, |
|
"learning_rate": 2.8370638155215125e-08, |
|
"loss": 1.4359, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.9528, |
|
"grad_norm": 1.1887046098709106, |
|
"learning_rate": 2.7434550011761763e-08, |
|
"loss": 3.578, |
|
"step": 1191 |
|
}, |
|
{ |
|
"epoch": 0.9536, |
|
"grad_norm": 3.7943222522735596, |
|
"learning_rate": 2.6514079935509586e-08, |
|
"loss": 1.2984, |
|
"step": 1192 |
|
}, |
|
{ |
|
"epoch": 0.9544, |
|
"grad_norm": 1.480806589126587, |
|
"learning_rate": 2.560923374064772e-08, |
|
"loss": 3.4495, |
|
"step": 1193 |
|
}, |
|
{ |
|
"epoch": 0.9552, |
|
"grad_norm": 3.667187213897705, |
|
"learning_rate": 2.4720017142676745e-08, |
|
"loss": 1.4821, |
|
"step": 1194 |
|
}, |
|
{ |
|
"epoch": 0.956, |
|
"grad_norm": 1.1104971170425415, |
|
"learning_rate": 2.3846435758372034e-08, |
|
"loss": 3.6191, |
|
"step": 1195 |
|
}, |
|
{ |
|
"epoch": 0.9568, |
|
"grad_norm": 3.9890453815460205, |
|
"learning_rate": 2.2988495105748245e-08, |
|
"loss": 1.2608, |
|
"step": 1196 |
|
}, |
|
{ |
|
"epoch": 0.9576, |
|
"grad_norm": 1.3386608362197876, |
|
"learning_rate": 2.2146200604024614e-08, |
|
"loss": 3.5502, |
|
"step": 1197 |
|
}, |
|
{ |
|
"epoch": 0.9584, |
|
"grad_norm": 3.8145041465759277, |
|
"learning_rate": 2.131955757359111e-08, |
|
"loss": 1.3914, |
|
"step": 1198 |
|
}, |
|
{ |
|
"epoch": 0.9592, |
|
"grad_norm": 1.692157506942749, |
|
"learning_rate": 2.050857123597455e-08, |
|
"loss": 3.5147, |
|
"step": 1199 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 3.8497886657714844, |
|
"learning_rate": 1.9713246713805588e-08, |
|
"loss": 1.2747, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.9608, |
|
"grad_norm": 1.7304649353027344, |
|
"learning_rate": 1.893358903078568e-08, |
|
"loss": 3.4559, |
|
"step": 1201 |
|
}, |
|
{ |
|
"epoch": 0.9616, |
|
"grad_norm": 4.028602123260498, |
|
"learning_rate": 1.8169603111656554e-08, |
|
"loss": 1.2436, |
|
"step": 1202 |
|
}, |
|
{ |
|
"epoch": 0.9624, |
|
"grad_norm": 1.0460162162780762, |
|
"learning_rate": 1.7421293782168837e-08, |
|
"loss": 3.6491, |
|
"step": 1203 |
|
}, |
|
{ |
|
"epoch": 0.9632, |
|
"grad_norm": 4.187633514404297, |
|
"learning_rate": 1.6688665769050704e-08, |
|
"loss": 1.2076, |
|
"step": 1204 |
|
}, |
|
{ |
|
"epoch": 0.964, |
|
"grad_norm": 1.656624674797058, |
|
"learning_rate": 1.5971723699979015e-08, |
|
"loss": 3.5022, |
|
"step": 1205 |
|
}, |
|
{ |
|
"epoch": 0.9648, |
|
"grad_norm": 4.018679141998291, |
|
"learning_rate": 1.5270472103549317e-08, |
|
"loss": 1.4379, |
|
"step": 1206 |
|
}, |
|
{ |
|
"epoch": 0.9656, |
|
"grad_norm": 1.5885015726089478, |
|
"learning_rate": 1.4584915409248113e-08, |
|
"loss": 3.4547, |
|
"step": 1207 |
|
}, |
|
{ |
|
"epoch": 0.9664, |
|
"grad_norm": 3.9813663959503174, |
|
"learning_rate": 1.3915057947423705e-08, |
|
"loss": 1.3217, |
|
"step": 1208 |
|
}, |
|
{ |
|
"epoch": 0.9672, |
|
"grad_norm": 1.4755148887634277, |
|
"learning_rate": 1.3260903949260107e-08, |
|
"loss": 3.4995, |
|
"step": 1209 |
|
}, |
|
{ |
|
"epoch": 0.968, |
|
"grad_norm": 3.5924222469329834, |
|
"learning_rate": 1.2622457546749567e-08, |
|
"loss": 1.3469, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.9688, |
|
"grad_norm": 1.0457367897033691, |
|
"learning_rate": 1.1999722772666478e-08, |
|
"loss": 3.5185, |
|
"step": 1211 |
|
}, |
|
{ |
|
"epoch": 0.9696, |
|
"grad_norm": 4.9514994621276855, |
|
"learning_rate": 1.1392703560542118e-08, |
|
"loss": 1.3577, |
|
"step": 1212 |
|
}, |
|
{ |
|
"epoch": 0.9704, |
|
"grad_norm": 1.328444004058838, |
|
"learning_rate": 1.0801403744639672e-08, |
|
"loss": 3.4504, |
|
"step": 1213 |
|
}, |
|
{ |
|
"epoch": 0.9712, |
|
"grad_norm": 3.700564384460449, |
|
"learning_rate": 1.0225827059930082e-08, |
|
"loss": 1.2764, |
|
"step": 1214 |
|
}, |
|
{ |
|
"epoch": 0.972, |
|
"grad_norm": 1.7747372388839722, |
|
"learning_rate": 9.665977142068738e-09, |
|
"loss": 3.4396, |
|
"step": 1215 |
|
}, |
|
{ |
|
"epoch": 0.9728, |
|
"grad_norm": 3.901719331741333, |
|
"learning_rate": 9.121857527372157e-09, |
|
"loss": 1.4179, |
|
"step": 1216 |
|
}, |
|
{ |
|
"epoch": 0.9736, |
|
"grad_norm": 1.1439679861068726, |
|
"learning_rate": 8.59347165279495e-09, |
|
"loss": 3.5297, |
|
"step": 1217 |
|
}, |
|
{ |
|
"epoch": 0.9744, |
|
"grad_norm": 4.542992115020752, |
|
"learning_rate": 8.080822855909832e-09, |
|
"loss": 1.4076, |
|
"step": 1218 |
|
}, |
|
{ |
|
"epoch": 0.9752, |
|
"grad_norm": 1.05239737033844, |
|
"learning_rate": 7.583914374885426e-09, |
|
"loss": 3.6203, |
|
"step": 1219 |
|
}, |
|
{ |
|
"epoch": 0.976, |
|
"grad_norm": 3.649535655975342, |
|
"learning_rate": 7.102749348465166e-09, |
|
"loss": 1.2697, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.9768, |
|
"grad_norm": 1.6955548524856567, |
|
"learning_rate": 6.6373308159495275e-09, |
|
"loss": 3.4582, |
|
"step": 1221 |
|
}, |
|
{ |
|
"epoch": 0.9776, |
|
"grad_norm": 4.211562156677246, |
|
"learning_rate": 6.1876617171743865e-09, |
|
"loss": 1.3995, |
|
"step": 1222 |
|
}, |
|
{ |
|
"epoch": 0.9784, |
|
"grad_norm": 1.1870956420898438, |
|
"learning_rate": 5.753744892494639e-09, |
|
"loss": 3.5536, |
|
"step": 1223 |
|
}, |
|
{ |
|
"epoch": 0.9792, |
|
"grad_norm": 3.487827777862549, |
|
"learning_rate": 5.335583082764495e-09, |
|
"loss": 1.4411, |
|
"step": 1224 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 1.736832857131958, |
|
"learning_rate": 4.933178929321103e-09, |
|
"loss": 3.5151, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 0.9808, |
|
"grad_norm": 3.914550304412842, |
|
"learning_rate": 4.546534973968175e-09, |
|
"loss": 1.2732, |
|
"step": 1226 |
|
}, |
|
{ |
|
"epoch": 0.9816, |
|
"grad_norm": 1.4647449254989624, |
|
"learning_rate": 4.175653658958501e-09, |
|
"loss": 3.3779, |
|
"step": 1227 |
|
}, |
|
{ |
|
"epoch": 0.9824, |
|
"grad_norm": 4.559305191040039, |
|
"learning_rate": 3.820537326980622e-09, |
|
"loss": 1.5739, |
|
"step": 1228 |
|
}, |
|
{ |
|
"epoch": 0.9832, |
|
"grad_norm": 1.1620067358016968, |
|
"learning_rate": 3.481188221142184e-09, |
|
"loss": 3.5552, |
|
"step": 1229 |
|
}, |
|
{ |
|
"epoch": 0.984, |
|
"grad_norm": 3.963010787963867, |
|
"learning_rate": 3.1576084849563315e-09, |
|
"loss": 1.3199, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.9848, |
|
"grad_norm": 1.101914644241333, |
|
"learning_rate": 2.849800162328664e-09, |
|
"loss": 3.5772, |
|
"step": 1231 |
|
}, |
|
{ |
|
"epoch": 0.9856, |
|
"grad_norm": 3.9038467407226562, |
|
"learning_rate": 2.557765197543638e-09, |
|
"loss": 1.2684, |
|
"step": 1232 |
|
}, |
|
{ |
|
"epoch": 0.9864, |
|
"grad_norm": 1.2498347759246826, |
|
"learning_rate": 2.2815054352531842e-09, |
|
"loss": 3.6124, |
|
"step": 1233 |
|
}, |
|
{ |
|
"epoch": 0.9872, |
|
"grad_norm": 3.7474238872528076, |
|
"learning_rate": 2.0210226204639414e-09, |
|
"loss": 1.2981, |
|
"step": 1234 |
|
}, |
|
{ |
|
"epoch": 0.988, |
|
"grad_norm": 1.3778389692306519, |
|
"learning_rate": 1.7763183985269882e-09, |
|
"loss": 3.5426, |
|
"step": 1235 |
|
}, |
|
{ |
|
"epoch": 0.9888, |
|
"grad_norm": 3.6975715160369873, |
|
"learning_rate": 1.5473943151270155e-09, |
|
"loss": 1.3295, |
|
"step": 1236 |
|
}, |
|
{ |
|
"epoch": 0.9896, |
|
"grad_norm": 1.4429659843444824, |
|
"learning_rate": 1.3342518162728913e-09, |
|
"loss": 3.6067, |
|
"step": 1237 |
|
}, |
|
{ |
|
"epoch": 0.9904, |
|
"grad_norm": 3.43681263923645, |
|
"learning_rate": 1.1368922482887789e-09, |
|
"loss": 1.1235, |
|
"step": 1238 |
|
}, |
|
{ |
|
"epoch": 0.9912, |
|
"grad_norm": 1.3926042318344116, |
|
"learning_rate": 9.553168578049776e-10, |
|
"loss": 3.4841, |
|
"step": 1239 |
|
}, |
|
{ |
|
"epoch": 0.992, |
|
"grad_norm": 3.8875744342803955, |
|
"learning_rate": 7.895267917501503e-10, |
|
"loss": 1.3565, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.9928, |
|
"grad_norm": 1.6624120473861694, |
|
"learning_rate": 6.395230973443856e-10, |
|
"loss": 3.4427, |
|
"step": 1241 |
|
}, |
|
{ |
|
"epoch": 0.9936, |
|
"grad_norm": 3.605576753616333, |
|
"learning_rate": 5.053067220925356e-10, |
|
"loss": 1.1553, |
|
"step": 1242 |
|
}, |
|
{ |
|
"epoch": 0.9944, |
|
"grad_norm": 1.560855507850647, |
|
"learning_rate": 3.868785137786657e-10, |
|
"loss": 3.4811, |
|
"step": 1243 |
|
}, |
|
{ |
|
"epoch": 0.9952, |
|
"grad_norm": 4.160490989685059, |
|
"learning_rate": 2.842392204591149e-10, |
|
"loss": 1.2979, |
|
"step": 1244 |
|
}, |
|
{ |
|
"epoch": 0.996, |
|
"grad_norm": 1.5523591041564941, |
|
"learning_rate": 1.9738949045972068e-10, |
|
"loss": 3.4412, |
|
"step": 1245 |
|
}, |
|
{ |
|
"epoch": 0.9968, |
|
"grad_norm": 4.556288719177246, |
|
"learning_rate": 1.2632987237054527e-10, |
|
"loss": 1.2008, |
|
"step": 1246 |
|
}, |
|
{ |
|
"epoch": 0.9976, |
|
"grad_norm": 1.2331137657165527, |
|
"learning_rate": 7.106081504254514e-11, |
|
"loss": 3.4326, |
|
"step": 1247 |
|
}, |
|
{ |
|
"epoch": 0.9984, |
|
"grad_norm": 4.683450222015381, |
|
"learning_rate": 3.158266758562789e-11, |
|
"loss": 1.5665, |
|
"step": 1248 |
|
}, |
|
{ |
|
"epoch": 0.9992, |
|
"grad_norm": 1.4326642751693726, |
|
"learning_rate": 7.89567936476665e-12, |
|
"loss": 3.5399, |
|
"step": 1249 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 3.5186572074890137, |
|
"learning_rate": 0.0, |
|
"loss": 1.4857, |
|
"step": 1250 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 1250, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 2000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 6.22349105912873e+16, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|