diff --git "a/microsoft/grammarBERT/trainer_state.json" "b/microsoft/grammarBERT/trainer_state.json" new file mode 100644--- /dev/null +++ "b/microsoft/grammarBERT/trainer_state.json" @@ -0,0 +1,9313 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 6.983767459418648, + "eval_steps": 500, + "global_step": 92500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 5.9999928e-05, + "loss": 7.2081, + "step": 60 + }, + { + "epoch": 0.01, + "learning_rate": 5.9999856e-05, + "loss": 3.5421, + "step": 120 + }, + { + "epoch": 0.01, + "learning_rate": 5.9999784000000004e-05, + "loss": 2.2759, + "step": 180 + }, + { + "epoch": 0.02, + "learning_rate": 5.9999712e-05, + "loss": 1.8451, + "step": 240 + }, + { + "epoch": 0.02, + "learning_rate": 5.9999640000000003e-05, + "loss": 1.6239, + "step": 300 + }, + { + "epoch": 0.03, + "learning_rate": 5.9999568e-05, + "loss": 1.5319, + "step": 360 + }, + { + "epoch": 0.03, + "learning_rate": 5.9999496e-05, + "loss": 1.4177, + "step": 420 + }, + { + "epoch": 0.04, + "learning_rate": 5.9999424e-05, + "loss": 1.3957, + "step": 480 + }, + { + "epoch": 0.04, + "learning_rate": 5.9999352e-05, + "loss": 1.3271, + "step": 540 + }, + { + "epoch": 0.05, + "learning_rate": 5.999928e-05, + "loss": 1.2944, + "step": 600 + }, + { + "epoch": 0.05, + "learning_rate": 5.9999208e-05, + "loss": 1.2562, + "step": 660 + }, + { + "epoch": 0.05, + "learning_rate": 5.9999136000000005e-05, + "loss": 1.2175, + "step": 720 + }, + { + "epoch": 0.06, + "learning_rate": 5.9999064e-05, + "loss": 1.1996, + "step": 780 + }, + { + "epoch": 0.06, + "learning_rate": 5.9998992e-05, + "loss": 1.1729, + "step": 840 + }, + { + "epoch": 0.07, + "learning_rate": 5.999892e-05, + "loss": 1.1586, + "step": 900 + }, + { + "epoch": 0.07, + "learning_rate": 5.9998848000000004e-05, + "loss": 1.1274, + "step": 960 + }, + { + "epoch": 0.08, + "learning_rate": 5.9998776e-05, + "loss": 1.0855, + "step": 1020 + }, + { + "epoch": 0.08, + "learning_rate": 5.9998704e-05, + "loss": 1.1155, + "step": 1080 + }, + { + "epoch": 0.09, + "learning_rate": 5.9998632e-05, + "loss": 1.104, + "step": 1140 + }, + { + "epoch": 0.09, + "learning_rate": 5.999856e-05, + "loss": 1.0725, + "step": 1200 + }, + { + "epoch": 0.1, + "learning_rate": 5.9998488000000006e-05, + "loss": 1.0544, + "step": 1260 + }, + { + "epoch": 0.1, + "learning_rate": 5.9998416e-05, + "loss": 1.0552, + "step": 1320 + }, + { + "epoch": 0.1, + "learning_rate": 5.9998344e-05, + "loss": 1.0507, + "step": 1380 + }, + { + "epoch": 0.11, + "learning_rate": 5.9998272e-05, + "loss": 1.0456, + "step": 1440 + }, + { + "epoch": 0.11, + "learning_rate": 5.9998200000000004e-05, + "loss": 1.027, + "step": 1500 + }, + { + "epoch": 0.12, + "learning_rate": 5.9998128e-05, + "loss": 1.0333, + "step": 1560 + }, + { + "epoch": 0.12, + "learning_rate": 5.9998056e-05, + "loss": 1.009, + "step": 1620 + }, + { + "epoch": 0.13, + "learning_rate": 5.999798400000001e-05, + "loss": 1.0066, + "step": 1680 + }, + { + "epoch": 0.13, + "learning_rate": 5.9997912e-05, + "loss": 0.9916, + "step": 1740 + }, + { + "epoch": 0.14, + "learning_rate": 5.999784e-05, + "loss": 1.0084, + "step": 1800 + }, + { + "epoch": 0.14, + "learning_rate": 5.9997768e-05, + "loss": 0.9778, + "step": 1860 + }, + { + "epoch": 0.14, + "learning_rate": 5.9997696e-05, + "loss": 0.9835, + "step": 1920 + }, + { + "epoch": 0.15, + "learning_rate": 5.9997624e-05, + "loss": 0.9726, + "step": 1980 + }, + { + "epoch": 0.15, + "learning_rate": 5.9997552000000005e-05, + "loss": 0.974, + "step": 2040 + }, + { + "epoch": 0.16, + "learning_rate": 5.999748e-05, + "loss": 0.9648, + "step": 2100 + }, + { + "epoch": 0.16, + "learning_rate": 5.9997408e-05, + "loss": 0.9555, + "step": 2160 + }, + { + "epoch": 0.17, + "learning_rate": 5.9997336e-05, + "loss": 0.9506, + "step": 2220 + }, + { + "epoch": 0.17, + "learning_rate": 5.9997264000000004e-05, + "loss": 0.9694, + "step": 2280 + }, + { + "epoch": 0.18, + "learning_rate": 5.9997192e-05, + "loss": 0.9429, + "step": 2340 + }, + { + "epoch": 0.18, + "learning_rate": 5.9997119999999997e-05, + "loss": 0.9526, + "step": 2400 + }, + { + "epoch": 0.19, + "learning_rate": 5.9997048000000006e-05, + "loss": 0.9249, + "step": 2460 + }, + { + "epoch": 0.19, + "learning_rate": 5.9996976e-05, + "loss": 0.9133, + "step": 2520 + }, + { + "epoch": 0.19, + "learning_rate": 5.9996904e-05, + "loss": 0.9296, + "step": 2580 + }, + { + "epoch": 0.2, + "learning_rate": 5.9996832e-05, + "loss": 0.9436, + "step": 2640 + }, + { + "epoch": 0.2, + "learning_rate": 5.999676e-05, + "loss": 0.9182, + "step": 2700 + }, + { + "epoch": 0.21, + "learning_rate": 5.9996688e-05, + "loss": 0.8987, + "step": 2760 + }, + { + "epoch": 0.21, + "learning_rate": 5.9996616000000005e-05, + "loss": 0.9043, + "step": 2820 + }, + { + "epoch": 0.22, + "learning_rate": 5.9996544e-05, + "loss": 0.908, + "step": 2880 + }, + { + "epoch": 0.22, + "learning_rate": 5.9996472e-05, + "loss": 0.8998, + "step": 2940 + }, + { + "epoch": 0.23, + "learning_rate": 5.999640000000001e-05, + "loss": 0.9018, + "step": 3000 + }, + { + "epoch": 0.23, + "learning_rate": 5.9996328000000003e-05, + "loss": 0.89, + "step": 3060 + }, + { + "epoch": 0.24, + "learning_rate": 5.9996256e-05, + "loss": 0.8844, + "step": 3120 + }, + { + "epoch": 0.24, + "learning_rate": 5.9996183999999996e-05, + "loss": 0.9006, + "step": 3180 + }, + { + "epoch": 0.24, + "learning_rate": 5.9996112000000006e-05, + "loss": 0.8923, + "step": 3240 + }, + { + "epoch": 0.25, + "learning_rate": 5.999604e-05, + "loss": 0.907, + "step": 3300 + }, + { + "epoch": 0.25, + "learning_rate": 5.9995968e-05, + "loss": 0.8881, + "step": 3360 + }, + { + "epoch": 0.26, + "learning_rate": 5.9995896e-05, + "loss": 0.8787, + "step": 3420 + }, + { + "epoch": 0.26, + "learning_rate": 5.9995824e-05, + "loss": 0.8692, + "step": 3480 + }, + { + "epoch": 0.27, + "learning_rate": 5.9995752e-05, + "loss": 0.8746, + "step": 3540 + }, + { + "epoch": 0.27, + "learning_rate": 5.9995680000000004e-05, + "loss": 0.8755, + "step": 3600 + }, + { + "epoch": 0.28, + "learning_rate": 5.9995608e-05, + "loss": 0.8868, + "step": 3660 + }, + { + "epoch": 0.28, + "learning_rate": 5.9995536e-05, + "loss": 0.8726, + "step": 3720 + }, + { + "epoch": 0.29, + "learning_rate": 5.999546400000001e-05, + "loss": 0.8636, + "step": 3780 + }, + { + "epoch": 0.29, + "learning_rate": 5.9995392e-05, + "loss": 0.8691, + "step": 3840 + }, + { + "epoch": 0.29, + "learning_rate": 5.999532e-05, + "loss": 0.862, + "step": 3900 + }, + { + "epoch": 0.3, + "learning_rate": 5.9995248e-05, + "loss": 0.8606, + "step": 3960 + }, + { + "epoch": 0.3, + "learning_rate": 5.9995176000000006e-05, + "loss": 0.8735, + "step": 4020 + }, + { + "epoch": 0.31, + "learning_rate": 5.9995104e-05, + "loss": 0.8453, + "step": 4080 + }, + { + "epoch": 0.31, + "learning_rate": 5.9995032e-05, + "loss": 0.8491, + "step": 4140 + }, + { + "epoch": 0.32, + "learning_rate": 5.999496e-05, + "loss": 0.8714, + "step": 4200 + }, + { + "epoch": 0.32, + "learning_rate": 5.9994888000000004e-05, + "loss": 0.8438, + "step": 4260 + }, + { + "epoch": 0.33, + "learning_rate": 5.9994816e-05, + "loss": 0.877, + "step": 4320 + }, + { + "epoch": 0.33, + "learning_rate": 5.9994744000000004e-05, + "loss": 0.8526, + "step": 4380 + }, + { + "epoch": 0.34, + "learning_rate": 5.9994672e-05, + "loss": 0.8675, + "step": 4440 + }, + { + "epoch": 0.34, + "learning_rate": 5.9994599999999996e-05, + "loss": 0.8572, + "step": 4500 + }, + { + "epoch": 0.34, + "learning_rate": 5.9994528000000006e-05, + "loss": 0.853, + "step": 4560 + }, + { + "epoch": 0.35, + "learning_rate": 5.9994456e-05, + "loss": 0.8329, + "step": 4620 + }, + { + "epoch": 0.35, + "learning_rate": 5.9994384e-05, + "loss": 0.852, + "step": 4680 + }, + { + "epoch": 0.36, + "learning_rate": 5.9994312e-05, + "loss": 0.8385, + "step": 4740 + }, + { + "epoch": 0.36, + "learning_rate": 5.9994240000000005e-05, + "loss": 0.8431, + "step": 4800 + }, + { + "epoch": 0.37, + "learning_rate": 5.9994168e-05, + "loss": 0.8492, + "step": 4860 + }, + { + "epoch": 0.37, + "learning_rate": 5.9994096e-05, + "loss": 0.8421, + "step": 4920 + }, + { + "epoch": 0.38, + "learning_rate": 5.9994024e-05, + "loss": 0.8302, + "step": 4980 + }, + { + "epoch": 0.38, + "learning_rate": 5.9993952000000004e-05, + "loss": 0.8301, + "step": 5040 + }, + { + "epoch": 0.39, + "learning_rate": 5.999388e-05, + "loss": 0.8375, + "step": 5100 + }, + { + "epoch": 0.39, + "learning_rate": 5.9993808000000003e-05, + "loss": 0.8303, + "step": 5160 + }, + { + "epoch": 0.39, + "learning_rate": 5.9993736e-05, + "loss": 0.8361, + "step": 5220 + }, + { + "epoch": 0.4, + "learning_rate": 5.9993663999999996e-05, + "loss": 0.8357, + "step": 5280 + }, + { + "epoch": 0.4, + "learning_rate": 5.9993592000000006e-05, + "loss": 0.8418, + "step": 5340 + }, + { + "epoch": 0.41, + "learning_rate": 5.999352e-05, + "loss": 0.8424, + "step": 5400 + }, + { + "epoch": 0.41, + "learning_rate": 5.9993448e-05, + "loss": 0.8374, + "step": 5460 + }, + { + "epoch": 0.42, + "learning_rate": 5.9993376e-05, + "loss": 0.8274, + "step": 5520 + }, + { + "epoch": 0.42, + "learning_rate": 5.9993304000000005e-05, + "loss": 0.8327, + "step": 5580 + }, + { + "epoch": 0.43, + "learning_rate": 5.9993232e-05, + "loss": 0.8032, + "step": 5640 + }, + { + "epoch": 0.43, + "learning_rate": 5.9993160000000004e-05, + "loss": 0.8153, + "step": 5700 + }, + { + "epoch": 0.43, + "learning_rate": 5.9993088e-05, + "loss": 0.8205, + "step": 5760 + }, + { + "epoch": 0.44, + "learning_rate": 5.9993016000000004e-05, + "loss": 0.8156, + "step": 5820 + }, + { + "epoch": 0.44, + "learning_rate": 5.9992944e-05, + "loss": 0.8177, + "step": 5880 + }, + { + "epoch": 0.45, + "learning_rate": 5.9992872e-05, + "loss": 0.8276, + "step": 5940 + }, + { + "epoch": 0.45, + "learning_rate": 5.99928e-05, + "loss": 0.7939, + "step": 6000 + }, + { + "epoch": 0.46, + "learning_rate": 5.9992727999999996e-05, + "loss": 0.8119, + "step": 6060 + }, + { + "epoch": 0.46, + "learning_rate": 5.9992656000000005e-05, + "loss": 0.8212, + "step": 6120 + }, + { + "epoch": 0.47, + "learning_rate": 5.9992584e-05, + "loss": 0.8212, + "step": 6180 + }, + { + "epoch": 0.47, + "learning_rate": 5.9992512e-05, + "loss": 0.8284, + "step": 6240 + }, + { + "epoch": 0.48, + "learning_rate": 5.999244e-05, + "loss": 0.8143, + "step": 6300 + }, + { + "epoch": 0.48, + "learning_rate": 5.9992368000000004e-05, + "loss": 0.8034, + "step": 6360 + }, + { + "epoch": 0.48, + "learning_rate": 5.9992296e-05, + "loss": 0.824, + "step": 6420 + }, + { + "epoch": 0.49, + "learning_rate": 5.9992224000000004e-05, + "loss": 0.804, + "step": 6480 + }, + { + "epoch": 0.49, + "learning_rate": 5.9992152e-05, + "loss": 0.8078, + "step": 6540 + }, + { + "epoch": 0.5, + "learning_rate": 5.999208e-05, + "loss": 0.7899, + "step": 6600 + }, + { + "epoch": 0.5, + "learning_rate": 5.9992008e-05, + "loss": 0.808, + "step": 6660 + }, + { + "epoch": 0.51, + "learning_rate": 5.9991936e-05, + "loss": 0.7898, + "step": 6720 + }, + { + "epoch": 0.51, + "learning_rate": 5.9991864e-05, + "loss": 0.802, + "step": 6780 + }, + { + "epoch": 0.52, + "learning_rate": 5.9991792e-05, + "loss": 0.798, + "step": 6840 + }, + { + "epoch": 0.52, + "learning_rate": 5.9991720000000005e-05, + "loss": 0.8134, + "step": 6900 + }, + { + "epoch": 0.53, + "learning_rate": 5.9991648e-05, + "loss": 0.8006, + "step": 6960 + }, + { + "epoch": 0.53, + "learning_rate": 5.9991576e-05, + "loss": 0.7945, + "step": 7020 + }, + { + "epoch": 0.53, + "learning_rate": 5.9991504e-05, + "loss": 0.7959, + "step": 7080 + }, + { + "epoch": 0.54, + "learning_rate": 5.9991432000000004e-05, + "loss": 0.7983, + "step": 7140 + }, + { + "epoch": 0.54, + "learning_rate": 5.999136e-05, + "loss": 0.7968, + "step": 7200 + }, + { + "epoch": 0.55, + "learning_rate": 5.9991288e-05, + "loss": 0.7914, + "step": 7260 + }, + { + "epoch": 0.55, + "learning_rate": 5.9991216e-05, + "loss": 0.8027, + "step": 7320 + }, + { + "epoch": 0.56, + "learning_rate": 5.9991144e-05, + "loss": 0.7931, + "step": 7380 + }, + { + "epoch": 0.56, + "learning_rate": 5.9991072000000006e-05, + "loss": 0.7944, + "step": 7440 + }, + { + "epoch": 0.57, + "learning_rate": 5.9991e-05, + "loss": 0.7978, + "step": 7500 + }, + { + "epoch": 0.57, + "learning_rate": 5.9990928e-05, + "loss": 0.7839, + "step": 7560 + }, + { + "epoch": 0.58, + "learning_rate": 5.9990856e-05, + "loss": 0.803, + "step": 7620 + }, + { + "epoch": 0.58, + "learning_rate": 5.9990784000000005e-05, + "loss": 0.7868, + "step": 7680 + }, + { + "epoch": 0.58, + "learning_rate": 5.9990712e-05, + "loss": 0.7728, + "step": 7740 + }, + { + "epoch": 0.59, + "learning_rate": 5.999064e-05, + "loss": 0.7898, + "step": 7800 + }, + { + "epoch": 0.59, + "learning_rate": 5.9990568e-05, + "loss": 0.7804, + "step": 7860 + }, + { + "epoch": 0.6, + "learning_rate": 5.9990496000000003e-05, + "loss": 0.7929, + "step": 7920 + }, + { + "epoch": 0.6, + "learning_rate": 5.9990424e-05, + "loss": 0.7698, + "step": 7980 + }, + { + "epoch": 0.61, + "learning_rate": 5.9990352e-05, + "loss": 0.79, + "step": 8040 + }, + { + "epoch": 0.61, + "learning_rate": 5.999028e-05, + "loss": 0.7941, + "step": 8100 + }, + { + "epoch": 0.62, + "learning_rate": 5.9990208e-05, + "loss": 0.7694, + "step": 8160 + }, + { + "epoch": 0.62, + "learning_rate": 5.9990136000000005e-05, + "loss": 0.7749, + "step": 8220 + }, + { + "epoch": 0.63, + "learning_rate": 5.9990064e-05, + "loss": 0.7827, + "step": 8280 + }, + { + "epoch": 0.63, + "learning_rate": 5.9989992e-05, + "loss": 0.7852, + "step": 8340 + }, + { + "epoch": 0.63, + "learning_rate": 5.998992000000001e-05, + "loss": 0.7828, + "step": 8400 + }, + { + "epoch": 0.64, + "learning_rate": 5.9989848000000004e-05, + "loss": 0.7742, + "step": 8460 + }, + { + "epoch": 0.64, + "learning_rate": 5.9989776e-05, + "loss": 0.7679, + "step": 8520 + }, + { + "epoch": 0.65, + "learning_rate": 5.9989704e-05, + "loss": 0.7717, + "step": 8580 + }, + { + "epoch": 0.65, + "learning_rate": 5.9989632e-05, + "loss": 0.7767, + "step": 8640 + }, + { + "epoch": 0.66, + "learning_rate": 5.998956e-05, + "loss": 0.778, + "step": 8700 + }, + { + "epoch": 0.66, + "learning_rate": 5.9989488e-05, + "loss": 0.7703, + "step": 8760 + }, + { + "epoch": 0.67, + "learning_rate": 5.9989416e-05, + "loss": 0.7637, + "step": 8820 + }, + { + "epoch": 0.67, + "learning_rate": 5.9989344e-05, + "loss": 0.7665, + "step": 8880 + }, + { + "epoch": 0.67, + "learning_rate": 5.9989272e-05, + "loss": 0.7708, + "step": 8940 + }, + { + "epoch": 0.68, + "learning_rate": 5.9989200000000005e-05, + "loss": 0.783, + "step": 9000 + }, + { + "epoch": 0.68, + "learning_rate": 5.9989128e-05, + "loss": 0.7839, + "step": 9060 + }, + { + "epoch": 0.69, + "learning_rate": 5.9989056e-05, + "loss": 0.7663, + "step": 9120 + }, + { + "epoch": 0.69, + "learning_rate": 5.998898400000001e-05, + "loss": 0.7664, + "step": 9180 + }, + { + "epoch": 0.7, + "learning_rate": 5.9988912000000004e-05, + "loss": 0.7694, + "step": 9240 + }, + { + "epoch": 0.7, + "learning_rate": 5.998884e-05, + "loss": 0.7658, + "step": 9300 + }, + { + "epoch": 0.71, + "learning_rate": 5.9988767999999996e-05, + "loss": 0.7564, + "step": 9360 + }, + { + "epoch": 0.71, + "learning_rate": 5.9988696e-05, + "loss": 0.7785, + "step": 9420 + }, + { + "epoch": 0.72, + "learning_rate": 5.9988624e-05, + "loss": 0.7709, + "step": 9480 + }, + { + "epoch": 0.72, + "learning_rate": 5.9988552e-05, + "loss": 0.7658, + "step": 9540 + }, + { + "epoch": 0.72, + "learning_rate": 5.998848e-05, + "loss": 0.7912, + "step": 9600 + }, + { + "epoch": 0.73, + "learning_rate": 5.9988408e-05, + "loss": 0.7777, + "step": 9660 + }, + { + "epoch": 0.73, + "learning_rate": 5.9988336e-05, + "loss": 0.7559, + "step": 9720 + }, + { + "epoch": 0.74, + "learning_rate": 5.9988264000000005e-05, + "loss": 0.763, + "step": 9780 + }, + { + "epoch": 0.74, + "learning_rate": 5.9988192e-05, + "loss": 0.7614, + "step": 9840 + }, + { + "epoch": 0.75, + "learning_rate": 5.998812e-05, + "loss": 0.7457, + "step": 9900 + }, + { + "epoch": 0.75, + "learning_rate": 5.998804800000001e-05, + "loss": 0.7683, + "step": 9960 + }, + { + "epoch": 0.76, + "learning_rate": 5.9987976e-05, + "loss": 0.7604, + "step": 10020 + }, + { + "epoch": 0.76, + "learning_rate": 5.9987904e-05, + "loss": 0.7683, + "step": 10080 + }, + { + "epoch": 0.77, + "learning_rate": 5.9987832e-05, + "loss": 0.7655, + "step": 10140 + }, + { + "epoch": 0.77, + "learning_rate": 5.9987760000000006e-05, + "loss": 0.762, + "step": 10200 + }, + { + "epoch": 0.77, + "learning_rate": 5.9987688e-05, + "loss": 0.753, + "step": 10260 + }, + { + "epoch": 0.78, + "learning_rate": 5.9987616e-05, + "loss": 0.7625, + "step": 10320 + }, + { + "epoch": 0.78, + "learning_rate": 5.9987544e-05, + "loss": 0.7487, + "step": 10380 + }, + { + "epoch": 0.79, + "learning_rate": 5.9987472e-05, + "loss": 0.7526, + "step": 10440 + }, + { + "epoch": 0.79, + "learning_rate": 5.99874e-05, + "loss": 0.7593, + "step": 10500 + }, + { + "epoch": 0.8, + "learning_rate": 5.9987328000000004e-05, + "loss": 0.7648, + "step": 10560 + }, + { + "epoch": 0.8, + "learning_rate": 5.9987256e-05, + "loss": 0.7574, + "step": 10620 + }, + { + "epoch": 0.81, + "learning_rate": 5.9987184e-05, + "loss": 0.7488, + "step": 10680 + }, + { + "epoch": 0.81, + "learning_rate": 5.9987112000000007e-05, + "loss": 0.7594, + "step": 10740 + }, + { + "epoch": 0.82, + "learning_rate": 5.998704e-05, + "loss": 0.7509, + "step": 10800 + }, + { + "epoch": 0.82, + "learning_rate": 5.9986968e-05, + "loss": 0.7422, + "step": 10860 + }, + { + "epoch": 0.82, + "learning_rate": 5.9986896e-05, + "loss": 0.7344, + "step": 10920 + }, + { + "epoch": 0.83, + "learning_rate": 5.9986824000000005e-05, + "loss": 0.7528, + "step": 10980 + }, + { + "epoch": 0.83, + "learning_rate": 5.9986752e-05, + "loss": 0.7547, + "step": 11040 + }, + { + "epoch": 0.84, + "learning_rate": 5.998668e-05, + "loss": 0.7416, + "step": 11100 + }, + { + "epoch": 0.84, + "learning_rate": 5.9986608e-05, + "loss": 0.7649, + "step": 11160 + }, + { + "epoch": 0.85, + "learning_rate": 5.9986536e-05, + "loss": 0.7482, + "step": 11220 + }, + { + "epoch": 0.85, + "learning_rate": 5.9986464e-05, + "loss": 0.7305, + "step": 11280 + }, + { + "epoch": 0.86, + "learning_rate": 5.9986392000000004e-05, + "loss": 0.7371, + "step": 11340 + }, + { + "epoch": 0.86, + "learning_rate": 5.998632e-05, + "loss": 0.7407, + "step": 11400 + }, + { + "epoch": 0.87, + "learning_rate": 5.9986247999999996e-05, + "loss": 0.728, + "step": 11460 + }, + { + "epoch": 0.87, + "learning_rate": 5.9986176000000006e-05, + "loss": 0.7559, + "step": 11520 + }, + { + "epoch": 0.87, + "learning_rate": 5.9986104e-05, + "loss": 0.7428, + "step": 11580 + }, + { + "epoch": 0.88, + "learning_rate": 5.9986032e-05, + "loss": 0.734, + "step": 11640 + }, + { + "epoch": 0.88, + "learning_rate": 5.998596e-05, + "loss": 0.7402, + "step": 11700 + }, + { + "epoch": 0.89, + "learning_rate": 5.9985888000000005e-05, + "loss": 0.7427, + "step": 11760 + }, + { + "epoch": 0.89, + "learning_rate": 5.9985816e-05, + "loss": 0.7442, + "step": 11820 + }, + { + "epoch": 0.9, + "learning_rate": 5.9985744000000004e-05, + "loss": 0.7385, + "step": 11880 + }, + { + "epoch": 0.9, + "learning_rate": 5.9985672e-05, + "loss": 0.7491, + "step": 11940 + }, + { + "epoch": 0.91, + "learning_rate": 5.9985600000000004e-05, + "loss": 0.7514, + "step": 12000 + }, + { + "epoch": 0.91, + "learning_rate": 5.9985528e-05, + "loss": 0.7364, + "step": 12060 + }, + { + "epoch": 0.92, + "learning_rate": 5.9985456e-05, + "loss": 0.7375, + "step": 12120 + }, + { + "epoch": 0.92, + "learning_rate": 5.9985384e-05, + "loss": 0.7472, + "step": 12180 + }, + { + "epoch": 0.92, + "learning_rate": 5.9985311999999996e-05, + "loss": 0.7363, + "step": 12240 + }, + { + "epoch": 0.93, + "learning_rate": 5.9985240000000006e-05, + "loss": 0.7564, + "step": 12300 + }, + { + "epoch": 0.93, + "learning_rate": 5.9985168e-05, + "loss": 0.7414, + "step": 12360 + }, + { + "epoch": 0.94, + "learning_rate": 5.9985096e-05, + "loss": 0.7326, + "step": 12420 + }, + { + "epoch": 0.94, + "learning_rate": 5.9985024e-05, + "loss": 0.7518, + "step": 12480 + }, + { + "epoch": 0.95, + "learning_rate": 5.9984952000000005e-05, + "loss": 0.7297, + "step": 12540 + }, + { + "epoch": 0.95, + "learning_rate": 5.998488e-05, + "loss": 0.719, + "step": 12600 + }, + { + "epoch": 0.96, + "learning_rate": 5.9984808000000004e-05, + "loss": 0.7326, + "step": 12660 + }, + { + "epoch": 0.96, + "learning_rate": 5.9984736e-05, + "loss": 0.7373, + "step": 12720 + }, + { + "epoch": 0.96, + "learning_rate": 5.9984664000000003e-05, + "loss": 0.7291, + "step": 12780 + }, + { + "epoch": 0.97, + "learning_rate": 5.9984592000000006e-05, + "loss": 0.7546, + "step": 12840 + }, + { + "epoch": 0.97, + "learning_rate": 5.998452e-05, + "loss": 0.7359, + "step": 12900 + }, + { + "epoch": 0.98, + "learning_rate": 5.9984448e-05, + "loss": 0.723, + "step": 12960 + }, + { + "epoch": 0.98, + "learning_rate": 5.9984375999999995e-05, + "loss": 0.7463, + "step": 13020 + }, + { + "epoch": 0.99, + "learning_rate": 5.9984304000000005e-05, + "loss": 0.7288, + "step": 13080 + }, + { + "epoch": 0.99, + "learning_rate": 5.9984232e-05, + "loss": 0.714, + "step": 13140 + }, + { + "epoch": 1.0, + "learning_rate": 5.998416e-05, + "loss": 0.7358, + "step": 13200 + }, + { + "epoch": 1.0, + "eval_loss": 0.6823992729187012, + "eval_runtime": 338.539, + "eval_samples_per_second": 295.387, + "eval_steps_per_second": 2.31, + "step": 13245 + }, + { + "epoch": 1.0, + "learning_rate": 5.9984088e-05, + "loss": 0.7322, + "step": 13260 + }, + { + "epoch": 1.01, + "learning_rate": 5.9984016000000004e-05, + "loss": 0.7234, + "step": 13320 + }, + { + "epoch": 1.01, + "learning_rate": 5.9983944e-05, + "loss": 0.7239, + "step": 13380 + }, + { + "epoch": 1.01, + "learning_rate": 5.9983872000000004e-05, + "loss": 0.7261, + "step": 13440 + }, + { + "epoch": 1.02, + "learning_rate": 5.99838e-05, + "loss": 0.7221, + "step": 13500 + }, + { + "epoch": 1.02, + "learning_rate": 5.9983728e-05, + "loss": 0.7171, + "step": 13560 + }, + { + "epoch": 1.03, + "learning_rate": 5.9983656000000006e-05, + "loss": 0.7191, + "step": 13620 + }, + { + "epoch": 1.03, + "learning_rate": 5.9983584e-05, + "loss": 0.7295, + "step": 13680 + }, + { + "epoch": 1.04, + "learning_rate": 5.9983512e-05, + "loss": 0.7121, + "step": 13740 + }, + { + "epoch": 1.04, + "learning_rate": 5.9983439999999995e-05, + "loss": 0.7221, + "step": 13800 + }, + { + "epoch": 1.05, + "learning_rate": 5.9983368000000005e-05, + "loss": 0.7249, + "step": 13860 + }, + { + "epoch": 1.05, + "learning_rate": 5.9983296e-05, + "loss": 0.718, + "step": 13920 + }, + { + "epoch": 1.06, + "learning_rate": 5.9983224e-05, + "loss": 0.7343, + "step": 13980 + }, + { + "epoch": 1.06, + "learning_rate": 5.9983152e-05, + "loss": 0.7224, + "step": 14040 + }, + { + "epoch": 1.06, + "learning_rate": 5.9983080000000004e-05, + "loss": 0.7246, + "step": 14100 + }, + { + "epoch": 1.07, + "learning_rate": 5.9983008e-05, + "loss": 0.7299, + "step": 14160 + }, + { + "epoch": 1.07, + "learning_rate": 5.9982936e-05, + "loss": 0.7202, + "step": 14220 + }, + { + "epoch": 1.08, + "learning_rate": 5.9982864e-05, + "loss": 0.7202, + "step": 14280 + }, + { + "epoch": 1.08, + "learning_rate": 5.9982792e-05, + "loss": 0.7131, + "step": 14340 + }, + { + "epoch": 1.09, + "learning_rate": 5.9982720000000006e-05, + "loss": 0.7226, + "step": 14400 + }, + { + "epoch": 1.09, + "learning_rate": 5.9982648e-05, + "loss": 0.7207, + "step": 14460 + }, + { + "epoch": 1.1, + "learning_rate": 5.9982576e-05, + "loss": 0.7174, + "step": 14520 + }, + { + "epoch": 1.1, + "learning_rate": 5.9982504e-05, + "loss": 0.7156, + "step": 14580 + }, + { + "epoch": 1.11, + "learning_rate": 5.9982432000000004e-05, + "loss": 0.7155, + "step": 14640 + }, + { + "epoch": 1.11, + "learning_rate": 5.998236e-05, + "loss": 0.7232, + "step": 14700 + }, + { + "epoch": 1.11, + "learning_rate": 5.9982288e-05, + "loss": 0.7145, + "step": 14760 + }, + { + "epoch": 1.12, + "learning_rate": 5.9982216e-05, + "loss": 0.7088, + "step": 14820 + }, + { + "epoch": 1.12, + "learning_rate": 5.9982144e-05, + "loss": 0.7274, + "step": 14880 + }, + { + "epoch": 1.13, + "learning_rate": 5.9982072e-05, + "loss": 0.7279, + "step": 14940 + }, + { + "epoch": 1.13, + "learning_rate": 5.9982e-05, + "loss": 0.7256, + "step": 15000 + }, + { + "epoch": 1.14, + "learning_rate": 5.9981928e-05, + "loss": 0.7104, + "step": 15060 + }, + { + "epoch": 1.14, + "learning_rate": 5.9981856e-05, + "loss": 0.7111, + "step": 15120 + }, + { + "epoch": 1.15, + "learning_rate": 5.9981784000000005e-05, + "loss": 0.7323, + "step": 15180 + }, + { + "epoch": 1.15, + "learning_rate": 5.9981712e-05, + "loss": 0.7114, + "step": 15240 + }, + { + "epoch": 1.16, + "learning_rate": 5.998164e-05, + "loss": 0.707, + "step": 15300 + }, + { + "epoch": 1.16, + "learning_rate": 5.998156800000001e-05, + "loss": 0.7059, + "step": 15360 + }, + { + "epoch": 1.16, + "learning_rate": 5.9981496000000004e-05, + "loss": 0.7111, + "step": 15420 + }, + { + "epoch": 1.17, + "learning_rate": 5.9981424e-05, + "loss": 0.6973, + "step": 15480 + }, + { + "epoch": 1.17, + "learning_rate": 5.9981352e-05, + "loss": 0.7343, + "step": 15540 + }, + { + "epoch": 1.18, + "learning_rate": 5.998128e-05, + "loss": 0.7219, + "step": 15600 + }, + { + "epoch": 1.18, + "learning_rate": 5.9981208e-05, + "loss": 0.6944, + "step": 15660 + }, + { + "epoch": 1.19, + "learning_rate": 5.9981136e-05, + "loss": 0.7073, + "step": 15720 + }, + { + "epoch": 1.19, + "learning_rate": 5.9981064e-05, + "loss": 0.7248, + "step": 15780 + }, + { + "epoch": 1.2, + "learning_rate": 5.9980992e-05, + "loss": 0.7098, + "step": 15840 + }, + { + "epoch": 1.2, + "learning_rate": 5.998092e-05, + "loss": 0.7124, + "step": 15900 + }, + { + "epoch": 1.2, + "learning_rate": 5.9980848000000005e-05, + "loss": 0.7129, + "step": 15960 + }, + { + "epoch": 1.21, + "learning_rate": 5.9980776e-05, + "loss": 0.7221, + "step": 16020 + }, + { + "epoch": 1.21, + "learning_rate": 5.9980704e-05, + "loss": 0.7014, + "step": 16080 + }, + { + "epoch": 1.22, + "learning_rate": 5.998063200000001e-05, + "loss": 0.7111, + "step": 16140 + }, + { + "epoch": 1.22, + "learning_rate": 5.9980560000000004e-05, + "loss": 0.71, + "step": 16200 + }, + { + "epoch": 1.23, + "learning_rate": 5.9980488e-05, + "loss": 0.7108, + "step": 16260 + }, + { + "epoch": 1.23, + "learning_rate": 5.9980416e-05, + "loss": 0.7084, + "step": 16320 + }, + { + "epoch": 1.24, + "learning_rate": 5.9980344e-05, + "loss": 0.7016, + "step": 16380 + }, + { + "epoch": 1.24, + "learning_rate": 5.9980272e-05, + "loss": 0.7113, + "step": 16440 + }, + { + "epoch": 1.25, + "learning_rate": 5.99802e-05, + "loss": 0.7068, + "step": 16500 + }, + { + "epoch": 1.25, + "learning_rate": 5.9980128e-05, + "loss": 0.7147, + "step": 16560 + }, + { + "epoch": 1.25, + "learning_rate": 5.9980056e-05, + "loss": 0.713, + "step": 16620 + }, + { + "epoch": 1.26, + "learning_rate": 5.9979984e-05, + "loss": 0.7161, + "step": 16680 + }, + { + "epoch": 1.26, + "learning_rate": 5.9979912000000004e-05, + "loss": 0.6982, + "step": 16740 + }, + { + "epoch": 1.27, + "learning_rate": 5.997984e-05, + "loss": 0.7029, + "step": 16800 + }, + { + "epoch": 1.27, + "learning_rate": 5.9979768e-05, + "loss": 0.6942, + "step": 16860 + }, + { + "epoch": 1.28, + "learning_rate": 5.997969600000001e-05, + "loss": 0.7229, + "step": 16920 + }, + { + "epoch": 1.28, + "learning_rate": 5.9979624e-05, + "loss": 0.7194, + "step": 16980 + }, + { + "epoch": 1.29, + "learning_rate": 5.9979552e-05, + "loss": 0.7047, + "step": 17040 + }, + { + "epoch": 1.29, + "learning_rate": 5.997948e-05, + "loss": 0.7058, + "step": 17100 + }, + { + "epoch": 1.3, + "learning_rate": 5.9979408e-05, + "loss": 0.722, + "step": 17160 + }, + { + "epoch": 1.3, + "learning_rate": 5.9979336e-05, + "loss": 0.7262, + "step": 17220 + }, + { + "epoch": 1.3, + "learning_rate": 5.9979264000000005e-05, + "loss": 0.7159, + "step": 17280 + }, + { + "epoch": 1.31, + "learning_rate": 5.9979192e-05, + "loss": 0.7058, + "step": 17340 + }, + { + "epoch": 1.31, + "learning_rate": 5.997912e-05, + "loss": 0.729, + "step": 17400 + }, + { + "epoch": 1.32, + "learning_rate": 5.9979048e-05, + "loss": 0.713, + "step": 17460 + }, + { + "epoch": 1.32, + "learning_rate": 5.9978976000000004e-05, + "loss": 0.7048, + "step": 17520 + }, + { + "epoch": 1.33, + "learning_rate": 5.9978904e-05, + "loss": 0.7098, + "step": 17580 + }, + { + "epoch": 1.33, + "learning_rate": 5.9978831999999997e-05, + "loss": 0.7109, + "step": 17640 + }, + { + "epoch": 1.34, + "learning_rate": 5.9978760000000006e-05, + "loss": 0.6913, + "step": 17700 + }, + { + "epoch": 1.34, + "learning_rate": 5.9978688e-05, + "loss": 0.6954, + "step": 17760 + }, + { + "epoch": 1.35, + "learning_rate": 5.9978616e-05, + "loss": 0.7038, + "step": 17820 + }, + { + "epoch": 1.35, + "learning_rate": 5.9978544e-05, + "loss": 0.6868, + "step": 17880 + }, + { + "epoch": 1.35, + "learning_rate": 5.9978472000000005e-05, + "loss": 0.7034, + "step": 17940 + }, + { + "epoch": 1.36, + "learning_rate": 5.99784e-05, + "loss": 0.7128, + "step": 18000 + }, + { + "epoch": 1.36, + "learning_rate": 5.9978328000000005e-05, + "loss": 0.6995, + "step": 18060 + }, + { + "epoch": 1.37, + "learning_rate": 5.9978256e-05, + "loss": 0.7158, + "step": 18120 + }, + { + "epoch": 1.37, + "learning_rate": 5.9978184e-05, + "loss": 0.7059, + "step": 18180 + }, + { + "epoch": 1.38, + "learning_rate": 5.9978112e-05, + "loss": 0.7026, + "step": 18240 + }, + { + "epoch": 1.38, + "learning_rate": 5.9978040000000004e-05, + "loss": 0.7001, + "step": 18300 + }, + { + "epoch": 1.39, + "learning_rate": 5.9977968e-05, + "loss": 0.7042, + "step": 18360 + }, + { + "epoch": 1.39, + "learning_rate": 5.9977895999999996e-05, + "loss": 0.6947, + "step": 18420 + }, + { + "epoch": 1.4, + "learning_rate": 5.9977824000000006e-05, + "loss": 0.6909, + "step": 18480 + }, + { + "epoch": 1.4, + "learning_rate": 5.9977752e-05, + "loss": 0.7113, + "step": 18540 + }, + { + "epoch": 1.4, + "learning_rate": 5.997768e-05, + "loss": 0.7108, + "step": 18600 + }, + { + "epoch": 1.41, + "learning_rate": 5.9977608e-05, + "loss": 0.7088, + "step": 18660 + }, + { + "epoch": 1.41, + "learning_rate": 5.9977536000000005e-05, + "loss": 0.686, + "step": 18720 + }, + { + "epoch": 1.42, + "learning_rate": 5.9977464e-05, + "loss": 0.7014, + "step": 18780 + }, + { + "epoch": 1.42, + "learning_rate": 5.9977392000000004e-05, + "loss": 0.7099, + "step": 18840 + }, + { + "epoch": 1.43, + "learning_rate": 5.997732e-05, + "loss": 0.7062, + "step": 18900 + }, + { + "epoch": 1.43, + "learning_rate": 5.9977248e-05, + "loss": 0.6911, + "step": 18960 + }, + { + "epoch": 1.44, + "learning_rate": 5.997717600000001e-05, + "loss": 0.6927, + "step": 19020 + }, + { + "epoch": 1.44, + "learning_rate": 5.9977104e-05, + "loss": 0.6832, + "step": 19080 + }, + { + "epoch": 1.45, + "learning_rate": 5.9977032e-05, + "loss": 0.6994, + "step": 19140 + }, + { + "epoch": 1.45, + "learning_rate": 5.9976959999999996e-05, + "loss": 0.7017, + "step": 19200 + }, + { + "epoch": 1.45, + "learning_rate": 5.9976888000000006e-05, + "loss": 0.6809, + "step": 19260 + }, + { + "epoch": 1.46, + "learning_rate": 5.9976816e-05, + "loss": 0.6927, + "step": 19320 + }, + { + "epoch": 1.46, + "learning_rate": 5.9976744e-05, + "loss": 0.692, + "step": 19380 + }, + { + "epoch": 1.47, + "learning_rate": 5.9976672e-05, + "loss": 0.7024, + "step": 19440 + }, + { + "epoch": 1.47, + "learning_rate": 5.9976600000000004e-05, + "loss": 0.6924, + "step": 19500 + }, + { + "epoch": 1.48, + "learning_rate": 5.9976528e-05, + "loss": 0.698, + "step": 19560 + }, + { + "epoch": 1.48, + "learning_rate": 5.9976456000000004e-05, + "loss": 0.712, + "step": 19620 + }, + { + "epoch": 1.49, + "learning_rate": 5.9976384e-05, + "loss": 0.6885, + "step": 19680 + }, + { + "epoch": 1.49, + "learning_rate": 5.9976311999999996e-05, + "loss": 0.6847, + "step": 19740 + }, + { + "epoch": 1.49, + "learning_rate": 5.9976240000000006e-05, + "loss": 0.7033, + "step": 19800 + }, + { + "epoch": 1.5, + "learning_rate": 5.9976168e-05, + "loss": 0.6882, + "step": 19860 + }, + { + "epoch": 1.5, + "learning_rate": 5.9976096e-05, + "loss": 0.6874, + "step": 19920 + }, + { + "epoch": 1.51, + "learning_rate": 5.9976023999999995e-05, + "loss": 0.6991, + "step": 19980 + }, + { + "epoch": 1.51, + "learning_rate": 5.9975952000000005e-05, + "loss": 0.6871, + "step": 20040 + }, + { + "epoch": 1.52, + "learning_rate": 5.997588e-05, + "loss": 0.7113, + "step": 20100 + }, + { + "epoch": 1.52, + "learning_rate": 5.9975808e-05, + "loss": 0.6897, + "step": 20160 + }, + { + "epoch": 1.53, + "learning_rate": 5.9975736e-05, + "loss": 0.6992, + "step": 20220 + }, + { + "epoch": 1.53, + "learning_rate": 5.9975664000000004e-05, + "loss": 0.6903, + "step": 20280 + }, + { + "epoch": 1.54, + "learning_rate": 5.9975592e-05, + "loss": 0.6822, + "step": 20340 + }, + { + "epoch": 1.54, + "learning_rate": 5.9975520000000003e-05, + "loss": 0.6973, + "step": 20400 + }, + { + "epoch": 1.54, + "learning_rate": 5.9975448e-05, + "loss": 0.7057, + "step": 20460 + }, + { + "epoch": 1.55, + "learning_rate": 5.9975376e-05, + "loss": 0.6994, + "step": 20520 + }, + { + "epoch": 1.55, + "learning_rate": 5.9975304000000006e-05, + "loss": 0.7048, + "step": 20580 + }, + { + "epoch": 1.56, + "learning_rate": 5.9975232e-05, + "loss": 0.6902, + "step": 20640 + }, + { + "epoch": 1.56, + "learning_rate": 5.997516e-05, + "loss": 0.6974, + "step": 20700 + }, + { + "epoch": 1.57, + "learning_rate": 5.9975088e-05, + "loss": 0.6899, + "step": 20760 + }, + { + "epoch": 1.57, + "learning_rate": 5.9975016000000005e-05, + "loss": 0.6905, + "step": 20820 + }, + { + "epoch": 1.58, + "learning_rate": 5.9974944e-05, + "loss": 0.7006, + "step": 20880 + }, + { + "epoch": 1.58, + "learning_rate": 5.9974872e-05, + "loss": 0.693, + "step": 20940 + }, + { + "epoch": 1.59, + "learning_rate": 5.99748e-05, + "loss": 0.6897, + "step": 21000 + }, + { + "epoch": 1.59, + "learning_rate": 5.9974728000000004e-05, + "loss": 0.6829, + "step": 21060 + }, + { + "epoch": 1.59, + "learning_rate": 5.9974656e-05, + "loss": 0.7091, + "step": 21120 + }, + { + "epoch": 1.6, + "learning_rate": 5.9974584e-05, + "loss": 0.7012, + "step": 21180 + }, + { + "epoch": 1.6, + "learning_rate": 5.9974512e-05, + "loss": 0.6814, + "step": 21240 + }, + { + "epoch": 1.61, + "learning_rate": 5.997444e-05, + "loss": 0.6801, + "step": 21300 + }, + { + "epoch": 1.61, + "learning_rate": 5.9974368000000005e-05, + "loss": 0.6956, + "step": 21360 + }, + { + "epoch": 1.62, + "learning_rate": 5.9974296e-05, + "loss": 0.6945, + "step": 21420 + }, + { + "epoch": 1.62, + "learning_rate": 5.9974224e-05, + "loss": 0.7011, + "step": 21480 + }, + { + "epoch": 1.63, + "learning_rate": 5.9974152e-05, + "loss": 0.6843, + "step": 21540 + }, + { + "epoch": 1.63, + "learning_rate": 5.9974080000000004e-05, + "loss": 0.6873, + "step": 21600 + }, + { + "epoch": 1.64, + "learning_rate": 5.9974008e-05, + "loss": 0.6932, + "step": 21660 + }, + { + "epoch": 1.64, + "learning_rate": 5.9973936000000004e-05, + "loss": 0.6895, + "step": 21720 + }, + { + "epoch": 1.64, + "learning_rate": 5.9973864e-05, + "loss": 0.7045, + "step": 21780 + }, + { + "epoch": 1.65, + "learning_rate": 5.9973792e-05, + "loss": 0.6971, + "step": 21840 + }, + { + "epoch": 1.65, + "learning_rate": 5.997372e-05, + "loss": 0.6963, + "step": 21900 + }, + { + "epoch": 1.66, + "learning_rate": 5.9973648e-05, + "loss": 0.6842, + "step": 21960 + }, + { + "epoch": 1.66, + "learning_rate": 5.9973576e-05, + "loss": 0.6939, + "step": 22020 + }, + { + "epoch": 1.67, + "learning_rate": 5.9973504e-05, + "loss": 0.6828, + "step": 22080 + }, + { + "epoch": 1.67, + "learning_rate": 5.9973432000000005e-05, + "loss": 0.688, + "step": 22140 + }, + { + "epoch": 1.68, + "learning_rate": 5.997336e-05, + "loss": 0.6801, + "step": 22200 + }, + { + "epoch": 1.68, + "learning_rate": 5.9973288e-05, + "loss": 0.681, + "step": 22260 + }, + { + "epoch": 1.69, + "learning_rate": 5.9973216e-05, + "loss": 0.6882, + "step": 22320 + }, + { + "epoch": 1.69, + "learning_rate": 5.9973144000000004e-05, + "loss": 0.6953, + "step": 22380 + }, + { + "epoch": 1.69, + "learning_rate": 5.9973072e-05, + "loss": 0.6971, + "step": 22440 + }, + { + "epoch": 1.7, + "learning_rate": 5.9973e-05, + "loss": 0.6865, + "step": 22500 + }, + { + "epoch": 1.7, + "learning_rate": 5.9972928e-05, + "loss": 0.6867, + "step": 22560 + }, + { + "epoch": 1.71, + "learning_rate": 5.9972856e-05, + "loss": 0.6833, + "step": 22620 + }, + { + "epoch": 1.71, + "learning_rate": 5.9972784e-05, + "loss": 0.6835, + "step": 22680 + }, + { + "epoch": 1.72, + "learning_rate": 5.9972712e-05, + "loss": 0.6931, + "step": 22740 + }, + { + "epoch": 1.72, + "learning_rate": 5.997264e-05, + "loss": 0.6945, + "step": 22800 + }, + { + "epoch": 1.73, + "learning_rate": 5.9972568e-05, + "loss": 0.6746, + "step": 22860 + }, + { + "epoch": 1.73, + "learning_rate": 5.9972496000000005e-05, + "loss": 0.6923, + "step": 22920 + }, + { + "epoch": 1.73, + "learning_rate": 5.9972424e-05, + "loss": 0.6877, + "step": 22980 + }, + { + "epoch": 1.74, + "learning_rate": 5.9972352e-05, + "loss": 0.6924, + "step": 23040 + }, + { + "epoch": 1.74, + "learning_rate": 5.997228000000001e-05, + "loss": 0.6685, + "step": 23100 + }, + { + "epoch": 1.75, + "learning_rate": 5.9972208000000003e-05, + "loss": 0.6774, + "step": 23160 + }, + { + "epoch": 1.75, + "learning_rate": 5.9972136e-05, + "loss": 0.6905, + "step": 23220 + }, + { + "epoch": 1.76, + "learning_rate": 5.9972064e-05, + "loss": 0.6846, + "step": 23280 + }, + { + "epoch": 1.76, + "learning_rate": 5.9971992e-05, + "loss": 0.6854, + "step": 23340 + }, + { + "epoch": 1.77, + "learning_rate": 5.997192e-05, + "loss": 0.6903, + "step": 23400 + }, + { + "epoch": 1.77, + "learning_rate": 5.9971848000000005e-05, + "loss": 0.692, + "step": 23460 + }, + { + "epoch": 1.78, + "learning_rate": 5.9971776e-05, + "loss": 0.6843, + "step": 23520 + }, + { + "epoch": 1.78, + "learning_rate": 5.9971704e-05, + "loss": 0.6771, + "step": 23580 + }, + { + "epoch": 1.78, + "learning_rate": 5.9971632e-05, + "loss": 0.6741, + "step": 23640 + }, + { + "epoch": 1.79, + "learning_rate": 5.9971560000000004e-05, + "loss": 0.6756, + "step": 23700 + }, + { + "epoch": 1.79, + "learning_rate": 5.9971488e-05, + "loss": 0.675, + "step": 23760 + }, + { + "epoch": 1.8, + "learning_rate": 5.9971416e-05, + "loss": 0.6786, + "step": 23820 + }, + { + "epoch": 1.8, + "learning_rate": 5.997134400000001e-05, + "loss": 0.6768, + "step": 23880 + }, + { + "epoch": 1.81, + "learning_rate": 5.9971272e-05, + "loss": 0.6762, + "step": 23940 + }, + { + "epoch": 1.81, + "learning_rate": 5.99712e-05, + "loss": 0.6804, + "step": 24000 + }, + { + "epoch": 1.82, + "learning_rate": 5.9971128e-05, + "loss": 0.6882, + "step": 24060 + }, + { + "epoch": 1.82, + "learning_rate": 5.9971056e-05, + "loss": 0.6827, + "step": 24120 + }, + { + "epoch": 1.83, + "learning_rate": 5.9970984e-05, + "loss": 0.6819, + "step": 24180 + }, + { + "epoch": 1.83, + "learning_rate": 5.9970912000000005e-05, + "loss": 0.6813, + "step": 24240 + }, + { + "epoch": 1.83, + "learning_rate": 5.997084e-05, + "loss": 0.6791, + "step": 24300 + }, + { + "epoch": 1.84, + "learning_rate": 5.9970768e-05, + "loss": 0.6718, + "step": 24360 + }, + { + "epoch": 1.84, + "learning_rate": 5.9970696e-05, + "loss": 0.6925, + "step": 24420 + }, + { + "epoch": 1.85, + "learning_rate": 5.9970624000000004e-05, + "loss": 0.6834, + "step": 24480 + }, + { + "epoch": 1.85, + "learning_rate": 5.9970552e-05, + "loss": 0.6745, + "step": 24540 + }, + { + "epoch": 1.86, + "learning_rate": 5.9970479999999996e-05, + "loss": 0.6744, + "step": 24600 + }, + { + "epoch": 1.86, + "learning_rate": 5.9970408000000006e-05, + "loss": 0.6808, + "step": 24660 + }, + { + "epoch": 1.87, + "learning_rate": 5.9970336e-05, + "loss": 0.6731, + "step": 24720 + }, + { + "epoch": 1.87, + "learning_rate": 5.9970264e-05, + "loss": 0.6817, + "step": 24780 + }, + { + "epoch": 1.88, + "learning_rate": 5.9970192e-05, + "loss": 0.671, + "step": 24840 + }, + { + "epoch": 1.88, + "learning_rate": 5.997012e-05, + "loss": 0.6776, + "step": 24900 + }, + { + "epoch": 1.88, + "learning_rate": 5.9970048e-05, + "loss": 0.6636, + "step": 24960 + }, + { + "epoch": 1.89, + "learning_rate": 5.9969976000000005e-05, + "loss": 0.6903, + "step": 25020 + }, + { + "epoch": 1.89, + "learning_rate": 5.9969904e-05, + "loss": 0.6851, + "step": 25080 + }, + { + "epoch": 1.9, + "learning_rate": 5.9969832e-05, + "loss": 0.6763, + "step": 25140 + }, + { + "epoch": 1.9, + "learning_rate": 5.996976000000001e-05, + "loss": 0.6774, + "step": 25200 + }, + { + "epoch": 1.91, + "learning_rate": 5.9969688e-05, + "loss": 0.6845, + "step": 25260 + }, + { + "epoch": 1.91, + "learning_rate": 5.9969616e-05, + "loss": 0.6876, + "step": 25320 + }, + { + "epoch": 1.92, + "learning_rate": 5.9969543999999996e-05, + "loss": 0.6632, + "step": 25380 + }, + { + "epoch": 1.92, + "learning_rate": 5.9969472000000006e-05, + "loss": 0.6601, + "step": 25440 + }, + { + "epoch": 1.93, + "learning_rate": 5.99694e-05, + "loss": 0.6789, + "step": 25500 + }, + { + "epoch": 1.93, + "learning_rate": 5.9969328e-05, + "loss": 0.6783, + "step": 25560 + }, + { + "epoch": 1.93, + "learning_rate": 5.9969256e-05, + "loss": 0.6849, + "step": 25620 + }, + { + "epoch": 1.94, + "learning_rate": 5.9969184000000005e-05, + "loss": 0.6644, + "step": 25680 + }, + { + "epoch": 1.94, + "learning_rate": 5.9969112e-05, + "loss": 0.6633, + "step": 25740 + }, + { + "epoch": 1.95, + "learning_rate": 5.9969040000000004e-05, + "loss": 0.6709, + "step": 25800 + }, + { + "epoch": 1.95, + "learning_rate": 5.9968968e-05, + "loss": 0.6801, + "step": 25860 + }, + { + "epoch": 1.96, + "learning_rate": 5.9968896e-05, + "loss": 0.6572, + "step": 25920 + }, + { + "epoch": 1.96, + "learning_rate": 5.996882400000001e-05, + "loss": 0.671, + "step": 25980 + }, + { + "epoch": 1.97, + "learning_rate": 5.9968752e-05, + "loss": 0.6818, + "step": 26040 + }, + { + "epoch": 1.97, + "learning_rate": 5.996868e-05, + "loss": 0.6689, + "step": 26100 + }, + { + "epoch": 1.98, + "learning_rate": 5.9968608e-05, + "loss": 0.6863, + "step": 26160 + }, + { + "epoch": 1.98, + "learning_rate": 5.9968536000000005e-05, + "loss": 0.677, + "step": 26220 + }, + { + "epoch": 1.98, + "learning_rate": 5.9968464e-05, + "loss": 0.6798, + "step": 26280 + }, + { + "epoch": 1.99, + "learning_rate": 5.9968392e-05, + "loss": 0.6711, + "step": 26340 + }, + { + "epoch": 1.99, + "learning_rate": 5.996832e-05, + "loss": 0.692, + "step": 26400 + }, + { + "epoch": 2.0, + "learning_rate": 5.9968248000000004e-05, + "loss": 0.6759, + "step": 26460 + }, + { + "epoch": 2.0, + "eval_loss": 0.6304686069488525, + "eval_runtime": 339.6799, + "eval_samples_per_second": 294.395, + "eval_steps_per_second": 2.302, + "step": 26490 + }, + { + "epoch": 2.0, + "learning_rate": 5.9968176e-05, + "loss": 0.6702, + "step": 26520 + }, + { + "epoch": 2.01, + "learning_rate": 5.9968104000000004e-05, + "loss": 0.6584, + "step": 26580 + }, + { + "epoch": 2.01, + "learning_rate": 5.9968032e-05, + "loss": 0.6718, + "step": 26640 + }, + { + "epoch": 2.02, + "learning_rate": 5.9967959999999996e-05, + "loss": 0.6686, + "step": 26700 + }, + { + "epoch": 2.02, + "learning_rate": 5.9967888000000006e-05, + "loss": 0.6551, + "step": 26760 + }, + { + "epoch": 2.02, + "learning_rate": 5.9967816e-05, + "loss": 0.6716, + "step": 26820 + }, + { + "epoch": 2.03, + "learning_rate": 5.9967744e-05, + "loss": 0.6739, + "step": 26880 + }, + { + "epoch": 2.03, + "learning_rate": 5.9967672e-05, + "loss": 0.6621, + "step": 26940 + }, + { + "epoch": 2.04, + "learning_rate": 5.9967600000000005e-05, + "loss": 0.6755, + "step": 27000 + }, + { + "epoch": 2.04, + "learning_rate": 5.9967528e-05, + "loss": 0.6701, + "step": 27060 + }, + { + "epoch": 2.05, + "learning_rate": 5.9967456e-05, + "loss": 0.67, + "step": 27120 + }, + { + "epoch": 2.05, + "learning_rate": 5.9967384e-05, + "loss": 0.6553, + "step": 27180 + }, + { + "epoch": 2.06, + "learning_rate": 5.9967312000000004e-05, + "loss": 0.6751, + "step": 27240 + }, + { + "epoch": 2.06, + "learning_rate": 5.996724e-05, + "loss": 0.6609, + "step": 27300 + }, + { + "epoch": 2.07, + "learning_rate": 5.9967168e-05, + "loss": 0.6705, + "step": 27360 + }, + { + "epoch": 2.07, + "learning_rate": 5.9967096e-05, + "loss": 0.6653, + "step": 27420 + }, + { + "epoch": 2.07, + "learning_rate": 5.9967023999999996e-05, + "loss": 0.6738, + "step": 27480 + }, + { + "epoch": 2.08, + "learning_rate": 5.9966952000000006e-05, + "loss": 0.6755, + "step": 27540 + }, + { + "epoch": 2.08, + "learning_rate": 5.996688e-05, + "loss": 0.6721, + "step": 27600 + }, + { + "epoch": 2.09, + "learning_rate": 5.9966808e-05, + "loss": 0.6522, + "step": 27660 + }, + { + "epoch": 2.09, + "learning_rate": 5.9966736e-05, + "loss": 0.6683, + "step": 27720 + }, + { + "epoch": 2.1, + "learning_rate": 5.9966664000000005e-05, + "loss": 0.6654, + "step": 27780 + }, + { + "epoch": 2.1, + "learning_rate": 5.9966592e-05, + "loss": 0.6645, + "step": 27840 + }, + { + "epoch": 2.11, + "learning_rate": 5.9966520000000004e-05, + "loss": 0.6523, + "step": 27900 + }, + { + "epoch": 2.11, + "learning_rate": 5.9966448e-05, + "loss": 0.6684, + "step": 27960 + }, + { + "epoch": 2.12, + "learning_rate": 5.9966376000000003e-05, + "loss": 0.6562, + "step": 28020 + }, + { + "epoch": 2.12, + "learning_rate": 5.9966304e-05, + "loss": 0.6689, + "step": 28080 + }, + { + "epoch": 2.12, + "learning_rate": 5.9966232e-05, + "loss": 0.654, + "step": 28140 + }, + { + "epoch": 2.13, + "learning_rate": 5.996616e-05, + "loss": 0.6565, + "step": 28200 + }, + { + "epoch": 2.13, + "learning_rate": 5.9966088e-05, + "loss": 0.6572, + "step": 28260 + }, + { + "epoch": 2.14, + "learning_rate": 5.9966016000000005e-05, + "loss": 0.6693, + "step": 28320 + }, + { + "epoch": 2.14, + "learning_rate": 5.9965944e-05, + "loss": 0.665, + "step": 28380 + }, + { + "epoch": 2.15, + "learning_rate": 5.9965872e-05, + "loss": 0.6561, + "step": 28440 + }, + { + "epoch": 2.15, + "learning_rate": 5.99658e-05, + "loss": 0.6662, + "step": 28500 + }, + { + "epoch": 2.16, + "learning_rate": 5.9965728000000004e-05, + "loss": 0.6771, + "step": 28560 + }, + { + "epoch": 2.16, + "learning_rate": 5.9965656e-05, + "loss": 0.6525, + "step": 28620 + }, + { + "epoch": 2.17, + "learning_rate": 5.9965584000000004e-05, + "loss": 0.6739, + "step": 28680 + }, + { + "epoch": 2.17, + "learning_rate": 5.9965512e-05, + "loss": 0.6604, + "step": 28740 + }, + { + "epoch": 2.17, + "learning_rate": 5.996544e-05, + "loss": 0.6556, + "step": 28800 + }, + { + "epoch": 2.18, + "learning_rate": 5.9965368e-05, + "loss": 0.6709, + "step": 28860 + }, + { + "epoch": 2.18, + "learning_rate": 5.9965296e-05, + "loss": 0.6594, + "step": 28920 + }, + { + "epoch": 2.19, + "learning_rate": 5.9965224e-05, + "loss": 0.6688, + "step": 28980 + }, + { + "epoch": 2.19, + "learning_rate": 5.9965152e-05, + "loss": 0.6597, + "step": 29040 + }, + { + "epoch": 2.2, + "learning_rate": 5.9965080000000005e-05, + "loss": 0.6599, + "step": 29100 + }, + { + "epoch": 2.2, + "learning_rate": 5.9965008e-05, + "loss": 0.6638, + "step": 29160 + }, + { + "epoch": 2.21, + "learning_rate": 5.9964936e-05, + "loss": 0.6642, + "step": 29220 + }, + { + "epoch": 2.21, + "learning_rate": 5.9964864e-05, + "loss": 0.6624, + "step": 29280 + }, + { + "epoch": 2.22, + "learning_rate": 5.9964792000000004e-05, + "loss": 0.6693, + "step": 29340 + }, + { + "epoch": 2.22, + "learning_rate": 5.996472e-05, + "loss": 0.6688, + "step": 29400 + }, + { + "epoch": 2.22, + "learning_rate": 5.9964648e-05, + "loss": 0.6659, + "step": 29460 + }, + { + "epoch": 2.23, + "learning_rate": 5.9964576e-05, + "loss": 0.6575, + "step": 29520 + }, + { + "epoch": 2.23, + "learning_rate": 5.9964504e-05, + "loss": 0.6646, + "step": 29580 + }, + { + "epoch": 2.24, + "learning_rate": 5.9964432000000006e-05, + "loss": 0.6665, + "step": 29640 + }, + { + "epoch": 2.24, + "learning_rate": 5.996436e-05, + "loss": 0.6603, + "step": 29700 + }, + { + "epoch": 2.25, + "learning_rate": 5.9964288e-05, + "loss": 0.6514, + "step": 29760 + }, + { + "epoch": 2.25, + "learning_rate": 5.9964216e-05, + "loss": 0.6591, + "step": 29820 + }, + { + "epoch": 2.26, + "learning_rate": 5.9964144000000004e-05, + "loss": 0.6671, + "step": 29880 + }, + { + "epoch": 2.26, + "learning_rate": 5.9964072e-05, + "loss": 0.6413, + "step": 29940 + }, + { + "epoch": 2.27, + "learning_rate": 5.9964e-05, + "loss": 0.6458, + "step": 30000 + }, + { + "epoch": 2.27, + "learning_rate": 5.9963928e-05, + "loss": 0.6527, + "step": 30060 + }, + { + "epoch": 2.27, + "learning_rate": 5.9963856e-05, + "loss": 0.6715, + "step": 30120 + }, + { + "epoch": 2.28, + "learning_rate": 5.9963784e-05, + "loss": 0.6625, + "step": 30180 + }, + { + "epoch": 2.28, + "learning_rate": 5.9963712e-05, + "loss": 0.6449, + "step": 30240 + }, + { + "epoch": 2.29, + "learning_rate": 5.996364e-05, + "loss": 0.6707, + "step": 30300 + }, + { + "epoch": 2.29, + "learning_rate": 5.9963568e-05, + "loss": 0.6742, + "step": 30360 + }, + { + "epoch": 2.3, + "learning_rate": 5.9963496000000005e-05, + "loss": 0.6749, + "step": 30420 + }, + { + "epoch": 2.3, + "learning_rate": 5.9963424e-05, + "loss": 0.6528, + "step": 30480 + }, + { + "epoch": 2.31, + "learning_rate": 5.9963352e-05, + "loss": 0.662, + "step": 30540 + }, + { + "epoch": 2.31, + "learning_rate": 5.996328000000001e-05, + "loss": 0.663, + "step": 30600 + }, + { + "epoch": 2.31, + "learning_rate": 5.9963208000000004e-05, + "loss": 0.6633, + "step": 30660 + }, + { + "epoch": 2.32, + "learning_rate": 5.9963136e-05, + "loss": 0.6746, + "step": 30720 + }, + { + "epoch": 2.32, + "learning_rate": 5.9963064e-05, + "loss": 0.6711, + "step": 30780 + }, + { + "epoch": 2.33, + "learning_rate": 5.9962992e-05, + "loss": 0.6673, + "step": 30840 + }, + { + "epoch": 2.33, + "learning_rate": 5.996292e-05, + "loss": 0.6504, + "step": 30900 + }, + { + "epoch": 2.34, + "learning_rate": 5.9962848e-05, + "loss": 0.6711, + "step": 30960 + }, + { + "epoch": 2.34, + "learning_rate": 5.9962776e-05, + "loss": 0.6554, + "step": 31020 + }, + { + "epoch": 2.35, + "learning_rate": 5.9962704e-05, + "loss": 0.6507, + "step": 31080 + }, + { + "epoch": 2.35, + "learning_rate": 5.9962632e-05, + "loss": 0.6635, + "step": 31140 + }, + { + "epoch": 2.36, + "learning_rate": 5.9962560000000005e-05, + "loss": 0.671, + "step": 31200 + }, + { + "epoch": 2.36, + "learning_rate": 5.9962488e-05, + "loss": 0.6607, + "step": 31260 + }, + { + "epoch": 2.36, + "learning_rate": 5.9962416e-05, + "loss": 0.673, + "step": 31320 + }, + { + "epoch": 2.37, + "learning_rate": 5.996234400000001e-05, + "loss": 0.6578, + "step": 31380 + }, + { + "epoch": 2.37, + "learning_rate": 5.9962272000000004e-05, + "loss": 0.6483, + "step": 31440 + }, + { + "epoch": 2.38, + "learning_rate": 5.99622e-05, + "loss": 0.6557, + "step": 31500 + }, + { + "epoch": 2.38, + "learning_rate": 5.9962127999999996e-05, + "loss": 0.6432, + "step": 31560 + }, + { + "epoch": 2.39, + "learning_rate": 5.9962056000000006e-05, + "loss": 0.654, + "step": 31620 + }, + { + "epoch": 2.39, + "learning_rate": 5.9961984e-05, + "loss": 0.6546, + "step": 31680 + }, + { + "epoch": 2.4, + "learning_rate": 5.9961912e-05, + "loss": 0.654, + "step": 31740 + }, + { + "epoch": 2.4, + "learning_rate": 5.996184e-05, + "loss": 0.6462, + "step": 31800 + }, + { + "epoch": 2.41, + "learning_rate": 5.9961768e-05, + "loss": 0.6677, + "step": 31860 + }, + { + "epoch": 2.41, + "learning_rate": 5.9961696e-05, + "loss": 0.6562, + "step": 31920 + }, + { + "epoch": 2.41, + "learning_rate": 5.9961624000000004e-05, + "loss": 0.6454, + "step": 31980 + }, + { + "epoch": 2.42, + "learning_rate": 5.9961552e-05, + "loss": 0.66, + "step": 32040 + }, + { + "epoch": 2.42, + "learning_rate": 5.996148e-05, + "loss": 0.6623, + "step": 32100 + }, + { + "epoch": 2.43, + "learning_rate": 5.996140800000001e-05, + "loss": 0.6524, + "step": 32160 + }, + { + "epoch": 2.43, + "learning_rate": 5.9961336e-05, + "loss": 0.655, + "step": 32220 + }, + { + "epoch": 2.44, + "learning_rate": 5.9961264e-05, + "loss": 0.6531, + "step": 32280 + }, + { + "epoch": 2.44, + "learning_rate": 5.9961192e-05, + "loss": 0.6634, + "step": 32340 + }, + { + "epoch": 2.45, + "learning_rate": 5.9961120000000006e-05, + "loss": 0.6454, + "step": 32400 + }, + { + "epoch": 2.45, + "learning_rate": 5.9961048e-05, + "loss": 0.6593, + "step": 32460 + }, + { + "epoch": 2.46, + "learning_rate": 5.9960976e-05, + "loss": 0.6455, + "step": 32520 + }, + { + "epoch": 2.46, + "learning_rate": 5.9960904e-05, + "loss": 0.6584, + "step": 32580 + }, + { + "epoch": 2.46, + "learning_rate": 5.9960832e-05, + "loss": 0.6396, + "step": 32640 + }, + { + "epoch": 2.47, + "learning_rate": 5.996076e-05, + "loss": 0.6506, + "step": 32700 + }, + { + "epoch": 2.47, + "learning_rate": 5.9960688000000004e-05, + "loss": 0.6572, + "step": 32760 + }, + { + "epoch": 2.48, + "learning_rate": 5.9960616e-05, + "loss": 0.6574, + "step": 32820 + }, + { + "epoch": 2.48, + "learning_rate": 5.9960543999999997e-05, + "loss": 0.6605, + "step": 32880 + }, + { + "epoch": 2.49, + "learning_rate": 5.9960472000000006e-05, + "loss": 0.6422, + "step": 32940 + }, + { + "epoch": 2.49, + "learning_rate": 5.99604e-05, + "loss": 0.6668, + "step": 33000 + }, + { + "epoch": 2.5, + "learning_rate": 5.9960328e-05, + "loss": 0.6497, + "step": 33060 + }, + { + "epoch": 2.5, + "learning_rate": 5.9960256e-05, + "loss": 0.6652, + "step": 33120 + }, + { + "epoch": 2.51, + "learning_rate": 5.9960184000000005e-05, + "loss": 0.6649, + "step": 33180 + }, + { + "epoch": 2.51, + "learning_rate": 5.9960112e-05, + "loss": 0.6562, + "step": 33240 + }, + { + "epoch": 2.51, + "learning_rate": 5.996004e-05, + "loss": 0.6517, + "step": 33300 + }, + { + "epoch": 2.52, + "learning_rate": 5.9959968e-05, + "loss": 0.6489, + "step": 33360 + }, + { + "epoch": 2.52, + "learning_rate": 5.9959896e-05, + "loss": 0.6579, + "step": 33420 + }, + { + "epoch": 2.53, + "learning_rate": 5.9959824e-05, + "loss": 0.6519, + "step": 33480 + }, + { + "epoch": 2.53, + "learning_rate": 5.9959752000000004e-05, + "loss": 0.6481, + "step": 33540 + }, + { + "epoch": 2.54, + "learning_rate": 5.995968e-05, + "loss": 0.6625, + "step": 33600 + }, + { + "epoch": 2.54, + "learning_rate": 5.9959607999999996e-05, + "loss": 0.6598, + "step": 33660 + }, + { + "epoch": 2.55, + "learning_rate": 5.9959536000000006e-05, + "loss": 0.6423, + "step": 33720 + }, + { + "epoch": 2.55, + "learning_rate": 5.9959464e-05, + "loss": 0.6534, + "step": 33780 + }, + { + "epoch": 2.55, + "learning_rate": 5.9959392e-05, + "loss": 0.6484, + "step": 33840 + }, + { + "epoch": 2.56, + "learning_rate": 5.995932e-05, + "loss": 0.6522, + "step": 33900 + }, + { + "epoch": 2.56, + "learning_rate": 5.9959248000000005e-05, + "loss": 0.6486, + "step": 33960 + }, + { + "epoch": 2.57, + "learning_rate": 5.9959176e-05, + "loss": 0.6546, + "step": 34020 + }, + { + "epoch": 2.57, + "learning_rate": 5.9959104000000004e-05, + "loss": 0.6656, + "step": 34080 + }, + { + "epoch": 2.58, + "learning_rate": 5.9959032e-05, + "loss": 0.6532, + "step": 34140 + }, + { + "epoch": 2.58, + "learning_rate": 5.9958960000000004e-05, + "loss": 0.6557, + "step": 34200 + }, + { + "epoch": 2.59, + "learning_rate": 5.9958888e-05, + "loss": 0.6442, + "step": 34260 + }, + { + "epoch": 2.59, + "learning_rate": 5.9958816e-05, + "loss": 0.6521, + "step": 34320 + }, + { + "epoch": 2.6, + "learning_rate": 5.9958744e-05, + "loss": 0.6526, + "step": 34380 + }, + { + "epoch": 2.6, + "learning_rate": 5.9958671999999996e-05, + "loss": 0.6364, + "step": 34440 + }, + { + "epoch": 2.6, + "learning_rate": 5.9958600000000006e-05, + "loss": 0.6536, + "step": 34500 + }, + { + "epoch": 2.61, + "learning_rate": 5.9958528e-05, + "loss": 0.6661, + "step": 34560 + }, + { + "epoch": 2.61, + "learning_rate": 5.9958456e-05, + "loss": 0.6349, + "step": 34620 + }, + { + "epoch": 2.62, + "learning_rate": 5.9958384e-05, + "loss": 0.6366, + "step": 34680 + }, + { + "epoch": 2.62, + "learning_rate": 5.9958312000000004e-05, + "loss": 0.6632, + "step": 34740 + }, + { + "epoch": 2.63, + "learning_rate": 5.995824e-05, + "loss": 0.6511, + "step": 34800 + }, + { + "epoch": 2.63, + "learning_rate": 5.9958168000000004e-05, + "loss": 0.6542, + "step": 34860 + }, + { + "epoch": 2.64, + "learning_rate": 5.9958096e-05, + "loss": 0.6465, + "step": 34920 + }, + { + "epoch": 2.64, + "learning_rate": 5.9958024e-05, + "loss": 0.644, + "step": 34980 + }, + { + "epoch": 2.65, + "learning_rate": 5.9957952000000006e-05, + "loss": 0.6445, + "step": 35040 + }, + { + "epoch": 2.65, + "learning_rate": 5.995788e-05, + "loss": 0.643, + "step": 35100 + }, + { + "epoch": 2.65, + "learning_rate": 5.9957808e-05, + "loss": 0.652, + "step": 35160 + }, + { + "epoch": 2.66, + "learning_rate": 5.9957735999999995e-05, + "loss": 0.6375, + "step": 35220 + }, + { + "epoch": 2.66, + "learning_rate": 5.9957664000000005e-05, + "loss": 0.642, + "step": 35280 + }, + { + "epoch": 2.67, + "learning_rate": 5.9957592e-05, + "loss": 0.6422, + "step": 35340 + }, + { + "epoch": 2.67, + "learning_rate": 5.995752e-05, + "loss": 0.6552, + "step": 35400 + }, + { + "epoch": 2.68, + "learning_rate": 5.9957448e-05, + "loss": 0.6568, + "step": 35460 + }, + { + "epoch": 2.68, + "learning_rate": 5.9957376000000004e-05, + "loss": 0.6429, + "step": 35520 + }, + { + "epoch": 2.69, + "learning_rate": 5.9957304e-05, + "loss": 0.6411, + "step": 35580 + }, + { + "epoch": 2.69, + "learning_rate": 5.9957232000000003e-05, + "loss": 0.6687, + "step": 35640 + }, + { + "epoch": 2.7, + "learning_rate": 5.995716e-05, + "loss": 0.6598, + "step": 35700 + }, + { + "epoch": 2.7, + "learning_rate": 5.9957088e-05, + "loss": 0.6478, + "step": 35760 + }, + { + "epoch": 2.7, + "learning_rate": 5.9957016000000006e-05, + "loss": 0.6422, + "step": 35820 + }, + { + "epoch": 2.71, + "learning_rate": 5.9956944e-05, + "loss": 0.6482, + "step": 35880 + }, + { + "epoch": 2.71, + "learning_rate": 5.9956872e-05, + "loss": 0.655, + "step": 35940 + }, + { + "epoch": 2.72, + "learning_rate": 5.99568e-05, + "loss": 0.6461, + "step": 36000 + }, + { + "epoch": 2.72, + "learning_rate": 5.9956728000000005e-05, + "loss": 0.655, + "step": 36060 + }, + { + "epoch": 2.73, + "learning_rate": 5.9956656e-05, + "loss": 0.6643, + "step": 36120 + }, + { + "epoch": 2.73, + "learning_rate": 5.9956584e-05, + "loss": 0.6349, + "step": 36180 + }, + { + "epoch": 2.74, + "learning_rate": 5.9956512e-05, + "loss": 0.6512, + "step": 36240 + }, + { + "epoch": 2.74, + "learning_rate": 5.9956440000000004e-05, + "loss": 0.6477, + "step": 36300 + }, + { + "epoch": 2.75, + "learning_rate": 5.9956368e-05, + "loss": 0.6471, + "step": 36360 + }, + { + "epoch": 2.75, + "learning_rate": 5.9956296e-05, + "loss": 0.6455, + "step": 36420 + }, + { + "epoch": 2.75, + "learning_rate": 5.9956224e-05, + "loss": 0.6387, + "step": 36480 + }, + { + "epoch": 2.76, + "learning_rate": 5.9956152e-05, + "loss": 0.6482, + "step": 36540 + }, + { + "epoch": 2.76, + "learning_rate": 5.9956080000000006e-05, + "loss": 0.6553, + "step": 36600 + }, + { + "epoch": 2.77, + "learning_rate": 5.9956008e-05, + "loss": 0.6485, + "step": 36660 + }, + { + "epoch": 2.77, + "learning_rate": 5.9955936e-05, + "loss": 0.6458, + "step": 36720 + }, + { + "epoch": 2.78, + "learning_rate": 5.995586400000001e-05, + "loss": 0.641, + "step": 36780 + }, + { + "epoch": 2.78, + "learning_rate": 5.9955792000000004e-05, + "loss": 0.6466, + "step": 36840 + }, + { + "epoch": 2.79, + "learning_rate": 5.995572e-05, + "loss": 0.6493, + "step": 36900 + }, + { + "epoch": 2.79, + "learning_rate": 5.9955648e-05, + "loss": 0.6465, + "step": 36960 + }, + { + "epoch": 2.8, + "learning_rate": 5.9955576e-05, + "loss": 0.6345, + "step": 37020 + }, + { + "epoch": 2.8, + "learning_rate": 5.9955504e-05, + "loss": 0.6472, + "step": 37080 + }, + { + "epoch": 2.8, + "learning_rate": 5.9955432e-05, + "loss": 0.6526, + "step": 37140 + }, + { + "epoch": 2.81, + "learning_rate": 5.995536e-05, + "loss": 0.6605, + "step": 37200 + }, + { + "epoch": 2.81, + "learning_rate": 5.9955288e-05, + "loss": 0.6597, + "step": 37260 + }, + { + "epoch": 2.82, + "learning_rate": 5.9955216e-05, + "loss": 0.6428, + "step": 37320 + }, + { + "epoch": 2.82, + "learning_rate": 5.9955144000000005e-05, + "loss": 0.6491, + "step": 37380 + }, + { + "epoch": 2.83, + "learning_rate": 5.9955072e-05, + "loss": 0.6408, + "step": 37440 + }, + { + "epoch": 2.83, + "learning_rate": 5.9955e-05, + "loss": 0.6314, + "step": 37500 + }, + { + "epoch": 2.84, + "learning_rate": 5.995492800000001e-05, + "loss": 0.6494, + "step": 37560 + }, + { + "epoch": 2.84, + "learning_rate": 5.9954856000000004e-05, + "loss": 0.6493, + "step": 37620 + }, + { + "epoch": 2.84, + "learning_rate": 5.9954784e-05, + "loss": 0.6515, + "step": 37680 + }, + { + "epoch": 2.85, + "learning_rate": 5.9954711999999997e-05, + "loss": 0.6491, + "step": 37740 + }, + { + "epoch": 2.85, + "learning_rate": 5.995464e-05, + "loss": 0.6523, + "step": 37800 + }, + { + "epoch": 2.86, + "learning_rate": 5.9954568e-05, + "loss": 0.6524, + "step": 37860 + }, + { + "epoch": 2.86, + "learning_rate": 5.9954496e-05, + "loss": 0.6483, + "step": 37920 + }, + { + "epoch": 2.87, + "learning_rate": 5.9954424e-05, + "loss": 0.6337, + "step": 37980 + }, + { + "epoch": 2.87, + "learning_rate": 5.9954352e-05, + "loss": 0.6486, + "step": 38040 + }, + { + "epoch": 2.88, + "learning_rate": 5.995428e-05, + "loss": 0.6491, + "step": 38100 + }, + { + "epoch": 2.88, + "learning_rate": 5.9954208000000005e-05, + "loss": 0.649, + "step": 38160 + }, + { + "epoch": 2.89, + "learning_rate": 5.9954136e-05, + "loss": 0.6485, + "step": 38220 + }, + { + "epoch": 2.89, + "learning_rate": 5.9954064e-05, + "loss": 0.6528, + "step": 38280 + }, + { + "epoch": 2.89, + "learning_rate": 5.995399200000001e-05, + "loss": 0.6455, + "step": 38340 + }, + { + "epoch": 2.9, + "learning_rate": 5.9953920000000003e-05, + "loss": 0.6312, + "step": 38400 + }, + { + "epoch": 2.9, + "learning_rate": 5.9953848e-05, + "loss": 0.6569, + "step": 38460 + }, + { + "epoch": 2.91, + "learning_rate": 5.9953776e-05, + "loss": 0.6436, + "step": 38520 + }, + { + "epoch": 2.91, + "learning_rate": 5.9953704e-05, + "loss": 0.6427, + "step": 38580 + }, + { + "epoch": 2.92, + "learning_rate": 5.9953632e-05, + "loss": 0.639, + "step": 38640 + }, + { + "epoch": 2.92, + "learning_rate": 5.995356e-05, + "loss": 0.6542, + "step": 38700 + }, + { + "epoch": 2.93, + "learning_rate": 5.9953488e-05, + "loss": 0.6524, + "step": 38760 + }, + { + "epoch": 2.93, + "learning_rate": 5.9953416e-05, + "loss": 0.6515, + "step": 38820 + }, + { + "epoch": 2.94, + "learning_rate": 5.9953344e-05, + "loss": 0.6283, + "step": 38880 + }, + { + "epoch": 2.94, + "learning_rate": 5.9953272000000004e-05, + "loss": 0.656, + "step": 38940 + }, + { + "epoch": 2.94, + "learning_rate": 5.99532e-05, + "loss": 0.6476, + "step": 39000 + }, + { + "epoch": 2.95, + "learning_rate": 5.9953128e-05, + "loss": 0.651, + "step": 39060 + }, + { + "epoch": 2.95, + "learning_rate": 5.995305600000001e-05, + "loss": 0.6431, + "step": 39120 + }, + { + "epoch": 2.96, + "learning_rate": 5.9952984e-05, + "loss": 0.6448, + "step": 39180 + }, + { + "epoch": 2.96, + "learning_rate": 5.9952912e-05, + "loss": 0.6377, + "step": 39240 + }, + { + "epoch": 2.97, + "learning_rate": 5.995284e-05, + "loss": 0.6469, + "step": 39300 + }, + { + "epoch": 2.97, + "learning_rate": 5.9952768000000006e-05, + "loss": 0.6399, + "step": 39360 + }, + { + "epoch": 2.98, + "learning_rate": 5.9952696e-05, + "loss": 0.6365, + "step": 39420 + }, + { + "epoch": 2.98, + "learning_rate": 5.9952624000000005e-05, + "loss": 0.6346, + "step": 39480 + }, + { + "epoch": 2.99, + "learning_rate": 5.9952552e-05, + "loss": 0.647, + "step": 39540 + }, + { + "epoch": 2.99, + "learning_rate": 5.995248e-05, + "loss": 0.6414, + "step": 39600 + }, + { + "epoch": 2.99, + "learning_rate": 5.9952408e-05, + "loss": 0.6308, + "step": 39660 + }, + { + "epoch": 3.0, + "learning_rate": 5.9952336000000004e-05, + "loss": 0.6505, + "step": 39720 + }, + { + "epoch": 3.0, + "eval_loss": 0.60313481092453, + "eval_runtime": 338.5244, + "eval_samples_per_second": 295.4, + "eval_steps_per_second": 2.31, + "step": 39735 + }, + { + "epoch": 3.0, + "learning_rate": 5.9952264e-05, + "loss": 0.6396, + "step": 39780 + }, + { + "epoch": 3.01, + "learning_rate": 5.9952191999999996e-05, + "loss": 0.6311, + "step": 39840 + }, + { + "epoch": 3.01, + "learning_rate": 5.9952120000000006e-05, + "loss": 0.6478, + "step": 39900 + }, + { + "epoch": 3.02, + "learning_rate": 5.9952048e-05, + "loss": 0.6335, + "step": 39960 + }, + { + "epoch": 3.02, + "learning_rate": 5.9951976e-05, + "loss": 0.6318, + "step": 40020 + }, + { + "epoch": 3.03, + "learning_rate": 5.9951904e-05, + "loss": 0.6384, + "step": 40080 + }, + { + "epoch": 3.03, + "learning_rate": 5.9951832000000005e-05, + "loss": 0.633, + "step": 40140 + }, + { + "epoch": 3.04, + "learning_rate": 5.995176e-05, + "loss": 0.6345, + "step": 40200 + }, + { + "epoch": 3.04, + "learning_rate": 5.9951688000000005e-05, + "loss": 0.6392, + "step": 40260 + }, + { + "epoch": 3.04, + "learning_rate": 5.9951616e-05, + "loss": 0.6314, + "step": 40320 + }, + { + "epoch": 3.05, + "learning_rate": 5.9951544e-05, + "loss": 0.64, + "step": 40380 + }, + { + "epoch": 3.05, + "learning_rate": 5.9951472e-05, + "loss": 0.6352, + "step": 40440 + }, + { + "epoch": 3.06, + "learning_rate": 5.99514e-05, + "loss": 0.6519, + "step": 40500 + }, + { + "epoch": 3.06, + "learning_rate": 5.9951328e-05, + "loss": 0.6145, + "step": 40560 + }, + { + "epoch": 3.07, + "learning_rate": 5.9951255999999996e-05, + "loss": 0.6442, + "step": 40620 + }, + { + "epoch": 3.07, + "learning_rate": 5.9951184000000006e-05, + "loss": 0.6402, + "step": 40680 + }, + { + "epoch": 3.08, + "learning_rate": 5.9951112e-05, + "loss": 0.6464, + "step": 40740 + }, + { + "epoch": 3.08, + "learning_rate": 5.995104e-05, + "loss": 0.6344, + "step": 40800 + }, + { + "epoch": 3.08, + "learning_rate": 5.9950968e-05, + "loss": 0.6261, + "step": 40860 + }, + { + "epoch": 3.09, + "learning_rate": 5.9950896000000005e-05, + "loss": 0.6335, + "step": 40920 + }, + { + "epoch": 3.09, + "learning_rate": 5.9950824e-05, + "loss": 0.6308, + "step": 40980 + }, + { + "epoch": 3.1, + "learning_rate": 5.9950752000000004e-05, + "loss": 0.629, + "step": 41040 + }, + { + "epoch": 3.1, + "learning_rate": 5.995068e-05, + "loss": 0.6299, + "step": 41100 + }, + { + "epoch": 3.11, + "learning_rate": 5.9950608e-05, + "loss": 0.6239, + "step": 41160 + }, + { + "epoch": 3.11, + "learning_rate": 5.995053600000001e-05, + "loss": 0.6467, + "step": 41220 + }, + { + "epoch": 3.12, + "learning_rate": 5.9950464e-05, + "loss": 0.634, + "step": 41280 + }, + { + "epoch": 3.12, + "learning_rate": 5.9950392e-05, + "loss": 0.6406, + "step": 41340 + }, + { + "epoch": 3.13, + "learning_rate": 5.9950319999999996e-05, + "loss": 0.634, + "step": 41400 + }, + { + "epoch": 3.13, + "learning_rate": 5.9950248000000005e-05, + "loss": 0.6471, + "step": 41460 + }, + { + "epoch": 3.13, + "learning_rate": 5.9950176e-05, + "loss": 0.6598, + "step": 41520 + }, + { + "epoch": 3.14, + "learning_rate": 5.9950104e-05, + "loss": 0.6457, + "step": 41580 + }, + { + "epoch": 3.14, + "learning_rate": 5.9950032e-05, + "loss": 0.6438, + "step": 41640 + }, + { + "epoch": 3.15, + "learning_rate": 5.9949960000000004e-05, + "loss": 0.6257, + "step": 41700 + }, + { + "epoch": 3.15, + "learning_rate": 5.9949888e-05, + "loss": 0.6234, + "step": 41760 + }, + { + "epoch": 3.16, + "learning_rate": 5.9949816000000004e-05, + "loss": 0.6368, + "step": 41820 + }, + { + "epoch": 3.16, + "learning_rate": 5.9949744e-05, + "loss": 0.6355, + "step": 41880 + }, + { + "epoch": 3.17, + "learning_rate": 5.9949672e-05, + "loss": 0.6212, + "step": 41940 + }, + { + "epoch": 3.17, + "learning_rate": 5.9949600000000006e-05, + "loss": 0.6424, + "step": 42000 + }, + { + "epoch": 3.18, + "learning_rate": 5.9949528e-05, + "loss": 0.6374, + "step": 42060 + }, + { + "epoch": 3.18, + "learning_rate": 5.9949456e-05, + "loss": 0.6323, + "step": 42120 + }, + { + "epoch": 3.18, + "learning_rate": 5.9949383999999995e-05, + "loss": 0.6326, + "step": 42180 + }, + { + "epoch": 3.19, + "learning_rate": 5.9949312000000005e-05, + "loss": 0.6331, + "step": 42240 + }, + { + "epoch": 3.19, + "learning_rate": 5.994924e-05, + "loss": 0.6299, + "step": 42300 + }, + { + "epoch": 3.2, + "learning_rate": 5.9949168e-05, + "loss": 0.6315, + "step": 42360 + }, + { + "epoch": 3.2, + "learning_rate": 5.9949096e-05, + "loss": 0.641, + "step": 42420 + }, + { + "epoch": 3.21, + "learning_rate": 5.9949024000000004e-05, + "loss": 0.6422, + "step": 42480 + }, + { + "epoch": 3.21, + "learning_rate": 5.9948952e-05, + "loss": 0.6289, + "step": 42540 + }, + { + "epoch": 3.22, + "learning_rate": 5.994888e-05, + "loss": 0.6355, + "step": 42600 + }, + { + "epoch": 3.22, + "learning_rate": 5.9948808e-05, + "loss": 0.6364, + "step": 42660 + }, + { + "epoch": 3.23, + "learning_rate": 5.9948736e-05, + "loss": 0.6311, + "step": 42720 + }, + { + "epoch": 3.23, + "learning_rate": 5.9948664000000006e-05, + "loss": 0.6352, + "step": 42780 + }, + { + "epoch": 3.23, + "learning_rate": 5.9948592e-05, + "loss": 0.6299, + "step": 42840 + }, + { + "epoch": 3.24, + "learning_rate": 5.994852e-05, + "loss": 0.6311, + "step": 42900 + }, + { + "epoch": 3.24, + "learning_rate": 5.9948448e-05, + "loss": 0.6367, + "step": 42960 + }, + { + "epoch": 3.25, + "learning_rate": 5.9948376000000005e-05, + "loss": 0.6342, + "step": 43020 + }, + { + "epoch": 3.25, + "learning_rate": 5.9948304e-05, + "loss": 0.6399, + "step": 43080 + }, + { + "epoch": 3.26, + "learning_rate": 5.9948232e-05, + "loss": 0.6335, + "step": 43140 + }, + { + "epoch": 3.26, + "learning_rate": 5.994816e-05, + "loss": 0.6391, + "step": 43200 + }, + { + "epoch": 3.27, + "learning_rate": 5.9948088000000003e-05, + "loss": 0.6321, + "step": 43260 + }, + { + "epoch": 3.27, + "learning_rate": 5.9948016e-05, + "loss": 0.6329, + "step": 43320 + }, + { + "epoch": 3.28, + "learning_rate": 5.9947944e-05, + "loss": 0.6365, + "step": 43380 + }, + { + "epoch": 3.28, + "learning_rate": 5.9947872e-05, + "loss": 0.6435, + "step": 43440 + }, + { + "epoch": 3.28, + "learning_rate": 5.99478e-05, + "loss": 0.6156, + "step": 43500 + }, + { + "epoch": 3.29, + "learning_rate": 5.9947728000000005e-05, + "loss": 0.629, + "step": 43560 + }, + { + "epoch": 3.29, + "learning_rate": 5.9947656e-05, + "loss": 0.6275, + "step": 43620 + }, + { + "epoch": 3.3, + "learning_rate": 5.9947584e-05, + "loss": 0.6492, + "step": 43680 + }, + { + "epoch": 3.3, + "learning_rate": 5.9947512e-05, + "loss": 0.6258, + "step": 43740 + }, + { + "epoch": 3.31, + "learning_rate": 5.9947440000000004e-05, + "loss": 0.6333, + "step": 43800 + }, + { + "epoch": 3.31, + "learning_rate": 5.9947368e-05, + "loss": 0.641, + "step": 43860 + }, + { + "epoch": 3.32, + "learning_rate": 5.9947296000000004e-05, + "loss": 0.6242, + "step": 43920 + }, + { + "epoch": 3.32, + "learning_rate": 5.9947224e-05, + "loss": 0.6332, + "step": 43980 + }, + { + "epoch": 3.33, + "learning_rate": 5.9947152e-05, + "loss": 0.647, + "step": 44040 + }, + { + "epoch": 3.33, + "learning_rate": 5.994708e-05, + "loss": 0.6403, + "step": 44100 + }, + { + "epoch": 3.33, + "learning_rate": 5.9947008e-05, + "loss": 0.6428, + "step": 44160 + }, + { + "epoch": 3.34, + "learning_rate": 5.9946936e-05, + "loss": 0.6369, + "step": 44220 + }, + { + "epoch": 3.34, + "learning_rate": 5.9946864e-05, + "loss": 0.6345, + "step": 44280 + }, + { + "epoch": 3.35, + "learning_rate": 5.9946792000000005e-05, + "loss": 0.6235, + "step": 44340 + }, + { + "epoch": 3.35, + "learning_rate": 5.994672e-05, + "loss": 0.6261, + "step": 44400 + }, + { + "epoch": 3.36, + "learning_rate": 5.9946648e-05, + "loss": 0.6231, + "step": 44460 + }, + { + "epoch": 3.36, + "learning_rate": 5.9946576e-05, + "loss": 0.6348, + "step": 44520 + }, + { + "epoch": 3.37, + "learning_rate": 5.9946504000000004e-05, + "loss": 0.6262, + "step": 44580 + }, + { + "epoch": 3.37, + "learning_rate": 5.9946432e-05, + "loss": 0.6325, + "step": 44640 + }, + { + "epoch": 3.37, + "learning_rate": 5.994636e-05, + "loss": 0.6308, + "step": 44700 + }, + { + "epoch": 3.38, + "learning_rate": 5.9946288e-05, + "loss": 0.6292, + "step": 44760 + }, + { + "epoch": 3.38, + "learning_rate": 5.9946216e-05, + "loss": 0.6365, + "step": 44820 + }, + { + "epoch": 3.39, + "learning_rate": 5.9946144e-05, + "loss": 0.6075, + "step": 44880 + }, + { + "epoch": 3.39, + "learning_rate": 5.9946072e-05, + "loss": 0.6316, + "step": 44940 + }, + { + "epoch": 3.4, + "learning_rate": 5.9946e-05, + "loss": 0.6216, + "step": 45000 + }, + { + "epoch": 3.4, + "learning_rate": 5.9945928e-05, + "loss": 0.624, + "step": 45060 + }, + { + "epoch": 3.41, + "learning_rate": 5.9945856000000004e-05, + "loss": 0.6371, + "step": 45120 + }, + { + "epoch": 3.41, + "learning_rate": 5.9945784e-05, + "loss": 0.6215, + "step": 45180 + }, + { + "epoch": 3.42, + "learning_rate": 5.9945712e-05, + "loss": 0.6304, + "step": 45240 + }, + { + "epoch": 3.42, + "learning_rate": 5.994564000000001e-05, + "loss": 0.6337, + "step": 45300 + }, + { + "epoch": 3.42, + "learning_rate": 5.9945568e-05, + "loss": 0.6418, + "step": 45360 + }, + { + "epoch": 3.43, + "learning_rate": 5.9945496e-05, + "loss": 0.6377, + "step": 45420 + }, + { + "epoch": 3.43, + "learning_rate": 5.9945424e-05, + "loss": 0.6416, + "step": 45480 + }, + { + "epoch": 3.44, + "learning_rate": 5.9945352e-05, + "loss": 0.617, + "step": 45540 + }, + { + "epoch": 3.44, + "learning_rate": 5.994528e-05, + "loss": 0.6221, + "step": 45600 + }, + { + "epoch": 3.45, + "learning_rate": 5.9945208000000005e-05, + "loss": 0.6255, + "step": 45660 + }, + { + "epoch": 3.45, + "learning_rate": 5.9945136e-05, + "loss": 0.6387, + "step": 45720 + }, + { + "epoch": 3.46, + "learning_rate": 5.9945064e-05, + "loss": 0.6311, + "step": 45780 + }, + { + "epoch": 3.46, + "learning_rate": 5.9944992e-05, + "loss": 0.6332, + "step": 45840 + }, + { + "epoch": 3.47, + "learning_rate": 5.9944920000000004e-05, + "loss": 0.6233, + "step": 45900 + }, + { + "epoch": 3.47, + "learning_rate": 5.9944848e-05, + "loss": 0.6395, + "step": 45960 + }, + { + "epoch": 3.47, + "learning_rate": 5.9944776e-05, + "loss": 0.6296, + "step": 46020 + }, + { + "epoch": 3.48, + "learning_rate": 5.9944704000000007e-05, + "loss": 0.6276, + "step": 46080 + }, + { + "epoch": 3.48, + "learning_rate": 5.9944632e-05, + "loss": 0.636, + "step": 46140 + }, + { + "epoch": 3.49, + "learning_rate": 5.994456e-05, + "loss": 0.6169, + "step": 46200 + }, + { + "epoch": 3.49, + "learning_rate": 5.9944488e-05, + "loss": 0.6233, + "step": 46260 + }, + { + "epoch": 3.5, + "learning_rate": 5.9944416e-05, + "loss": 0.6232, + "step": 46320 + }, + { + "epoch": 3.5, + "learning_rate": 5.9944344e-05, + "loss": 0.6362, + "step": 46380 + }, + { + "epoch": 3.51, + "learning_rate": 5.9944272000000005e-05, + "loss": 0.6218, + "step": 46440 + }, + { + "epoch": 3.51, + "learning_rate": 5.99442e-05, + "loss": 0.6356, + "step": 46500 + }, + { + "epoch": 3.52, + "learning_rate": 5.9944128e-05, + "loss": 0.6118, + "step": 46560 + }, + { + "epoch": 3.52, + "learning_rate": 5.9944056e-05, + "loss": 0.6275, + "step": 46620 + }, + { + "epoch": 3.52, + "learning_rate": 5.9943984000000004e-05, + "loss": 0.6309, + "step": 46680 + }, + { + "epoch": 3.53, + "learning_rate": 5.9943912e-05, + "loss": 0.6195, + "step": 46740 + }, + { + "epoch": 3.53, + "learning_rate": 5.9943839999999996e-05, + "loss": 0.6202, + "step": 46800 + }, + { + "epoch": 3.54, + "learning_rate": 5.9943768000000006e-05, + "loss": 0.6276, + "step": 46860 + }, + { + "epoch": 3.54, + "learning_rate": 5.9943696e-05, + "loss": 0.6316, + "step": 46920 + }, + { + "epoch": 3.55, + "learning_rate": 5.9943624e-05, + "loss": 0.6294, + "step": 46980 + }, + { + "epoch": 3.55, + "learning_rate": 5.9943552e-05, + "loss": 0.6335, + "step": 47040 + }, + { + "epoch": 3.56, + "learning_rate": 5.9943480000000005e-05, + "loss": 0.6182, + "step": 47100 + }, + { + "epoch": 3.56, + "learning_rate": 5.9943408e-05, + "loss": 0.629, + "step": 47160 + }, + { + "epoch": 3.57, + "learning_rate": 5.9943336000000004e-05, + "loss": 0.6342, + "step": 47220 + }, + { + "epoch": 3.57, + "learning_rate": 5.9943264e-05, + "loss": 0.6322, + "step": 47280 + }, + { + "epoch": 3.57, + "learning_rate": 5.9943192e-05, + "loss": 0.6268, + "step": 47340 + }, + { + "epoch": 3.58, + "learning_rate": 5.994312000000001e-05, + "loss": 0.6227, + "step": 47400 + }, + { + "epoch": 3.58, + "learning_rate": 5.9943048e-05, + "loss": 0.6227, + "step": 47460 + }, + { + "epoch": 3.59, + "learning_rate": 5.9942976e-05, + "loss": 0.6187, + "step": 47520 + }, + { + "epoch": 3.59, + "learning_rate": 5.9942903999999996e-05, + "loss": 0.6237, + "step": 47580 + }, + { + "epoch": 3.6, + "learning_rate": 5.9942832000000006e-05, + "loss": 0.6254, + "step": 47640 + }, + { + "epoch": 3.6, + "learning_rate": 5.994276e-05, + "loss": 0.6272, + "step": 47700 + }, + { + "epoch": 3.61, + "learning_rate": 5.9942688e-05, + "loss": 0.6162, + "step": 47760 + }, + { + "epoch": 3.61, + "learning_rate": 5.9942616e-05, + "loss": 0.6334, + "step": 47820 + }, + { + "epoch": 3.61, + "learning_rate": 5.9942544000000005e-05, + "loss": 0.6321, + "step": 47880 + }, + { + "epoch": 3.62, + "learning_rate": 5.9942472e-05, + "loss": 0.6285, + "step": 47940 + }, + { + "epoch": 3.62, + "learning_rate": 5.9942400000000004e-05, + "loss": 0.631, + "step": 48000 + }, + { + "epoch": 3.63, + "learning_rate": 5.9942328e-05, + "loss": 0.6264, + "step": 48060 + }, + { + "epoch": 3.63, + "learning_rate": 5.9942255999999997e-05, + "loss": 0.6396, + "step": 48120 + }, + { + "epoch": 3.64, + "learning_rate": 5.9942184000000006e-05, + "loss": 0.6345, + "step": 48180 + }, + { + "epoch": 3.64, + "learning_rate": 5.9942112e-05, + "loss": 0.6193, + "step": 48240 + }, + { + "epoch": 3.65, + "learning_rate": 5.994204e-05, + "loss": 0.612, + "step": 48300 + }, + { + "epoch": 3.65, + "learning_rate": 5.9941968e-05, + "loss": 0.6058, + "step": 48360 + }, + { + "epoch": 3.66, + "learning_rate": 5.9941896000000005e-05, + "loss": 0.6218, + "step": 48420 + }, + { + "epoch": 3.66, + "learning_rate": 5.9941824e-05, + "loss": 0.6287, + "step": 48480 + }, + { + "epoch": 3.66, + "learning_rate": 5.9941752e-05, + "loss": 0.6241, + "step": 48540 + }, + { + "epoch": 3.67, + "learning_rate": 5.994168e-05, + "loss": 0.62, + "step": 48600 + }, + { + "epoch": 3.67, + "learning_rate": 5.9941608000000004e-05, + "loss": 0.6175, + "step": 48660 + }, + { + "epoch": 3.68, + "learning_rate": 5.9941536e-05, + "loss": 0.6195, + "step": 48720 + }, + { + "epoch": 3.68, + "learning_rate": 5.9941464000000004e-05, + "loss": 0.6185, + "step": 48780 + }, + { + "epoch": 3.69, + "learning_rate": 5.9941392e-05, + "loss": 0.626, + "step": 48840 + }, + { + "epoch": 3.69, + "learning_rate": 5.9941319999999996e-05, + "loss": 0.6155, + "step": 48900 + }, + { + "epoch": 3.7, + "learning_rate": 5.9941248000000006e-05, + "loss": 0.6159, + "step": 48960 + }, + { + "epoch": 3.7, + "learning_rate": 5.9941176e-05, + "loss": 0.6263, + "step": 49020 + }, + { + "epoch": 3.71, + "learning_rate": 5.9941104e-05, + "loss": 0.621, + "step": 49080 + }, + { + "epoch": 3.71, + "learning_rate": 5.9941032e-05, + "loss": 0.6268, + "step": 49140 + }, + { + "epoch": 3.71, + "learning_rate": 5.9940960000000005e-05, + "loss": 0.6225, + "step": 49200 + }, + { + "epoch": 3.72, + "learning_rate": 5.9940888e-05, + "loss": 0.6362, + "step": 49260 + }, + { + "epoch": 3.72, + "learning_rate": 5.9940816e-05, + "loss": 0.6103, + "step": 49320 + }, + { + "epoch": 3.73, + "learning_rate": 5.9940744e-05, + "loss": 0.6185, + "step": 49380 + }, + { + "epoch": 3.73, + "learning_rate": 5.9940672000000004e-05, + "loss": 0.6218, + "step": 49440 + }, + { + "epoch": 3.74, + "learning_rate": 5.99406e-05, + "loss": 0.6188, + "step": 49500 + }, + { + "epoch": 3.74, + "learning_rate": 5.9940528e-05, + "loss": 0.6314, + "step": 49560 + }, + { + "epoch": 3.75, + "learning_rate": 5.9940456e-05, + "loss": 0.6121, + "step": 49620 + }, + { + "epoch": 3.75, + "learning_rate": 5.9940384e-05, + "loss": 0.6189, + "step": 49680 + }, + { + "epoch": 3.76, + "learning_rate": 5.9940312000000006e-05, + "loss": 0.6254, + "step": 49740 + }, + { + "epoch": 3.76, + "learning_rate": 5.994024e-05, + "loss": 0.62, + "step": 49800 + }, + { + "epoch": 3.76, + "learning_rate": 5.9940168e-05, + "loss": 0.6318, + "step": 49860 + }, + { + "epoch": 3.77, + "learning_rate": 5.9940096e-05, + "loss": 0.6232, + "step": 49920 + }, + { + "epoch": 3.77, + "learning_rate": 5.9940024000000004e-05, + "loss": 0.6145, + "step": 49980 + }, + { + "epoch": 3.78, + "learning_rate": 5.9939952e-05, + "loss": 0.6267, + "step": 50040 + }, + { + "epoch": 3.78, + "learning_rate": 5.9939880000000004e-05, + "loss": 0.6204, + "step": 50100 + }, + { + "epoch": 3.79, + "learning_rate": 5.9939808e-05, + "loss": 0.62, + "step": 50160 + }, + { + "epoch": 3.79, + "learning_rate": 5.9939736e-05, + "loss": 0.6281, + "step": 50220 + }, + { + "epoch": 3.8, + "learning_rate": 5.9939664e-05, + "loss": 0.6223, + "step": 50280 + }, + { + "epoch": 3.8, + "learning_rate": 5.9939592e-05, + "loss": 0.6308, + "step": 50340 + }, + { + "epoch": 3.81, + "learning_rate": 5.993952e-05, + "loss": 0.637, + "step": 50400 + }, + { + "epoch": 3.81, + "learning_rate": 5.9939448e-05, + "loss": 0.6139, + "step": 50460 + }, + { + "epoch": 3.81, + "learning_rate": 5.9939376000000005e-05, + "loss": 0.6219, + "step": 50520 + }, + { + "epoch": 3.82, + "learning_rate": 5.9939304e-05, + "loss": 0.6225, + "step": 50580 + }, + { + "epoch": 3.82, + "learning_rate": 5.9939232e-05, + "loss": 0.623, + "step": 50640 + }, + { + "epoch": 3.83, + "learning_rate": 5.993916e-05, + "loss": 0.6196, + "step": 50700 + }, + { + "epoch": 3.83, + "learning_rate": 5.9939088000000004e-05, + "loss": 0.6198, + "step": 50760 + }, + { + "epoch": 3.84, + "learning_rate": 5.9939016e-05, + "loss": 0.6133, + "step": 50820 + }, + { + "epoch": 3.84, + "learning_rate": 5.9938944000000003e-05, + "loss": 0.63, + "step": 50880 + }, + { + "epoch": 3.85, + "learning_rate": 5.9938872e-05, + "loss": 0.6242, + "step": 50940 + }, + { + "epoch": 3.85, + "learning_rate": 5.99388e-05, + "loss": 0.6344, + "step": 51000 + }, + { + "epoch": 3.86, + "learning_rate": 5.9938728e-05, + "loss": 0.6119, + "step": 51060 + }, + { + "epoch": 3.86, + "learning_rate": 5.9938656e-05, + "loss": 0.6137, + "step": 51120 + }, + { + "epoch": 3.86, + "learning_rate": 5.9938584e-05, + "loss": 0.6223, + "step": 51180 + }, + { + "epoch": 3.87, + "learning_rate": 5.9938512e-05, + "loss": 0.6276, + "step": 51240 + }, + { + "epoch": 3.87, + "learning_rate": 5.9938440000000005e-05, + "loss": 0.6314, + "step": 51300 + }, + { + "epoch": 3.88, + "learning_rate": 5.9938368e-05, + "loss": 0.6172, + "step": 51360 + }, + { + "epoch": 3.88, + "learning_rate": 5.9938296e-05, + "loss": 0.621, + "step": 51420 + }, + { + "epoch": 3.89, + "learning_rate": 5.9938224e-05, + "loss": 0.6131, + "step": 51480 + }, + { + "epoch": 3.89, + "learning_rate": 5.9938152000000004e-05, + "loss": 0.6138, + "step": 51540 + }, + { + "epoch": 3.9, + "learning_rate": 5.993808e-05, + "loss": 0.619, + "step": 51600 + }, + { + "epoch": 3.9, + "learning_rate": 5.9938008e-05, + "loss": 0.6062, + "step": 51660 + }, + { + "epoch": 3.9, + "learning_rate": 5.9937936e-05, + "loss": 0.6168, + "step": 51720 + }, + { + "epoch": 3.91, + "learning_rate": 5.9937864e-05, + "loss": 0.625, + "step": 51780 + }, + { + "epoch": 3.91, + "learning_rate": 5.9937792000000006e-05, + "loss": 0.6193, + "step": 51840 + }, + { + "epoch": 3.92, + "learning_rate": 5.993772e-05, + "loss": 0.61, + "step": 51900 + }, + { + "epoch": 3.92, + "learning_rate": 5.9937648e-05, + "loss": 0.6218, + "step": 51960 + }, + { + "epoch": 3.93, + "learning_rate": 5.9937576e-05, + "loss": 0.6248, + "step": 52020 + }, + { + "epoch": 3.93, + "learning_rate": 5.9937504000000004e-05, + "loss": 0.6125, + "step": 52080 + }, + { + "epoch": 3.94, + "learning_rate": 5.9937432e-05, + "loss": 0.631, + "step": 52140 + }, + { + "epoch": 3.94, + "learning_rate": 5.993736e-05, + "loss": 0.6121, + "step": 52200 + }, + { + "epoch": 3.95, + "learning_rate": 5.9937288e-05, + "loss": 0.6254, + "step": 52260 + }, + { + "epoch": 3.95, + "learning_rate": 5.9937216e-05, + "loss": 0.6245, + "step": 52320 + }, + { + "epoch": 3.95, + "learning_rate": 5.9937144e-05, + "loss": 0.6332, + "step": 52380 + }, + { + "epoch": 3.96, + "learning_rate": 5.9937072e-05, + "loss": 0.6206, + "step": 52440 + }, + { + "epoch": 3.96, + "learning_rate": 5.9937e-05, + "loss": 0.6181, + "step": 52500 + }, + { + "epoch": 3.97, + "learning_rate": 5.9936928e-05, + "loss": 0.6265, + "step": 52560 + }, + { + "epoch": 3.97, + "learning_rate": 5.9936856000000005e-05, + "loss": 0.622, + "step": 52620 + }, + { + "epoch": 3.98, + "learning_rate": 5.9936784e-05, + "loss": 0.628, + "step": 52680 + }, + { + "epoch": 3.98, + "learning_rate": 5.9936712e-05, + "loss": 0.6213, + "step": 52740 + }, + { + "epoch": 3.99, + "learning_rate": 5.993664000000001e-05, + "loss": 0.6229, + "step": 52800 + }, + { + "epoch": 3.99, + "learning_rate": 5.9936568000000004e-05, + "loss": 0.6177, + "step": 52860 + }, + { + "epoch": 4.0, + "learning_rate": 5.9936496e-05, + "loss": 0.6188, + "step": 52920 + }, + { + "epoch": 4.0, + "learning_rate": 5.9936423999999997e-05, + "loss": 0.617, + "step": 52980 + }, + { + "epoch": 4.0, + "eval_loss": 0.5853202939033508, + "eval_runtime": 337.718, + "eval_samples_per_second": 296.105, + "eval_steps_per_second": 2.316, + "step": 52980 + }, + { + "epoch": 4.0, + "learning_rate": 5.9936352000000006e-05, + "loss": 0.6146, + "step": 53040 + }, + { + "epoch": 4.01, + "learning_rate": 5.993628e-05, + "loss": 0.6101, + "step": 53100 + }, + { + "epoch": 4.01, + "learning_rate": 5.9936208e-05, + "loss": 0.6137, + "step": 53160 + }, + { + "epoch": 4.02, + "learning_rate": 5.9936136e-05, + "loss": 0.627, + "step": 53220 + }, + { + "epoch": 4.02, + "learning_rate": 5.9936064e-05, + "loss": 0.6259, + "step": 53280 + }, + { + "epoch": 4.03, + "learning_rate": 5.9935992e-05, + "loss": 0.6176, + "step": 53340 + }, + { + "epoch": 4.03, + "learning_rate": 5.9935920000000005e-05, + "loss": 0.6107, + "step": 53400 + }, + { + "epoch": 4.04, + "learning_rate": 5.9935848e-05, + "loss": 0.6131, + "step": 53460 + }, + { + "epoch": 4.04, + "learning_rate": 5.9935776e-05, + "loss": 0.6066, + "step": 53520 + }, + { + "epoch": 4.05, + "learning_rate": 5.993570400000001e-05, + "loss": 0.6153, + "step": 53580 + }, + { + "epoch": 4.05, + "learning_rate": 5.9935632000000003e-05, + "loss": 0.6267, + "step": 53640 + }, + { + "epoch": 4.05, + "learning_rate": 5.993556e-05, + "loss": 0.6112, + "step": 53700 + }, + { + "epoch": 4.06, + "learning_rate": 5.9935487999999996e-05, + "loss": 0.6191, + "step": 53760 + }, + { + "epoch": 4.06, + "learning_rate": 5.9935416000000006e-05, + "loss": 0.6286, + "step": 53820 + }, + { + "epoch": 4.07, + "learning_rate": 5.9935344e-05, + "loss": 0.6203, + "step": 53880 + }, + { + "epoch": 4.07, + "learning_rate": 5.9935272e-05, + "loss": 0.6165, + "step": 53940 + }, + { + "epoch": 4.08, + "learning_rate": 5.99352e-05, + "loss": 0.6054, + "step": 54000 + }, + { + "epoch": 4.08, + "learning_rate": 5.9935128e-05, + "loss": 0.617, + "step": 54060 + }, + { + "epoch": 4.09, + "learning_rate": 5.9935056e-05, + "loss": 0.6207, + "step": 54120 + }, + { + "epoch": 4.09, + "learning_rate": 5.9934984000000004e-05, + "loss": 0.6132, + "step": 54180 + }, + { + "epoch": 4.1, + "learning_rate": 5.9934912e-05, + "loss": 0.6157, + "step": 54240 + }, + { + "epoch": 4.1, + "learning_rate": 5.993484e-05, + "loss": 0.6141, + "step": 54300 + }, + { + "epoch": 4.1, + "learning_rate": 5.993476800000001e-05, + "loss": 0.6167, + "step": 54360 + }, + { + "epoch": 4.11, + "learning_rate": 5.9934696e-05, + "loss": 0.611, + "step": 54420 + }, + { + "epoch": 4.11, + "learning_rate": 5.9934624e-05, + "loss": 0.6081, + "step": 54480 + }, + { + "epoch": 4.12, + "learning_rate": 5.9934552e-05, + "loss": 0.6263, + "step": 54540 + }, + { + "epoch": 4.12, + "learning_rate": 5.9934480000000006e-05, + "loss": 0.6162, + "step": 54600 + }, + { + "epoch": 4.13, + "learning_rate": 5.9934408e-05, + "loss": 0.6106, + "step": 54660 + }, + { + "epoch": 4.13, + "learning_rate": 5.9934336e-05, + "loss": 0.62, + "step": 54720 + }, + { + "epoch": 4.14, + "learning_rate": 5.9934264e-05, + "loss": 0.6054, + "step": 54780 + }, + { + "epoch": 4.14, + "learning_rate": 5.9934192e-05, + "loss": 0.6121, + "step": 54840 + }, + { + "epoch": 4.14, + "learning_rate": 5.993412e-05, + "loss": 0.6164, + "step": 54900 + }, + { + "epoch": 4.15, + "learning_rate": 5.9934048000000004e-05, + "loss": 0.6188, + "step": 54960 + }, + { + "epoch": 4.15, + "learning_rate": 5.9933976e-05, + "loss": 0.61, + "step": 55020 + }, + { + "epoch": 4.16, + "learning_rate": 5.9933903999999996e-05, + "loss": 0.6161, + "step": 55080 + }, + { + "epoch": 4.16, + "learning_rate": 5.9933832000000006e-05, + "loss": 0.6067, + "step": 55140 + }, + { + "epoch": 4.17, + "learning_rate": 5.993376e-05, + "loss": 0.6, + "step": 55200 + }, + { + "epoch": 4.17, + "learning_rate": 5.9933688e-05, + "loss": 0.6144, + "step": 55260 + }, + { + "epoch": 4.18, + "learning_rate": 5.9933616e-05, + "loss": 0.6085, + "step": 55320 + }, + { + "epoch": 4.18, + "learning_rate": 5.9933544000000005e-05, + "loss": 0.6278, + "step": 55380 + }, + { + "epoch": 4.19, + "learning_rate": 5.9933472e-05, + "loss": 0.6108, + "step": 55440 + }, + { + "epoch": 4.19, + "learning_rate": 5.99334e-05, + "loss": 0.6203, + "step": 55500 + }, + { + "epoch": 4.19, + "learning_rate": 5.9933328e-05, + "loss": 0.6107, + "step": 55560 + }, + { + "epoch": 4.2, + "learning_rate": 5.9933256000000004e-05, + "loss": 0.6105, + "step": 55620 + }, + { + "epoch": 4.2, + "learning_rate": 5.9933184e-05, + "loss": 0.6239, + "step": 55680 + }, + { + "epoch": 4.21, + "learning_rate": 5.9933112000000003e-05, + "loss": 0.6145, + "step": 55740 + }, + { + "epoch": 4.21, + "learning_rate": 5.993304e-05, + "loss": 0.6135, + "step": 55800 + }, + { + "epoch": 4.22, + "learning_rate": 5.9932967999999996e-05, + "loss": 0.6032, + "step": 55860 + }, + { + "epoch": 4.22, + "learning_rate": 5.9932896000000006e-05, + "loss": 0.6047, + "step": 55920 + }, + { + "epoch": 4.23, + "learning_rate": 5.9932824e-05, + "loss": 0.6104, + "step": 55980 + }, + { + "epoch": 4.23, + "learning_rate": 5.9932752e-05, + "loss": 0.6119, + "step": 56040 + }, + { + "epoch": 4.24, + "learning_rate": 5.993268e-05, + "loss": 0.6132, + "step": 56100 + }, + { + "epoch": 4.24, + "learning_rate": 5.9932608000000005e-05, + "loss": 0.6061, + "step": 56160 + }, + { + "epoch": 4.24, + "learning_rate": 5.9932536e-05, + "loss": 0.6274, + "step": 56220 + }, + { + "epoch": 4.25, + "learning_rate": 5.9932464000000004e-05, + "loss": 0.6112, + "step": 56280 + }, + { + "epoch": 4.25, + "learning_rate": 5.9932392e-05, + "loss": 0.6236, + "step": 56340 + }, + { + "epoch": 4.26, + "learning_rate": 5.9932320000000004e-05, + "loss": 0.6004, + "step": 56400 + }, + { + "epoch": 4.26, + "learning_rate": 5.9932248e-05, + "loss": 0.6065, + "step": 56460 + }, + { + "epoch": 4.27, + "learning_rate": 5.9932176e-05, + "loss": 0.6039, + "step": 56520 + }, + { + "epoch": 4.27, + "learning_rate": 5.9932104e-05, + "loss": 0.608, + "step": 56580 + }, + { + "epoch": 4.28, + "learning_rate": 5.9932031999999996e-05, + "loss": 0.6132, + "step": 56640 + }, + { + "epoch": 4.28, + "learning_rate": 5.9931960000000005e-05, + "loss": 0.6049, + "step": 56700 + }, + { + "epoch": 4.29, + "learning_rate": 5.9931888e-05, + "loss": 0.6215, + "step": 56760 + }, + { + "epoch": 4.29, + "learning_rate": 5.9931816e-05, + "loss": 0.623, + "step": 56820 + }, + { + "epoch": 4.29, + "learning_rate": 5.9931744e-05, + "loss": 0.6257, + "step": 56880 + }, + { + "epoch": 4.3, + "learning_rate": 5.9931672000000004e-05, + "loss": 0.5961, + "step": 56940 + }, + { + "epoch": 4.3, + "learning_rate": 5.99316e-05, + "loss": 0.6128, + "step": 57000 + }, + { + "epoch": 4.31, + "learning_rate": 5.9931528000000004e-05, + "loss": 0.6245, + "step": 57060 + }, + { + "epoch": 4.31, + "learning_rate": 5.9931456e-05, + "loss": 0.6192, + "step": 57120 + }, + { + "epoch": 4.32, + "learning_rate": 5.9931384e-05, + "loss": 0.6096, + "step": 57180 + }, + { + "epoch": 4.32, + "learning_rate": 5.9931312000000006e-05, + "loss": 0.6255, + "step": 57240 + }, + { + "epoch": 4.33, + "learning_rate": 5.993124e-05, + "loss": 0.6119, + "step": 57300 + }, + { + "epoch": 4.33, + "learning_rate": 5.9931168e-05, + "loss": 0.6037, + "step": 57360 + }, + { + "epoch": 4.34, + "learning_rate": 5.9931095999999995e-05, + "loss": 0.606, + "step": 57420 + }, + { + "epoch": 4.34, + "learning_rate": 5.9931024000000005e-05, + "loss": 0.6059, + "step": 57480 + }, + { + "epoch": 4.34, + "learning_rate": 5.9930952e-05, + "loss": 0.614, + "step": 57540 + }, + { + "epoch": 4.35, + "learning_rate": 5.993088e-05, + "loss": 0.6036, + "step": 57600 + }, + { + "epoch": 4.35, + "learning_rate": 5.9930808e-05, + "loss": 0.6214, + "step": 57660 + }, + { + "epoch": 4.36, + "learning_rate": 5.9930736000000004e-05, + "loss": 0.6081, + "step": 57720 + }, + { + "epoch": 4.36, + "learning_rate": 5.9930664e-05, + "loss": 0.6236, + "step": 57780 + }, + { + "epoch": 4.37, + "learning_rate": 5.9930592e-05, + "loss": 0.6011, + "step": 57840 + }, + { + "epoch": 4.37, + "learning_rate": 5.993052e-05, + "loss": 0.6212, + "step": 57900 + }, + { + "epoch": 4.38, + "learning_rate": 5.9930448e-05, + "loss": 0.6054, + "step": 57960 + }, + { + "epoch": 4.38, + "learning_rate": 5.9930376000000006e-05, + "loss": 0.5944, + "step": 58020 + }, + { + "epoch": 4.39, + "learning_rate": 5.9930304e-05, + "loss": 0.6127, + "step": 58080 + }, + { + "epoch": 4.39, + "learning_rate": 5.9930232e-05, + "loss": 0.6217, + "step": 58140 + }, + { + "epoch": 4.39, + "learning_rate": 5.993016e-05, + "loss": 0.6095, + "step": 58200 + }, + { + "epoch": 4.4, + "learning_rate": 5.9930088000000005e-05, + "loss": 0.6231, + "step": 58260 + }, + { + "epoch": 4.4, + "learning_rate": 5.9930016e-05, + "loss": 0.6194, + "step": 58320 + }, + { + "epoch": 4.41, + "learning_rate": 5.9929944e-05, + "loss": 0.6093, + "step": 58380 + }, + { + "epoch": 4.41, + "learning_rate": 5.9929872e-05, + "loss": 0.6183, + "step": 58440 + }, + { + "epoch": 4.42, + "learning_rate": 5.9929800000000003e-05, + "loss": 0.6171, + "step": 58500 + }, + { + "epoch": 4.42, + "learning_rate": 5.9929728e-05, + "loss": 0.625, + "step": 58560 + }, + { + "epoch": 4.43, + "learning_rate": 5.9929656e-05, + "loss": 0.6203, + "step": 58620 + }, + { + "epoch": 4.43, + "learning_rate": 5.9929584e-05, + "loss": 0.6107, + "step": 58680 + }, + { + "epoch": 4.43, + "learning_rate": 5.9929512e-05, + "loss": 0.5989, + "step": 58740 + }, + { + "epoch": 4.44, + "learning_rate": 5.9929440000000005e-05, + "loss": 0.5933, + "step": 58800 + }, + { + "epoch": 4.44, + "learning_rate": 5.9929368e-05, + "loss": 0.6191, + "step": 58860 + }, + { + "epoch": 4.45, + "learning_rate": 5.9929296e-05, + "loss": 0.6147, + "step": 58920 + }, + { + "epoch": 4.45, + "learning_rate": 5.992922400000001e-05, + "loss": 0.6249, + "step": 58980 + }, + { + "epoch": 4.46, + "learning_rate": 5.9929152000000004e-05, + "loss": 0.6148, + "step": 59040 + }, + { + "epoch": 4.46, + "learning_rate": 5.992908e-05, + "loss": 0.6036, + "step": 59100 + }, + { + "epoch": 4.47, + "learning_rate": 5.9929008e-05, + "loss": 0.6136, + "step": 59160 + }, + { + "epoch": 4.47, + "learning_rate": 5.9928936e-05, + "loss": 0.6098, + "step": 59220 + }, + { + "epoch": 4.48, + "learning_rate": 5.9928864e-05, + "loss": 0.6076, + "step": 59280 + }, + { + "epoch": 4.48, + "learning_rate": 5.9928792e-05, + "loss": 0.619, + "step": 59340 + }, + { + "epoch": 4.48, + "learning_rate": 5.992872e-05, + "loss": 0.6106, + "step": 59400 + }, + { + "epoch": 4.49, + "learning_rate": 5.9928648e-05, + "loss": 0.6103, + "step": 59460 + }, + { + "epoch": 4.49, + "learning_rate": 5.9928576e-05, + "loss": 0.6068, + "step": 59520 + }, + { + "epoch": 4.5, + "learning_rate": 5.9928504000000005e-05, + "loss": 0.6042, + "step": 59580 + }, + { + "epoch": 4.5, + "learning_rate": 5.9928432e-05, + "loss": 0.611, + "step": 59640 + }, + { + "epoch": 4.51, + "learning_rate": 5.992836e-05, + "loss": 0.6143, + "step": 59700 + }, + { + "epoch": 4.51, + "learning_rate": 5.992828800000001e-05, + "loss": 0.6106, + "step": 59760 + }, + { + "epoch": 4.52, + "learning_rate": 5.9928216000000004e-05, + "loss": 0.6212, + "step": 59820 + }, + { + "epoch": 4.52, + "learning_rate": 5.9928144e-05, + "loss": 0.6157, + "step": 59880 + }, + { + "epoch": 4.53, + "learning_rate": 5.9928071999999996e-05, + "loss": 0.6149, + "step": 59940 + }, + { + "epoch": 4.53, + "learning_rate": 5.9928e-05, + "loss": 0.6099, + "step": 60000 + }, + { + "epoch": 4.53, + "learning_rate": 5.9927928e-05, + "loss": 0.6193, + "step": 60060 + }, + { + "epoch": 4.54, + "learning_rate": 5.9927856e-05, + "loss": 0.6131, + "step": 60120 + }, + { + "epoch": 4.54, + "learning_rate": 5.9927784e-05, + "loss": 0.6095, + "step": 60180 + }, + { + "epoch": 4.55, + "learning_rate": 5.9927712e-05, + "loss": 0.6089, + "step": 60240 + }, + { + "epoch": 4.55, + "learning_rate": 5.992764e-05, + "loss": 0.6218, + "step": 60300 + }, + { + "epoch": 4.56, + "learning_rate": 5.9927568000000005e-05, + "loss": 0.6144, + "step": 60360 + }, + { + "epoch": 4.56, + "learning_rate": 5.9927496e-05, + "loss": 0.6125, + "step": 60420 + }, + { + "epoch": 4.57, + "learning_rate": 5.9927424e-05, + "loss": 0.6134, + "step": 60480 + }, + { + "epoch": 4.57, + "learning_rate": 5.992735200000001e-05, + "loss": 0.6046, + "step": 60540 + }, + { + "epoch": 4.58, + "learning_rate": 5.992728e-05, + "loss": 0.5963, + "step": 60600 + }, + { + "epoch": 4.58, + "learning_rate": 5.9927208e-05, + "loss": 0.6006, + "step": 60660 + }, + { + "epoch": 4.58, + "learning_rate": 5.9927136e-05, + "loss": 0.6011, + "step": 60720 + }, + { + "epoch": 4.59, + "learning_rate": 5.9927064000000006e-05, + "loss": 0.6008, + "step": 60780 + }, + { + "epoch": 4.59, + "learning_rate": 5.9926992e-05, + "loss": 0.6021, + "step": 60840 + }, + { + "epoch": 4.6, + "learning_rate": 5.992692e-05, + "loss": 0.6111, + "step": 60900 + }, + { + "epoch": 4.6, + "learning_rate": 5.9926848e-05, + "loss": 0.606, + "step": 60960 + }, + { + "epoch": 4.61, + "learning_rate": 5.9926776e-05, + "loss": 0.6035, + "step": 61020 + }, + { + "epoch": 4.61, + "learning_rate": 5.9926704e-05, + "loss": 0.6108, + "step": 61080 + }, + { + "epoch": 4.62, + "learning_rate": 5.9926632000000004e-05, + "loss": 0.6027, + "step": 61140 + }, + { + "epoch": 4.62, + "learning_rate": 5.992656e-05, + "loss": 0.6077, + "step": 61200 + }, + { + "epoch": 4.63, + "learning_rate": 5.9926488e-05, + "loss": 0.6051, + "step": 61260 + }, + { + "epoch": 4.63, + "learning_rate": 5.9926416000000007e-05, + "loss": 0.6091, + "step": 61320 + }, + { + "epoch": 4.63, + "learning_rate": 5.9926344e-05, + "loss": 0.5969, + "step": 61380 + }, + { + "epoch": 4.64, + "learning_rate": 5.9926272e-05, + "loss": 0.6132, + "step": 61440 + }, + { + "epoch": 4.64, + "learning_rate": 5.99262e-05, + "loss": 0.6272, + "step": 61500 + }, + { + "epoch": 4.65, + "learning_rate": 5.9926128000000005e-05, + "loss": 0.6204, + "step": 61560 + }, + { + "epoch": 4.65, + "learning_rate": 5.9926056e-05, + "loss": 0.6178, + "step": 61620 + }, + { + "epoch": 4.66, + "learning_rate": 5.9925984000000005e-05, + "loss": 0.6102, + "step": 61680 + }, + { + "epoch": 4.66, + "learning_rate": 5.9925912e-05, + "loss": 0.6125, + "step": 61740 + }, + { + "epoch": 4.67, + "learning_rate": 5.992584e-05, + "loss": 0.6055, + "step": 61800 + }, + { + "epoch": 4.67, + "learning_rate": 5.9925768e-05, + "loss": 0.5978, + "step": 61860 + }, + { + "epoch": 4.67, + "learning_rate": 5.9925696000000004e-05, + "loss": 0.6069, + "step": 61920 + }, + { + "epoch": 4.68, + "learning_rate": 5.9925624e-05, + "loss": 0.6087, + "step": 61980 + }, + { + "epoch": 4.68, + "learning_rate": 5.9925551999999996e-05, + "loss": 0.6163, + "step": 62040 + }, + { + "epoch": 4.69, + "learning_rate": 5.9925480000000006e-05, + "loss": 0.6218, + "step": 62100 + }, + { + "epoch": 4.69, + "learning_rate": 5.9925408e-05, + "loss": 0.6101, + "step": 62160 + }, + { + "epoch": 4.7, + "learning_rate": 5.9925336e-05, + "loss": 0.6122, + "step": 62220 + }, + { + "epoch": 4.7, + "learning_rate": 5.9925264e-05, + "loss": 0.6148, + "step": 62280 + }, + { + "epoch": 4.71, + "learning_rate": 5.9925192000000005e-05, + "loss": 0.6066, + "step": 62340 + }, + { + "epoch": 4.71, + "learning_rate": 5.992512e-05, + "loss": 0.6138, + "step": 62400 + }, + { + "epoch": 4.72, + "learning_rate": 5.9925048000000004e-05, + "loss": 0.6071, + "step": 62460 + }, + { + "epoch": 4.72, + "learning_rate": 5.9924976e-05, + "loss": 0.6175, + "step": 62520 + }, + { + "epoch": 4.72, + "learning_rate": 5.9924904e-05, + "loss": 0.6083, + "step": 62580 + }, + { + "epoch": 4.73, + "learning_rate": 5.9924832e-05, + "loss": 0.6173, + "step": 62640 + }, + { + "epoch": 4.73, + "learning_rate": 5.992476e-05, + "loss": 0.6068, + "step": 62700 + }, + { + "epoch": 4.74, + "learning_rate": 5.9924688e-05, + "loss": 0.6134, + "step": 62760 + }, + { + "epoch": 4.74, + "learning_rate": 5.9924615999999996e-05, + "loss": 0.6055, + "step": 62820 + }, + { + "epoch": 4.75, + "learning_rate": 5.9924544000000006e-05, + "loss": 0.6122, + "step": 62880 + }, + { + "epoch": 4.75, + "learning_rate": 5.9924472e-05, + "loss": 0.6018, + "step": 62940 + }, + { + "epoch": 4.76, + "learning_rate": 5.99244e-05, + "loss": 0.5996, + "step": 63000 + }, + { + "epoch": 4.76, + "learning_rate": 5.9924328e-05, + "loss": 0.6241, + "step": 63060 + }, + { + "epoch": 4.77, + "learning_rate": 5.9924256000000005e-05, + "loss": 0.5985, + "step": 63120 + }, + { + "epoch": 4.77, + "learning_rate": 5.9924184e-05, + "loss": 0.6062, + "step": 63180 + }, + { + "epoch": 4.77, + "learning_rate": 5.9924112000000004e-05, + "loss": 0.5962, + "step": 63240 + }, + { + "epoch": 4.78, + "learning_rate": 5.992404e-05, + "loss": 0.5966, + "step": 63300 + }, + { + "epoch": 4.78, + "learning_rate": 5.9923968000000003e-05, + "loss": 0.5923, + "step": 63360 + }, + { + "epoch": 4.79, + "learning_rate": 5.9923896000000006e-05, + "loss": 0.6207, + "step": 63420 + }, + { + "epoch": 4.79, + "learning_rate": 5.9923824e-05, + "loss": 0.61, + "step": 63480 + }, + { + "epoch": 4.8, + "learning_rate": 5.9923752e-05, + "loss": 0.6009, + "step": 63540 + }, + { + "epoch": 4.8, + "learning_rate": 5.9923679999999995e-05, + "loss": 0.6125, + "step": 63600 + }, + { + "epoch": 4.81, + "learning_rate": 5.9923608000000005e-05, + "loss": 0.5973, + "step": 63660 + }, + { + "epoch": 4.81, + "learning_rate": 5.9923536e-05, + "loss": 0.6041, + "step": 63720 + }, + { + "epoch": 4.82, + "learning_rate": 5.9923464e-05, + "loss": 0.6064, + "step": 63780 + }, + { + "epoch": 4.82, + "learning_rate": 5.9923392e-05, + "loss": 0.5993, + "step": 63840 + }, + { + "epoch": 4.82, + "learning_rate": 5.9923320000000004e-05, + "loss": 0.6038, + "step": 63900 + }, + { + "epoch": 4.83, + "learning_rate": 5.9923248e-05, + "loss": 0.6001, + "step": 63960 + }, + { + "epoch": 4.83, + "learning_rate": 5.9923176000000004e-05, + "loss": 0.6085, + "step": 64020 + }, + { + "epoch": 4.84, + "learning_rate": 5.9923104e-05, + "loss": 0.6034, + "step": 64080 + }, + { + "epoch": 4.84, + "learning_rate": 5.9923032e-05, + "loss": 0.6153, + "step": 64140 + }, + { + "epoch": 4.85, + "learning_rate": 5.9922960000000006e-05, + "loss": 0.5958, + "step": 64200 + }, + { + "epoch": 4.85, + "learning_rate": 5.9922888e-05, + "loss": 0.6085, + "step": 64260 + }, + { + "epoch": 4.86, + "learning_rate": 5.9922816e-05, + "loss": 0.6181, + "step": 64320 + }, + { + "epoch": 4.86, + "learning_rate": 5.9922743999999995e-05, + "loss": 0.6082, + "step": 64380 + }, + { + "epoch": 4.87, + "learning_rate": 5.9922672000000005e-05, + "loss": 0.6192, + "step": 64440 + }, + { + "epoch": 4.87, + "learning_rate": 5.99226e-05, + "loss": 0.5981, + "step": 64500 + }, + { + "epoch": 4.87, + "learning_rate": 5.9922528e-05, + "loss": 0.6103, + "step": 64560 + }, + { + "epoch": 4.88, + "learning_rate": 5.9922456e-05, + "loss": 0.6098, + "step": 64620 + }, + { + "epoch": 4.88, + "learning_rate": 5.9922384000000004e-05, + "loss": 0.6172, + "step": 64680 + }, + { + "epoch": 4.89, + "learning_rate": 5.9922312e-05, + "loss": 0.5967, + "step": 64740 + }, + { + "epoch": 4.89, + "learning_rate": 5.992224e-05, + "loss": 0.6232, + "step": 64800 + }, + { + "epoch": 4.9, + "learning_rate": 5.9922168e-05, + "loss": 0.6128, + "step": 64860 + }, + { + "epoch": 4.9, + "learning_rate": 5.9922096e-05, + "loss": 0.6088, + "step": 64920 + }, + { + "epoch": 4.91, + "learning_rate": 5.9922024000000006e-05, + "loss": 0.6025, + "step": 64980 + }, + { + "epoch": 4.91, + "learning_rate": 5.9921952e-05, + "loss": 0.6145, + "step": 65040 + }, + { + "epoch": 4.92, + "learning_rate": 5.992188e-05, + "loss": 0.6092, + "step": 65100 + }, + { + "epoch": 4.92, + "learning_rate": 5.9921808e-05, + "loss": 0.6028, + "step": 65160 + }, + { + "epoch": 4.92, + "learning_rate": 5.9921736000000004e-05, + "loss": 0.5969, + "step": 65220 + }, + { + "epoch": 4.93, + "learning_rate": 5.9921664e-05, + "loss": 0.594, + "step": 65280 + }, + { + "epoch": 4.93, + "learning_rate": 5.9921592e-05, + "loss": 0.6074, + "step": 65340 + }, + { + "epoch": 4.94, + "learning_rate": 5.992152e-05, + "loss": 0.6072, + "step": 65400 + }, + { + "epoch": 4.94, + "learning_rate": 5.9921448e-05, + "loss": 0.6098, + "step": 65460 + }, + { + "epoch": 4.95, + "learning_rate": 5.9921376e-05, + "loss": 0.6043, + "step": 65520 + }, + { + "epoch": 4.95, + "learning_rate": 5.9921304e-05, + "loss": 0.6058, + "step": 65580 + }, + { + "epoch": 4.96, + "learning_rate": 5.9921232e-05, + "loss": 0.6024, + "step": 65640 + }, + { + "epoch": 4.96, + "learning_rate": 5.992116e-05, + "loss": 0.6055, + "step": 65700 + }, + { + "epoch": 4.96, + "learning_rate": 5.9921088000000005e-05, + "loss": 0.606, + "step": 65760 + }, + { + "epoch": 4.97, + "learning_rate": 5.9921016e-05, + "loss": 0.6016, + "step": 65820 + }, + { + "epoch": 4.97, + "learning_rate": 5.9920944e-05, + "loss": 0.61, + "step": 65880 + }, + { + "epoch": 4.98, + "learning_rate": 5.9920872e-05, + "loss": 0.6045, + "step": 65940 + }, + { + "epoch": 4.98, + "learning_rate": 5.9920800000000004e-05, + "loss": 0.6068, + "step": 66000 + }, + { + "epoch": 4.99, + "learning_rate": 5.9920728e-05, + "loss": 0.6096, + "step": 66060 + }, + { + "epoch": 4.99, + "learning_rate": 5.9920656000000003e-05, + "loss": 0.6188, + "step": 66120 + }, + { + "epoch": 5.0, + "learning_rate": 5.9920584e-05, + "loss": 0.6049, + "step": 66180 + }, + { + "epoch": 5.0, + "eval_loss": 0.5711187720298767, + "eval_runtime": 338.4117, + "eval_samples_per_second": 295.498, + "eval_steps_per_second": 2.311, + "step": 66225 + }, + { + "epoch": 5.0, + "learning_rate": 5.9920512e-05, + "loss": 0.5971, + "step": 66240 + }, + { + "epoch": 5.01, + "learning_rate": 5.992044e-05, + "loss": 0.6038, + "step": 66300 + }, + { + "epoch": 5.01, + "learning_rate": 5.9920368e-05, + "loss": 0.6169, + "step": 66360 + }, + { + "epoch": 5.01, + "learning_rate": 5.9920296e-05, + "loss": 0.5918, + "step": 66420 + }, + { + "epoch": 5.02, + "learning_rate": 5.9920224e-05, + "loss": 0.5963, + "step": 66480 + }, + { + "epoch": 5.02, + "learning_rate": 5.9920152000000005e-05, + "loss": 0.5814, + "step": 66540 + }, + { + "epoch": 5.03, + "learning_rate": 5.992008e-05, + "loss": 0.5906, + "step": 66600 + }, + { + "epoch": 5.03, + "learning_rate": 5.9920008e-05, + "loss": 0.6049, + "step": 66660 + }, + { + "epoch": 5.04, + "learning_rate": 5.991993600000001e-05, + "loss": 0.5876, + "step": 66720 + }, + { + "epoch": 5.04, + "learning_rate": 5.9919864000000004e-05, + "loss": 0.6058, + "step": 66780 + }, + { + "epoch": 5.05, + "learning_rate": 5.9919792e-05, + "loss": 0.5979, + "step": 66840 + }, + { + "epoch": 5.05, + "learning_rate": 5.991972e-05, + "loss": 0.6152, + "step": 66900 + }, + { + "epoch": 5.06, + "learning_rate": 5.9919648e-05, + "loss": 0.5958, + "step": 66960 + }, + { + "epoch": 5.06, + "learning_rate": 5.9919576e-05, + "loss": 0.5936, + "step": 67020 + }, + { + "epoch": 5.06, + "learning_rate": 5.9919504e-05, + "loss": 0.6238, + "step": 67080 + }, + { + "epoch": 5.07, + "learning_rate": 5.9919432e-05, + "loss": 0.5909, + "step": 67140 + }, + { + "epoch": 5.07, + "learning_rate": 5.991936e-05, + "loss": 0.5929, + "step": 67200 + }, + { + "epoch": 5.08, + "learning_rate": 5.9919288e-05, + "loss": 0.5922, + "step": 67260 + }, + { + "epoch": 5.08, + "learning_rate": 5.9919216000000004e-05, + "loss": 0.5987, + "step": 67320 + }, + { + "epoch": 5.09, + "learning_rate": 5.9919144e-05, + "loss": 0.6068, + "step": 67380 + }, + { + "epoch": 5.09, + "learning_rate": 5.9919072e-05, + "loss": 0.6013, + "step": 67440 + }, + { + "epoch": 5.1, + "learning_rate": 5.991900000000001e-05, + "loss": 0.594, + "step": 67500 + }, + { + "epoch": 5.1, + "learning_rate": 5.9918928e-05, + "loss": 0.6135, + "step": 67560 + }, + { + "epoch": 5.11, + "learning_rate": 5.9918856e-05, + "loss": 0.5962, + "step": 67620 + }, + { + "epoch": 5.11, + "learning_rate": 5.9918784e-05, + "loss": 0.5978, + "step": 67680 + }, + { + "epoch": 5.11, + "learning_rate": 5.9918712e-05, + "loss": 0.6028, + "step": 67740 + }, + { + "epoch": 5.12, + "learning_rate": 5.991864e-05, + "loss": 0.598, + "step": 67800 + }, + { + "epoch": 5.12, + "learning_rate": 5.9918568000000005e-05, + "loss": 0.5986, + "step": 67860 + }, + { + "epoch": 5.13, + "learning_rate": 5.9918496e-05, + "loss": 0.5942, + "step": 67920 + }, + { + "epoch": 5.13, + "learning_rate": 5.9918424e-05, + "loss": 0.5942, + "step": 67980 + }, + { + "epoch": 5.14, + "learning_rate": 5.9918352e-05, + "loss": 0.5838, + "step": 68040 + }, + { + "epoch": 5.14, + "learning_rate": 5.9918280000000004e-05, + "loss": 0.5969, + "step": 68100 + }, + { + "epoch": 5.15, + "learning_rate": 5.9918208e-05, + "loss": 0.5888, + "step": 68160 + }, + { + "epoch": 5.15, + "learning_rate": 5.9918135999999997e-05, + "loss": 0.6135, + "step": 68220 + }, + { + "epoch": 5.16, + "learning_rate": 5.9918064000000006e-05, + "loss": 0.5873, + "step": 68280 + }, + { + "epoch": 5.16, + "learning_rate": 5.9917992e-05, + "loss": 0.5871, + "step": 68340 + }, + { + "epoch": 5.16, + "learning_rate": 5.991792e-05, + "loss": 0.6008, + "step": 68400 + }, + { + "epoch": 5.17, + "learning_rate": 5.9917848e-05, + "loss": 0.5999, + "step": 68460 + }, + { + "epoch": 5.17, + "learning_rate": 5.9917776000000005e-05, + "loss": 0.5977, + "step": 68520 + }, + { + "epoch": 5.18, + "learning_rate": 5.9917704e-05, + "loss": 0.6002, + "step": 68580 + }, + { + "epoch": 5.18, + "learning_rate": 5.9917632000000005e-05, + "loss": 0.6012, + "step": 68640 + }, + { + "epoch": 5.19, + "learning_rate": 5.991756e-05, + "loss": 0.5976, + "step": 68700 + }, + { + "epoch": 5.19, + "learning_rate": 5.9917488e-05, + "loss": 0.5938, + "step": 68760 + }, + { + "epoch": 5.2, + "learning_rate": 5.9917416e-05, + "loss": 0.6128, + "step": 68820 + }, + { + "epoch": 5.2, + "learning_rate": 5.9917344000000004e-05, + "loss": 0.604, + "step": 68880 + }, + { + "epoch": 5.2, + "learning_rate": 5.9917272e-05, + "loss": 0.6021, + "step": 68940 + }, + { + "epoch": 5.21, + "learning_rate": 5.9917199999999996e-05, + "loss": 0.5859, + "step": 69000 + }, + { + "epoch": 5.21, + "learning_rate": 5.9917128000000006e-05, + "loss": 0.5943, + "step": 69060 + }, + { + "epoch": 5.22, + "learning_rate": 5.9917056e-05, + "loss": 0.5914, + "step": 69120 + }, + { + "epoch": 5.22, + "learning_rate": 5.9916984e-05, + "loss": 0.6021, + "step": 69180 + }, + { + "epoch": 5.23, + "learning_rate": 5.9916912e-05, + "loss": 0.5977, + "step": 69240 + }, + { + "epoch": 5.23, + "learning_rate": 5.9916840000000005e-05, + "loss": 0.5947, + "step": 69300 + }, + { + "epoch": 5.24, + "learning_rate": 5.9916768e-05, + "loss": 0.601, + "step": 69360 + }, + { + "epoch": 5.24, + "learning_rate": 5.9916696000000004e-05, + "loss": 0.6036, + "step": 69420 + }, + { + "epoch": 5.25, + "learning_rate": 5.9916624e-05, + "loss": 0.5976, + "step": 69480 + }, + { + "epoch": 5.25, + "learning_rate": 5.9916552e-05, + "loss": 0.6009, + "step": 69540 + }, + { + "epoch": 5.25, + "learning_rate": 5.991648000000001e-05, + "loss": 0.6, + "step": 69600 + }, + { + "epoch": 5.26, + "learning_rate": 5.9916408e-05, + "loss": 0.5974, + "step": 69660 + }, + { + "epoch": 5.26, + "learning_rate": 5.9916336e-05, + "loss": 0.5969, + "step": 69720 + }, + { + "epoch": 5.27, + "learning_rate": 5.9916263999999996e-05, + "loss": 0.5927, + "step": 69780 + }, + { + "epoch": 5.27, + "learning_rate": 5.9916192000000006e-05, + "loss": 0.5953, + "step": 69840 + }, + { + "epoch": 5.28, + "learning_rate": 5.991612e-05, + "loss": 0.5962, + "step": 69900 + }, + { + "epoch": 5.28, + "learning_rate": 5.9916048e-05, + "loss": 0.6006, + "step": 69960 + }, + { + "epoch": 5.29, + "learning_rate": 5.9915976e-05, + "loss": 0.6094, + "step": 70020 + }, + { + "epoch": 5.29, + "learning_rate": 5.9915904000000004e-05, + "loss": 0.6063, + "step": 70080 + }, + { + "epoch": 5.3, + "learning_rate": 5.9915832e-05, + "loss": 0.6011, + "step": 70140 + }, + { + "epoch": 5.3, + "learning_rate": 5.9915760000000004e-05, + "loss": 0.6211, + "step": 70200 + }, + { + "epoch": 5.3, + "learning_rate": 5.9915688e-05, + "loss": 0.5883, + "step": 70260 + }, + { + "epoch": 5.31, + "learning_rate": 5.9915615999999996e-05, + "loss": 0.5907, + "step": 70320 + }, + { + "epoch": 5.31, + "learning_rate": 5.9915544000000006e-05, + "loss": 0.5849, + "step": 70380 + }, + { + "epoch": 5.32, + "learning_rate": 5.9915472e-05, + "loss": 0.5953, + "step": 70440 + }, + { + "epoch": 5.32, + "learning_rate": 5.99154e-05, + "loss": 0.6026, + "step": 70500 + }, + { + "epoch": 5.33, + "learning_rate": 5.9915328e-05, + "loss": 0.5977, + "step": 70560 + }, + { + "epoch": 5.33, + "learning_rate": 5.9915256000000005e-05, + "loss": 0.5908, + "step": 70620 + }, + { + "epoch": 5.34, + "learning_rate": 5.9915184e-05, + "loss": 0.5916, + "step": 70680 + }, + { + "epoch": 5.34, + "learning_rate": 5.9915112e-05, + "loss": 0.6072, + "step": 70740 + }, + { + "epoch": 5.35, + "learning_rate": 5.991504e-05, + "loss": 0.6063, + "step": 70800 + }, + { + "epoch": 5.35, + "learning_rate": 5.9914968000000004e-05, + "loss": 0.6131, + "step": 70860 + }, + { + "epoch": 5.35, + "learning_rate": 5.9914896e-05, + "loss": 0.606, + "step": 70920 + }, + { + "epoch": 5.36, + "learning_rate": 5.9914824000000003e-05, + "loss": 0.6085, + "step": 70980 + }, + { + "epoch": 5.36, + "learning_rate": 5.9914752e-05, + "loss": 0.5879, + "step": 71040 + }, + { + "epoch": 5.37, + "learning_rate": 5.991468e-05, + "loss": 0.6012, + "step": 71100 + }, + { + "epoch": 5.37, + "learning_rate": 5.9914608000000006e-05, + "loss": 0.5996, + "step": 71160 + }, + { + "epoch": 5.38, + "learning_rate": 5.9914536e-05, + "loss": 0.609, + "step": 71220 + }, + { + "epoch": 5.38, + "learning_rate": 5.9914464e-05, + "loss": 0.5972, + "step": 71280 + }, + { + "epoch": 5.39, + "learning_rate": 5.9914392e-05, + "loss": 0.5933, + "step": 71340 + }, + { + "epoch": 5.39, + "learning_rate": 5.9914320000000005e-05, + "loss": 0.6043, + "step": 71400 + }, + { + "epoch": 5.4, + "learning_rate": 5.9914248e-05, + "loss": 0.5974, + "step": 71460 + }, + { + "epoch": 5.4, + "learning_rate": 5.9914176e-05, + "loss": 0.6019, + "step": 71520 + }, + { + "epoch": 5.4, + "learning_rate": 5.9914104e-05, + "loss": 0.5936, + "step": 71580 + }, + { + "epoch": 5.41, + "learning_rate": 5.9914032000000004e-05, + "loss": 0.6024, + "step": 71640 + }, + { + "epoch": 5.41, + "learning_rate": 5.991396e-05, + "loss": 0.5902, + "step": 71700 + }, + { + "epoch": 5.42, + "learning_rate": 5.9913888e-05, + "loss": 0.5972, + "step": 71760 + }, + { + "epoch": 5.42, + "learning_rate": 5.9913816e-05, + "loss": 0.6076, + "step": 71820 + }, + { + "epoch": 5.43, + "learning_rate": 5.9913744e-05, + "loss": 0.5947, + "step": 71880 + }, + { + "epoch": 5.43, + "learning_rate": 5.9913672000000005e-05, + "loss": 0.6034, + "step": 71940 + }, + { + "epoch": 5.44, + "learning_rate": 5.99136e-05, + "loss": 0.578, + "step": 72000 + }, + { + "epoch": 5.44, + "learning_rate": 5.9913528e-05, + "loss": 0.5964, + "step": 72060 + }, + { + "epoch": 5.45, + "learning_rate": 5.9913456e-05, + "loss": 0.6042, + "step": 72120 + }, + { + "epoch": 5.45, + "learning_rate": 5.9913384000000004e-05, + "loss": 0.5839, + "step": 72180 + }, + { + "epoch": 5.45, + "learning_rate": 5.9913312e-05, + "loss": 0.592, + "step": 72240 + }, + { + "epoch": 5.46, + "learning_rate": 5.9913240000000004e-05, + "loss": 0.5976, + "step": 72300 + }, + { + "epoch": 5.46, + "learning_rate": 5.9913168e-05, + "loss": 0.6012, + "step": 72360 + }, + { + "epoch": 5.47, + "learning_rate": 5.9913096e-05, + "loss": 0.608, + "step": 72420 + }, + { + "epoch": 5.47, + "learning_rate": 5.9913024e-05, + "loss": 0.5921, + "step": 72480 + }, + { + "epoch": 5.48, + "learning_rate": 5.9912952e-05, + "loss": 0.5931, + "step": 72540 + }, + { + "epoch": 5.48, + "learning_rate": 5.991288e-05, + "loss": 0.5975, + "step": 72600 + }, + { + "epoch": 5.49, + "learning_rate": 5.9912808e-05, + "loss": 0.5962, + "step": 72660 + }, + { + "epoch": 5.49, + "learning_rate": 5.9912736000000005e-05, + "loss": 0.5797, + "step": 72720 + }, + { + "epoch": 5.49, + "learning_rate": 5.9912664e-05, + "loss": 0.6069, + "step": 72780 + }, + { + "epoch": 5.5, + "learning_rate": 5.9912592e-05, + "loss": 0.6101, + "step": 72840 + }, + { + "epoch": 5.5, + "learning_rate": 5.991252e-05, + "loss": 0.5948, + "step": 72900 + }, + { + "epoch": 5.51, + "learning_rate": 5.9912448000000004e-05, + "loss": 0.585, + "step": 72960 + }, + { + "epoch": 5.51, + "learning_rate": 5.9912376e-05, + "loss": 0.5822, + "step": 73020 + }, + { + "epoch": 5.52, + "learning_rate": 5.9912304e-05, + "loss": 0.5928, + "step": 73080 + }, + { + "epoch": 5.52, + "learning_rate": 5.9912232e-05, + "loss": 0.5937, + "step": 73140 + }, + { + "epoch": 5.53, + "learning_rate": 5.991216e-05, + "loss": 0.5935, + "step": 73200 + }, + { + "epoch": 5.53, + "learning_rate": 5.9912088e-05, + "loss": 0.6086, + "step": 73260 + }, + { + "epoch": 5.54, + "learning_rate": 5.9912016e-05, + "loss": 0.5861, + "step": 73320 + }, + { + "epoch": 5.54, + "learning_rate": 5.9911944e-05, + "loss": 0.6044, + "step": 73380 + }, + { + "epoch": 5.54, + "learning_rate": 5.9911872e-05, + "loss": 0.5965, + "step": 73440 + }, + { + "epoch": 5.55, + "learning_rate": 5.9911800000000005e-05, + "loss": 0.6038, + "step": 73500 + }, + { + "epoch": 5.55, + "learning_rate": 5.9911728e-05, + "loss": 0.6069, + "step": 73560 + }, + { + "epoch": 5.56, + "learning_rate": 5.9911656e-05, + "loss": 0.5932, + "step": 73620 + }, + { + "epoch": 5.56, + "learning_rate": 5.9911584e-05, + "loss": 0.5989, + "step": 73680 + }, + { + "epoch": 5.57, + "learning_rate": 5.9911512000000003e-05, + "loss": 0.5797, + "step": 73740 + }, + { + "epoch": 5.57, + "learning_rate": 5.991144e-05, + "loss": 0.5895, + "step": 73800 + }, + { + "epoch": 5.58, + "learning_rate": 5.9911368e-05, + "loss": 0.5978, + "step": 73860 + }, + { + "epoch": 5.58, + "learning_rate": 5.9911296e-05, + "loss": 0.6056, + "step": 73920 + }, + { + "epoch": 5.59, + "learning_rate": 5.9911224e-05, + "loss": 0.5696, + "step": 73980 + }, + { + "epoch": 5.59, + "learning_rate": 5.9911152000000005e-05, + "loss": 0.6003, + "step": 74040 + }, + { + "epoch": 5.59, + "learning_rate": 5.991108e-05, + "loss": 0.5911, + "step": 74100 + }, + { + "epoch": 5.6, + "learning_rate": 5.9911008e-05, + "loss": 0.5946, + "step": 74160 + }, + { + "epoch": 5.6, + "learning_rate": 5.9910936e-05, + "loss": 0.5974, + "step": 74220 + }, + { + "epoch": 5.61, + "learning_rate": 5.9910864000000004e-05, + "loss": 0.5955, + "step": 74280 + }, + { + "epoch": 5.61, + "learning_rate": 5.9910792e-05, + "loss": 0.6124, + "step": 74340 + }, + { + "epoch": 5.62, + "learning_rate": 5.991072e-05, + "loss": 0.5952, + "step": 74400 + }, + { + "epoch": 5.62, + "learning_rate": 5.991064800000001e-05, + "loss": 0.5927, + "step": 74460 + }, + { + "epoch": 5.63, + "learning_rate": 5.9910576e-05, + "loss": 0.5917, + "step": 74520 + }, + { + "epoch": 5.63, + "learning_rate": 5.9910504e-05, + "loss": 0.592, + "step": 74580 + }, + { + "epoch": 5.64, + "learning_rate": 5.9910432e-05, + "loss": 0.5965, + "step": 74640 + }, + { + "epoch": 5.64, + "learning_rate": 5.991036e-05, + "loss": 0.6002, + "step": 74700 + }, + { + "epoch": 5.64, + "learning_rate": 5.9910288e-05, + "loss": 0.5959, + "step": 74760 + }, + { + "epoch": 5.65, + "learning_rate": 5.9910216000000005e-05, + "loss": 0.5998, + "step": 74820 + }, + { + "epoch": 5.65, + "learning_rate": 5.9910144e-05, + "loss": 0.581, + "step": 74880 + }, + { + "epoch": 5.66, + "learning_rate": 5.9910072e-05, + "loss": 0.5939, + "step": 74940 + }, + { + "epoch": 5.66, + "learning_rate": 5.991000000000001e-05, + "loss": 0.5856, + "step": 75000 + }, + { + "epoch": 5.67, + "learning_rate": 5.9909928000000004e-05, + "loss": 0.5912, + "step": 75060 + }, + { + "epoch": 5.67, + "learning_rate": 5.9909856e-05, + "loss": 0.6011, + "step": 75120 + }, + { + "epoch": 5.68, + "learning_rate": 5.9909783999999996e-05, + "loss": 0.6042, + "step": 75180 + }, + { + "epoch": 5.68, + "learning_rate": 5.9909712000000006e-05, + "loss": 0.6024, + "step": 75240 + }, + { + "epoch": 5.69, + "learning_rate": 5.990964e-05, + "loss": 0.5982, + "step": 75300 + }, + { + "epoch": 5.69, + "learning_rate": 5.9909568e-05, + "loss": 0.5922, + "step": 75360 + }, + { + "epoch": 5.69, + "learning_rate": 5.9909496e-05, + "loss": 0.5983, + "step": 75420 + }, + { + "epoch": 5.7, + "learning_rate": 5.9909424e-05, + "loss": 0.5912, + "step": 75480 + }, + { + "epoch": 5.7, + "learning_rate": 5.9909352e-05, + "loss": 0.5958, + "step": 75540 + }, + { + "epoch": 5.71, + "learning_rate": 5.9909280000000005e-05, + "loss": 0.5978, + "step": 75600 + }, + { + "epoch": 5.71, + "learning_rate": 5.9909208e-05, + "loss": 0.5982, + "step": 75660 + }, + { + "epoch": 5.72, + "learning_rate": 5.9909136e-05, + "loss": 0.5973, + "step": 75720 + }, + { + "epoch": 5.72, + "learning_rate": 5.990906400000001e-05, + "loss": 0.5918, + "step": 75780 + }, + { + "epoch": 5.73, + "learning_rate": 5.9908992e-05, + "loss": 0.5985, + "step": 75840 + }, + { + "epoch": 5.73, + "learning_rate": 5.990892e-05, + "loss": 0.6067, + "step": 75900 + }, + { + "epoch": 5.73, + "learning_rate": 5.9908847999999996e-05, + "loss": 0.5987, + "step": 75960 + }, + { + "epoch": 5.74, + "learning_rate": 5.9908776000000006e-05, + "loss": 0.5952, + "step": 76020 + }, + { + "epoch": 5.74, + "learning_rate": 5.9908704e-05, + "loss": 0.5845, + "step": 76080 + }, + { + "epoch": 5.75, + "learning_rate": 5.9908632e-05, + "loss": 0.5931, + "step": 76140 + }, + { + "epoch": 5.75, + "learning_rate": 5.990856e-05, + "loss": 0.5919, + "step": 76200 + }, + { + "epoch": 5.76, + "learning_rate": 5.9908488e-05, + "loss": 0.598, + "step": 76260 + }, + { + "epoch": 5.76, + "learning_rate": 5.9908416e-05, + "loss": 0.5918, + "step": 76320 + }, + { + "epoch": 5.77, + "learning_rate": 5.9908344000000004e-05, + "loss": 0.6009, + "step": 76380 + }, + { + "epoch": 5.77, + "learning_rate": 5.9908272e-05, + "loss": 0.5986, + "step": 76440 + }, + { + "epoch": 5.78, + "learning_rate": 5.99082e-05, + "loss": 0.5821, + "step": 76500 + }, + { + "epoch": 5.78, + "learning_rate": 5.9908128000000007e-05, + "loss": 0.5853, + "step": 76560 + }, + { + "epoch": 5.78, + "learning_rate": 5.9908056e-05, + "loss": 0.5927, + "step": 76620 + }, + { + "epoch": 5.79, + "learning_rate": 5.9907984e-05, + "loss": 0.5981, + "step": 76680 + }, + { + "epoch": 5.79, + "learning_rate": 5.9907912e-05, + "loss": 0.5942, + "step": 76740 + }, + { + "epoch": 5.8, + "learning_rate": 5.9907840000000005e-05, + "loss": 0.602, + "step": 76800 + }, + { + "epoch": 5.8, + "learning_rate": 5.9907768e-05, + "loss": 0.6061, + "step": 76860 + }, + { + "epoch": 5.81, + "learning_rate": 5.9907696e-05, + "loss": 0.5987, + "step": 76920 + }, + { + "epoch": 5.81, + "learning_rate": 5.9907624e-05, + "loss": 0.5855, + "step": 76980 + }, + { + "epoch": 5.82, + "learning_rate": 5.9907552000000004e-05, + "loss": 0.6024, + "step": 77040 + }, + { + "epoch": 5.82, + "learning_rate": 5.990748e-05, + "loss": 0.6012, + "step": 77100 + }, + { + "epoch": 5.83, + "learning_rate": 5.9907408000000004e-05, + "loss": 0.5963, + "step": 77160 + }, + { + "epoch": 5.83, + "learning_rate": 5.9907336e-05, + "loss": 0.589, + "step": 77220 + }, + { + "epoch": 5.83, + "learning_rate": 5.9907263999999996e-05, + "loss": 0.5946, + "step": 77280 + }, + { + "epoch": 5.84, + "learning_rate": 5.9907192000000006e-05, + "loss": 0.5841, + "step": 77340 + }, + { + "epoch": 5.84, + "learning_rate": 5.990712e-05, + "loss": 0.5998, + "step": 77400 + }, + { + "epoch": 5.85, + "learning_rate": 5.9907048e-05, + "loss": 0.595, + "step": 77460 + }, + { + "epoch": 5.85, + "learning_rate": 5.9906976e-05, + "loss": 0.5847, + "step": 77520 + }, + { + "epoch": 5.86, + "learning_rate": 5.9906904000000005e-05, + "loss": 0.5911, + "step": 77580 + }, + { + "epoch": 5.86, + "learning_rate": 5.9906832e-05, + "loss": 0.5893, + "step": 77640 + }, + { + "epoch": 5.87, + "learning_rate": 5.990676e-05, + "loss": 0.5755, + "step": 77700 + }, + { + "epoch": 5.87, + "learning_rate": 5.9906688e-05, + "loss": 0.5896, + "step": 77760 + }, + { + "epoch": 5.88, + "learning_rate": 5.9906616000000004e-05, + "loss": 0.5859, + "step": 77820 + }, + { + "epoch": 5.88, + "learning_rate": 5.9906544e-05, + "loss": 0.6098, + "step": 77880 + }, + { + "epoch": 5.88, + "learning_rate": 5.9906472e-05, + "loss": 0.5881, + "step": 77940 + }, + { + "epoch": 5.89, + "learning_rate": 5.99064e-05, + "loss": 0.5928, + "step": 78000 + }, + { + "epoch": 5.89, + "learning_rate": 5.9906327999999996e-05, + "loss": 0.5871, + "step": 78060 + }, + { + "epoch": 5.9, + "learning_rate": 5.9906256000000006e-05, + "loss": 0.5964, + "step": 78120 + }, + { + "epoch": 5.9, + "learning_rate": 5.9906184e-05, + "loss": 0.6032, + "step": 78180 + }, + { + "epoch": 5.91, + "learning_rate": 5.9906112e-05, + "loss": 0.601, + "step": 78240 + }, + { + "epoch": 5.91, + "learning_rate": 5.990604e-05, + "loss": 0.5932, + "step": 78300 + }, + { + "epoch": 5.92, + "learning_rate": 5.9905968000000005e-05, + "loss": 0.591, + "step": 78360 + }, + { + "epoch": 5.92, + "learning_rate": 5.9905896e-05, + "loss": 0.5946, + "step": 78420 + }, + { + "epoch": 5.93, + "learning_rate": 5.9905824000000004e-05, + "loss": 0.6095, + "step": 78480 + }, + { + "epoch": 5.93, + "learning_rate": 5.9905752e-05, + "loss": 0.6016, + "step": 78540 + }, + { + "epoch": 5.93, + "learning_rate": 5.9905680000000003e-05, + "loss": 0.6, + "step": 78600 + }, + { + "epoch": 5.94, + "learning_rate": 5.9905608e-05, + "loss": 0.6051, + "step": 78660 + }, + { + "epoch": 5.94, + "learning_rate": 5.9905536e-05, + "loss": 0.5969, + "step": 78720 + }, + { + "epoch": 5.95, + "learning_rate": 5.9905464e-05, + "loss": 0.598, + "step": 78780 + }, + { + "epoch": 5.95, + "learning_rate": 5.9905391999999995e-05, + "loss": 0.5902, + "step": 78840 + }, + { + "epoch": 5.96, + "learning_rate": 5.9905320000000005e-05, + "loss": 0.5953, + "step": 78900 + }, + { + "epoch": 5.96, + "learning_rate": 5.9905248e-05, + "loss": 0.594, + "step": 78960 + }, + { + "epoch": 5.97, + "learning_rate": 5.9905176e-05, + "loss": 0.5871, + "step": 79020 + }, + { + "epoch": 5.97, + "learning_rate": 5.9905104e-05, + "loss": 0.6082, + "step": 79080 + }, + { + "epoch": 5.98, + "learning_rate": 5.9905032000000004e-05, + "loss": 0.5992, + "step": 79140 + }, + { + "epoch": 5.98, + "learning_rate": 5.990496e-05, + "loss": 0.595, + "step": 79200 + }, + { + "epoch": 5.98, + "learning_rate": 5.9904888000000004e-05, + "loss": 0.5921, + "step": 79260 + }, + { + "epoch": 5.99, + "learning_rate": 5.9904816e-05, + "loss": 0.5906, + "step": 79320 + }, + { + "epoch": 5.99, + "learning_rate": 5.9904744e-05, + "loss": 0.5945, + "step": 79380 + }, + { + "epoch": 6.0, + "learning_rate": 5.9904672000000006e-05, + "loss": 0.6001, + "step": 79440 + }, + { + "epoch": 6.0, + "eval_loss": 0.5644345283508301, + "eval_runtime": 338.3121, + "eval_samples_per_second": 295.585, + "eval_steps_per_second": 2.311, + "step": 79470 + }, + { + "epoch": 6.0, + "learning_rate": 5.99046e-05, + "loss": 0.5921, + "step": 79500 + }, + { + "epoch": 6.01, + "learning_rate": 5.9904528e-05, + "loss": 0.59, + "step": 79560 + }, + { + "epoch": 6.01, + "learning_rate": 5.9904456e-05, + "loss": 0.5994, + "step": 79620 + }, + { + "epoch": 6.02, + "learning_rate": 5.9904384000000005e-05, + "loss": 0.5955, + "step": 79680 + }, + { + "epoch": 6.02, + "learning_rate": 5.9904312e-05, + "loss": 0.5916, + "step": 79740 + }, + { + "epoch": 6.02, + "learning_rate": 5.990424e-05, + "loss": 0.5788, + "step": 79800 + }, + { + "epoch": 6.03, + "learning_rate": 5.9904168e-05, + "loss": 0.5976, + "step": 79860 + }, + { + "epoch": 6.03, + "learning_rate": 5.9904096000000004e-05, + "loss": 0.5906, + "step": 79920 + }, + { + "epoch": 6.04, + "learning_rate": 5.9904024e-05, + "loss": 0.5782, + "step": 79980 + }, + { + "epoch": 6.04, + "learning_rate": 5.9903952e-05, + "loss": 0.6055, + "step": 80040 + }, + { + "epoch": 6.05, + "learning_rate": 5.990388e-05, + "loss": 0.5741, + "step": 80100 + }, + { + "epoch": 6.05, + "learning_rate": 5.9903808e-05, + "loss": 0.5896, + "step": 80160 + }, + { + "epoch": 6.06, + "learning_rate": 5.9903736000000006e-05, + "loss": 0.575, + "step": 80220 + }, + { + "epoch": 6.06, + "learning_rate": 5.9903664e-05, + "loss": 0.579, + "step": 80280 + }, + { + "epoch": 6.07, + "learning_rate": 5.9903592e-05, + "loss": 0.5835, + "step": 80340 + }, + { + "epoch": 6.07, + "learning_rate": 5.990352e-05, + "loss": 0.5945, + "step": 80400 + }, + { + "epoch": 6.07, + "learning_rate": 5.9903448000000004e-05, + "loss": 0.5817, + "step": 80460 + }, + { + "epoch": 6.08, + "learning_rate": 5.9903376e-05, + "loss": 0.5845, + "step": 80520 + }, + { + "epoch": 6.08, + "learning_rate": 5.9903304e-05, + "loss": 0.5876, + "step": 80580 + }, + { + "epoch": 6.09, + "learning_rate": 5.9903232e-05, + "loss": 0.5875, + "step": 80640 + }, + { + "epoch": 6.09, + "learning_rate": 5.990316e-05, + "loss": 0.5869, + "step": 80700 + }, + { + "epoch": 6.1, + "learning_rate": 5.9903088e-05, + "loss": 0.5873, + "step": 80760 + }, + { + "epoch": 6.1, + "learning_rate": 5.9903016e-05, + "loss": 0.5796, + "step": 80820 + }, + { + "epoch": 6.11, + "learning_rate": 5.9902944e-05, + "loss": 0.6041, + "step": 80880 + }, + { + "epoch": 6.11, + "learning_rate": 5.9902872e-05, + "loss": 0.587, + "step": 80940 + }, + { + "epoch": 6.12, + "learning_rate": 5.9902800000000005e-05, + "loss": 0.5826, + "step": 81000 + }, + { + "epoch": 6.12, + "learning_rate": 5.9902728e-05, + "loss": 0.5953, + "step": 81060 + }, + { + "epoch": 6.12, + "learning_rate": 5.9902656e-05, + "loss": 0.5851, + "step": 81120 + }, + { + "epoch": 6.13, + "learning_rate": 5.990258400000001e-05, + "loss": 0.5919, + "step": 81180 + }, + { + "epoch": 6.13, + "learning_rate": 5.9902512000000004e-05, + "loss": 0.5829, + "step": 81240 + }, + { + "epoch": 6.14, + "learning_rate": 5.990244e-05, + "loss": 0.5911, + "step": 81300 + }, + { + "epoch": 6.14, + "learning_rate": 5.9902368e-05, + "loss": 0.5872, + "step": 81360 + }, + { + "epoch": 6.15, + "learning_rate": 5.9902296e-05, + "loss": 0.5879, + "step": 81420 + }, + { + "epoch": 6.15, + "learning_rate": 5.9902224e-05, + "loss": 0.5873, + "step": 81480 + }, + { + "epoch": 6.16, + "learning_rate": 5.9902152e-05, + "loss": 0.5865, + "step": 81540 + }, + { + "epoch": 6.16, + "learning_rate": 5.990208e-05, + "loss": 0.5723, + "step": 81600 + }, + { + "epoch": 6.17, + "learning_rate": 5.9902008e-05, + "loss": 0.5865, + "step": 81660 + }, + { + "epoch": 6.17, + "learning_rate": 5.9901936e-05, + "loss": 0.5861, + "step": 81720 + }, + { + "epoch": 6.17, + "learning_rate": 5.9901864000000005e-05, + "loss": 0.5925, + "step": 81780 + }, + { + "epoch": 6.18, + "learning_rate": 5.9901792e-05, + "loss": 0.5928, + "step": 81840 + }, + { + "epoch": 6.18, + "learning_rate": 5.990172e-05, + "loss": 0.5826, + "step": 81900 + }, + { + "epoch": 6.19, + "learning_rate": 5.990164800000001e-05, + "loss": 0.5909, + "step": 81960 + }, + { + "epoch": 6.19, + "learning_rate": 5.9901576000000004e-05, + "loss": 0.5895, + "step": 82020 + }, + { + "epoch": 6.2, + "learning_rate": 5.9901504e-05, + "loss": 0.5725, + "step": 82080 + }, + { + "epoch": 6.2, + "learning_rate": 5.9901431999999996e-05, + "loss": 0.5894, + "step": 82140 + }, + { + "epoch": 6.21, + "learning_rate": 5.9901360000000006e-05, + "loss": 0.5825, + "step": 82200 + }, + { + "epoch": 6.21, + "learning_rate": 5.9901288e-05, + "loss": 0.5783, + "step": 82260 + }, + { + "epoch": 6.22, + "learning_rate": 5.9901216e-05, + "loss": 0.5806, + "step": 82320 + }, + { + "epoch": 6.22, + "learning_rate": 5.9901144e-05, + "loss": 0.5812, + "step": 82380 + }, + { + "epoch": 6.22, + "learning_rate": 5.9901072e-05, + "loss": 0.593, + "step": 82440 + }, + { + "epoch": 6.23, + "learning_rate": 5.9901e-05, + "loss": 0.5773, + "step": 82500 + }, + { + "epoch": 6.23, + "learning_rate": 5.9900928000000004e-05, + "loss": 0.581, + "step": 82560 + }, + { + "epoch": 6.24, + "learning_rate": 5.9900856e-05, + "loss": 0.5819, + "step": 82620 + }, + { + "epoch": 6.24, + "learning_rate": 5.9900784e-05, + "loss": 0.5973, + "step": 82680 + }, + { + "epoch": 6.25, + "learning_rate": 5.990071200000001e-05, + "loss": 0.5941, + "step": 82740 + }, + { + "epoch": 6.25, + "learning_rate": 5.990064e-05, + "loss": 0.5874, + "step": 82800 + }, + { + "epoch": 6.26, + "learning_rate": 5.9900568e-05, + "loss": 0.5889, + "step": 82860 + }, + { + "epoch": 6.26, + "learning_rate": 5.9900496e-05, + "loss": 0.5878, + "step": 82920 + }, + { + "epoch": 6.27, + "learning_rate": 5.9900424000000006e-05, + "loss": 0.5899, + "step": 82980 + }, + { + "epoch": 6.27, + "learning_rate": 5.9900352e-05, + "loss": 0.5884, + "step": 83040 + }, + { + "epoch": 6.27, + "learning_rate": 5.990028e-05, + "loss": 0.5962, + "step": 83100 + }, + { + "epoch": 6.28, + "learning_rate": 5.9900208e-05, + "loss": 0.5812, + "step": 83160 + }, + { + "epoch": 6.28, + "learning_rate": 5.9900136e-05, + "loss": 0.5924, + "step": 83220 + }, + { + "epoch": 6.29, + "learning_rate": 5.9900064e-05, + "loss": 0.5824, + "step": 83280 + }, + { + "epoch": 6.29, + "learning_rate": 5.9899992000000004e-05, + "loss": 0.5817, + "step": 83340 + }, + { + "epoch": 6.3, + "learning_rate": 5.989992e-05, + "loss": 0.5796, + "step": 83400 + }, + { + "epoch": 6.3, + "learning_rate": 5.9899847999999997e-05, + "loss": 0.6034, + "step": 83460 + }, + { + "epoch": 6.31, + "learning_rate": 5.9899776000000006e-05, + "loss": 0.5809, + "step": 83520 + }, + { + "epoch": 6.31, + "learning_rate": 5.9899704e-05, + "loss": 0.5832, + "step": 83580 + }, + { + "epoch": 6.31, + "learning_rate": 5.9899632e-05, + "loss": 0.582, + "step": 83640 + }, + { + "epoch": 6.32, + "learning_rate": 5.989956e-05, + "loss": 0.57, + "step": 83700 + }, + { + "epoch": 6.32, + "learning_rate": 5.9899488000000005e-05, + "loss": 0.5765, + "step": 83760 + }, + { + "epoch": 6.33, + "learning_rate": 5.9899416e-05, + "loss": 0.5828, + "step": 83820 + }, + { + "epoch": 6.33, + "learning_rate": 5.9899344000000005e-05, + "loss": 0.5766, + "step": 83880 + }, + { + "epoch": 6.34, + "learning_rate": 5.9899272e-05, + "loss": 0.5856, + "step": 83940 + }, + { + "epoch": 6.34, + "learning_rate": 5.98992e-05, + "loss": 0.592, + "step": 84000 + }, + { + "epoch": 6.35, + "learning_rate": 5.9899128e-05, + "loss": 0.5809, + "step": 84060 + }, + { + "epoch": 6.35, + "learning_rate": 5.9899056000000004e-05, + "loss": 0.5756, + "step": 84120 + }, + { + "epoch": 6.36, + "learning_rate": 5.9898984e-05, + "loss": 0.5894, + "step": 84180 + }, + { + "epoch": 6.36, + "learning_rate": 5.9898911999999996e-05, + "loss": 0.584, + "step": 84240 + }, + { + "epoch": 6.36, + "learning_rate": 5.9898840000000006e-05, + "loss": 0.5866, + "step": 84300 + }, + { + "epoch": 6.37, + "learning_rate": 5.9898768e-05, + "loss": 0.5815, + "step": 84360 + }, + { + "epoch": 6.37, + "learning_rate": 5.9898696e-05, + "loss": 0.5796, + "step": 84420 + }, + { + "epoch": 6.38, + "learning_rate": 5.9898624e-05, + "loss": 0.5779, + "step": 84480 + }, + { + "epoch": 6.38, + "learning_rate": 5.9898552000000005e-05, + "loss": 0.5908, + "step": 84540 + }, + { + "epoch": 6.39, + "learning_rate": 5.989848e-05, + "loss": 0.5897, + "step": 84600 + }, + { + "epoch": 6.39, + "learning_rate": 5.9898408000000004e-05, + "loss": 0.5884, + "step": 84660 + }, + { + "epoch": 6.4, + "learning_rate": 5.9898336e-05, + "loss": 0.5725, + "step": 84720 + }, + { + "epoch": 6.4, + "learning_rate": 5.9898264000000004e-05, + "loss": 0.5918, + "step": 84780 + }, + { + "epoch": 6.41, + "learning_rate": 5.9898192e-05, + "loss": 0.585, + "step": 84840 + }, + { + "epoch": 6.41, + "learning_rate": 5.989812e-05, + "loss": 0.5804, + "step": 84900 + }, + { + "epoch": 6.41, + "learning_rate": 5.9898048e-05, + "loss": 0.5777, + "step": 84960 + }, + { + "epoch": 6.42, + "learning_rate": 5.9897975999999996e-05, + "loss": 0.5838, + "step": 85020 + }, + { + "epoch": 6.42, + "learning_rate": 5.9897904000000006e-05, + "loss": 0.5902, + "step": 85080 + }, + { + "epoch": 6.43, + "learning_rate": 5.9897832e-05, + "loss": 0.5763, + "step": 85140 + }, + { + "epoch": 6.43, + "learning_rate": 5.989776e-05, + "loss": 0.5702, + "step": 85200 + }, + { + "epoch": 6.44, + "learning_rate": 5.9897688e-05, + "loss": 0.5807, + "step": 85260 + }, + { + "epoch": 6.44, + "learning_rate": 5.9897616000000004e-05, + "loss": 0.5957, + "step": 85320 + }, + { + "epoch": 6.45, + "learning_rate": 5.9897544e-05, + "loss": 0.5953, + "step": 85380 + }, + { + "epoch": 6.45, + "learning_rate": 5.9897472000000004e-05, + "loss": 0.5845, + "step": 85440 + }, + { + "epoch": 6.46, + "learning_rate": 5.98974e-05, + "loss": 0.6025, + "step": 85500 + }, + { + "epoch": 6.46, + "learning_rate": 5.9897328e-05, + "loss": 0.5909, + "step": 85560 + }, + { + "epoch": 6.46, + "learning_rate": 5.9897256000000006e-05, + "loss": 0.6031, + "step": 85620 + }, + { + "epoch": 6.47, + "learning_rate": 5.9897184e-05, + "loss": 0.5765, + "step": 85680 + }, + { + "epoch": 6.47, + "learning_rate": 5.9897112e-05, + "loss": 0.5887, + "step": 85740 + }, + { + "epoch": 6.48, + "learning_rate": 5.9897039999999995e-05, + "loss": 0.5918, + "step": 85800 + }, + { + "epoch": 6.48, + "learning_rate": 5.9896968000000005e-05, + "loss": 0.5826, + "step": 85860 + }, + { + "epoch": 6.49, + "learning_rate": 5.9896896e-05, + "loss": 0.5864, + "step": 85920 + }, + { + "epoch": 6.49, + "learning_rate": 5.9896824e-05, + "loss": 0.5921, + "step": 85980 + }, + { + "epoch": 6.5, + "learning_rate": 5.9896752e-05, + "loss": 0.5878, + "step": 86040 + }, + { + "epoch": 6.5, + "learning_rate": 5.9896680000000004e-05, + "loss": 0.5893, + "step": 86100 + }, + { + "epoch": 6.51, + "learning_rate": 5.9896608e-05, + "loss": 0.5741, + "step": 86160 + }, + { + "epoch": 6.51, + "learning_rate": 5.9896536000000003e-05, + "loss": 0.5796, + "step": 86220 + }, + { + "epoch": 6.51, + "learning_rate": 5.9896464e-05, + "loss": 0.5714, + "step": 86280 + }, + { + "epoch": 6.52, + "learning_rate": 5.9896392e-05, + "loss": 0.5863, + "step": 86340 + }, + { + "epoch": 6.52, + "learning_rate": 5.9896320000000006e-05, + "loss": 0.5824, + "step": 86400 + }, + { + "epoch": 6.53, + "learning_rate": 5.9896248e-05, + "loss": 0.5816, + "step": 86460 + }, + { + "epoch": 6.53, + "learning_rate": 5.9896176e-05, + "loss": 0.5969, + "step": 86520 + }, + { + "epoch": 6.54, + "learning_rate": 5.9896103999999995e-05, + "loss": 0.5828, + "step": 86580 + }, + { + "epoch": 6.54, + "learning_rate": 5.9896032000000005e-05, + "loss": 0.5777, + "step": 86640 + }, + { + "epoch": 6.55, + "learning_rate": 5.989596e-05, + "loss": 0.5836, + "step": 86700 + }, + { + "epoch": 6.55, + "learning_rate": 5.9895888e-05, + "loss": 0.5998, + "step": 86760 + }, + { + "epoch": 6.55, + "learning_rate": 5.9895816e-05, + "loss": 0.5851, + "step": 86820 + }, + { + "epoch": 6.56, + "learning_rate": 5.9895744000000004e-05, + "loss": 0.5884, + "step": 86880 + }, + { + "epoch": 6.56, + "learning_rate": 5.9895672e-05, + "loss": 0.5761, + "step": 86940 + }, + { + "epoch": 6.57, + "learning_rate": 5.98956e-05, + "loss": 0.5775, + "step": 87000 + }, + { + "epoch": 6.57, + "learning_rate": 5.9895528e-05, + "loss": 0.5872, + "step": 87060 + }, + { + "epoch": 6.58, + "learning_rate": 5.9895456e-05, + "loss": 0.5801, + "step": 87120 + }, + { + "epoch": 6.58, + "learning_rate": 5.9895384000000006e-05, + "loss": 0.5934, + "step": 87180 + }, + { + "epoch": 6.59, + "learning_rate": 5.9895312e-05, + "loss": 0.584, + "step": 87240 + }, + { + "epoch": 6.59, + "learning_rate": 5.989524e-05, + "loss": 0.5738, + "step": 87300 + }, + { + "epoch": 6.6, + "learning_rate": 5.9895168e-05, + "loss": 0.5793, + "step": 87360 + }, + { + "epoch": 6.6, + "learning_rate": 5.9895096000000004e-05, + "loss": 0.5901, + "step": 87420 + }, + { + "epoch": 6.6, + "learning_rate": 5.9895024e-05, + "loss": 0.5848, + "step": 87480 + }, + { + "epoch": 6.61, + "learning_rate": 5.9894952e-05, + "loss": 0.5762, + "step": 87540 + }, + { + "epoch": 6.61, + "learning_rate": 5.989488e-05, + "loss": 0.5841, + "step": 87600 + }, + { + "epoch": 6.62, + "learning_rate": 5.9894808e-05, + "loss": 0.581, + "step": 87660 + }, + { + "epoch": 6.62, + "learning_rate": 5.9894736e-05, + "loss": 0.5923, + "step": 87720 + }, + { + "epoch": 6.63, + "learning_rate": 5.9894664e-05, + "loss": 0.5986, + "step": 87780 + }, + { + "epoch": 6.63, + "learning_rate": 5.9894592e-05, + "loss": 0.5811, + "step": 87840 + }, + { + "epoch": 6.64, + "learning_rate": 5.989452e-05, + "loss": 0.5936, + "step": 87900 + }, + { + "epoch": 6.64, + "learning_rate": 5.9894448000000005e-05, + "loss": 0.5842, + "step": 87960 + }, + { + "epoch": 6.65, + "learning_rate": 5.9894376e-05, + "loss": 0.586, + "step": 88020 + }, + { + "epoch": 6.65, + "learning_rate": 5.9894304e-05, + "loss": 0.5939, + "step": 88080 + }, + { + "epoch": 6.65, + "learning_rate": 5.989423200000001e-05, + "loss": 0.5847, + "step": 88140 + }, + { + "epoch": 6.66, + "learning_rate": 5.9894160000000004e-05, + "loss": 0.5964, + "step": 88200 + }, + { + "epoch": 6.66, + "learning_rate": 5.9894088e-05, + "loss": 0.5861, + "step": 88260 + }, + { + "epoch": 6.67, + "learning_rate": 5.9894016e-05, + "loss": 0.5808, + "step": 88320 + }, + { + "epoch": 6.67, + "learning_rate": 5.9893944e-05, + "loss": 0.5997, + "step": 88380 + }, + { + "epoch": 6.68, + "learning_rate": 5.9893872e-05, + "loss": 0.5815, + "step": 88440 + }, + { + "epoch": 6.68, + "learning_rate": 5.98938e-05, + "loss": 0.5903, + "step": 88500 + }, + { + "epoch": 6.69, + "learning_rate": 5.9893728e-05, + "loss": 0.59, + "step": 88560 + }, + { + "epoch": 6.69, + "learning_rate": 5.9893656e-05, + "loss": 0.5801, + "step": 88620 + }, + { + "epoch": 6.7, + "learning_rate": 5.9893584e-05, + "loss": 0.5801, + "step": 88680 + }, + { + "epoch": 6.7, + "learning_rate": 5.9893512000000005e-05, + "loss": 0.5811, + "step": 88740 + }, + { + "epoch": 6.7, + "learning_rate": 5.989344e-05, + "loss": 0.591, + "step": 88800 + }, + { + "epoch": 6.71, + "learning_rate": 5.9893368e-05, + "loss": 0.5894, + "step": 88860 + }, + { + "epoch": 6.71, + "learning_rate": 5.989329600000001e-05, + "loss": 0.5805, + "step": 88920 + }, + { + "epoch": 6.72, + "learning_rate": 5.9893224000000003e-05, + "loss": 0.5849, + "step": 88980 + }, + { + "epoch": 6.72, + "learning_rate": 5.9893152e-05, + "loss": 0.5835, + "step": 89040 + }, + { + "epoch": 6.73, + "learning_rate": 5.989308e-05, + "loss": 0.5938, + "step": 89100 + }, + { + "epoch": 6.73, + "learning_rate": 5.9893008e-05, + "loss": 0.5937, + "step": 89160 + }, + { + "epoch": 6.74, + "learning_rate": 5.9892936e-05, + "loss": 0.5864, + "step": 89220 + }, + { + "epoch": 6.74, + "learning_rate": 5.9892864e-05, + "loss": 0.5814, + "step": 89280 + }, + { + "epoch": 6.75, + "learning_rate": 5.9892792e-05, + "loss": 0.5774, + "step": 89340 + }, + { + "epoch": 6.75, + "learning_rate": 5.989272e-05, + "loss": 0.5917, + "step": 89400 + }, + { + "epoch": 6.75, + "learning_rate": 5.9892648e-05, + "loss": 0.5861, + "step": 89460 + }, + { + "epoch": 6.76, + "learning_rate": 5.9892576000000004e-05, + "loss": 0.5781, + "step": 89520 + }, + { + "epoch": 6.76, + "learning_rate": 5.9892504e-05, + "loss": 0.5817, + "step": 89580 + }, + { + "epoch": 6.77, + "learning_rate": 5.9892432e-05, + "loss": 0.5847, + "step": 89640 + }, + { + "epoch": 6.77, + "learning_rate": 5.989236000000001e-05, + "loss": 0.5865, + "step": 89700 + }, + { + "epoch": 6.78, + "learning_rate": 5.9892288e-05, + "loss": 0.5813, + "step": 89760 + }, + { + "epoch": 6.78, + "learning_rate": 5.9892216e-05, + "loss": 0.5787, + "step": 89820 + }, + { + "epoch": 6.79, + "learning_rate": 5.9892144e-05, + "loss": 0.5754, + "step": 89880 + }, + { + "epoch": 6.79, + "learning_rate": 5.9892072e-05, + "loss": 0.5839, + "step": 89940 + }, + { + "epoch": 6.8, + "learning_rate": 5.9892e-05, + "loss": 0.5826, + "step": 90000 + }, + { + "epoch": 6.8, + "learning_rate": 5.9891928000000005e-05, + "loss": 0.5811, + "step": 90060 + }, + { + "epoch": 6.8, + "learning_rate": 5.9891856e-05, + "loss": 0.5754, + "step": 90120 + }, + { + "epoch": 6.81, + "learning_rate": 5.9891784e-05, + "loss": 0.5925, + "step": 90180 + }, + { + "epoch": 6.81, + "learning_rate": 5.9891712e-05, + "loss": 0.5707, + "step": 90240 + }, + { + "epoch": 6.82, + "learning_rate": 5.9891640000000004e-05, + "loss": 0.577, + "step": 90300 + }, + { + "epoch": 6.82, + "learning_rate": 5.9891568e-05, + "loss": 0.5724, + "step": 90360 + }, + { + "epoch": 6.83, + "learning_rate": 5.9891495999999996e-05, + "loss": 0.5719, + "step": 90420 + }, + { + "epoch": 6.83, + "learning_rate": 5.9891424000000006e-05, + "loss": 0.574, + "step": 90480 + }, + { + "epoch": 6.84, + "learning_rate": 5.9891352e-05, + "loss": 0.5835, + "step": 90540 + }, + { + "epoch": 6.84, + "learning_rate": 5.989128e-05, + "loss": 0.5839, + "step": 90600 + }, + { + "epoch": 6.84, + "learning_rate": 5.9891208e-05, + "loss": 0.5915, + "step": 90660 + }, + { + "epoch": 6.85, + "learning_rate": 5.9891136000000005e-05, + "loss": 0.5833, + "step": 90720 + }, + { + "epoch": 6.85, + "learning_rate": 5.9891064e-05, + "loss": 0.5849, + "step": 90780 + }, + { + "epoch": 6.86, + "learning_rate": 5.9890992000000005e-05, + "loss": 0.5737, + "step": 90840 + }, + { + "epoch": 6.86, + "learning_rate": 5.989092e-05, + "loss": 0.5922, + "step": 90900 + }, + { + "epoch": 6.87, + "learning_rate": 5.9890848e-05, + "loss": 0.583, + "step": 90960 + }, + { + "epoch": 6.87, + "learning_rate": 5.9890776e-05, + "loss": 0.581, + "step": 91020 + }, + { + "epoch": 6.88, + "learning_rate": 5.9890704e-05, + "loss": 0.5743, + "step": 91080 + }, + { + "epoch": 6.88, + "learning_rate": 5.9890632e-05, + "loss": 0.5791, + "step": 91140 + }, + { + "epoch": 6.89, + "learning_rate": 5.9890559999999996e-05, + "loss": 0.5735, + "step": 91200 + }, + { + "epoch": 6.89, + "learning_rate": 5.9890488000000006e-05, + "loss": 0.5844, + "step": 91260 + }, + { + "epoch": 6.89, + "learning_rate": 5.9890416e-05, + "loss": 0.5857, + "step": 91320 + }, + { + "epoch": 6.9, + "learning_rate": 5.9890344e-05, + "loss": 0.586, + "step": 91380 + }, + { + "epoch": 6.9, + "learning_rate": 5.9890272e-05, + "loss": 0.586, + "step": 91440 + }, + { + "epoch": 6.91, + "learning_rate": 5.9890200000000005e-05, + "loss": 0.5881, + "step": 91500 + }, + { + "epoch": 6.91, + "learning_rate": 5.9890128e-05, + "loss": 0.5847, + "step": 91560 + }, + { + "epoch": 6.92, + "learning_rate": 5.9890056000000004e-05, + "loss": 0.5821, + "step": 91620 + }, + { + "epoch": 6.92, + "learning_rate": 5.9889984e-05, + "loss": 0.5704, + "step": 91680 + }, + { + "epoch": 6.93, + "learning_rate": 5.9889912e-05, + "loss": 0.5919, + "step": 91740 + }, + { + "epoch": 6.93, + "learning_rate": 5.988984000000001e-05, + "loss": 0.5823, + "step": 91800 + }, + { + "epoch": 6.94, + "learning_rate": 5.9889768e-05, + "loss": 0.5871, + "step": 91860 + }, + { + "epoch": 6.94, + "learning_rate": 5.9889696e-05, + "loss": 0.5815, + "step": 91920 + }, + { + "epoch": 6.94, + "learning_rate": 5.9889623999999996e-05, + "loss": 0.5844, + "step": 91980 + }, + { + "epoch": 6.95, + "learning_rate": 5.9889552000000005e-05, + "loss": 0.5819, + "step": 92040 + }, + { + "epoch": 6.95, + "learning_rate": 5.988948e-05, + "loss": 0.5846, + "step": 92100 + }, + { + "epoch": 6.96, + "learning_rate": 5.9889408e-05, + "loss": 0.5786, + "step": 92160 + }, + { + "epoch": 6.96, + "learning_rate": 5.9889336e-05, + "loss": 0.589, + "step": 92220 + }, + { + "epoch": 6.97, + "learning_rate": 5.9889264000000004e-05, + "loss": 0.5776, + "step": 92280 + }, + { + "epoch": 6.97, + "learning_rate": 5.9889192e-05, + "loss": 0.5901, + "step": 92340 + }, + { + "epoch": 6.98, + "learning_rate": 5.9889120000000004e-05, + "loss": 0.5847, + "step": 92400 + }, + { + "epoch": 6.98, + "learning_rate": 5.9889048e-05, + "loss": 0.597, + "step": 92460 + } + ], + "logging_steps": 60, + "max_steps": 50000000, + "num_train_epochs": 3776, + "save_steps": 500, + "total_flos": 2.045499936537987e+18, + "trial_name": null, + "trial_params": null +}