{ "best_metric": 0.6827040314674377, "best_model_checkpoint": "checkpoints/1a_52k/checkpoint-6200", "epoch": 1.9838412926965843, "eval_steps": 200, "global_step": 6200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 2.6999999999999996e-05, "loss": 2.0256, "step": 10 }, { "epoch": 0.01, "learning_rate": 5.399999999999999e-05, "loss": 1.9236, "step": 20 }, { "epoch": 0.01, "learning_rate": 7.8e-05, "loss": 1.712, "step": 30 }, { "epoch": 0.01, "learning_rate": 0.00010799999999999998, "loss": 1.2747, "step": 40 }, { "epoch": 0.02, "learning_rate": 0.000138, "loss": 1.0657, "step": 50 }, { "epoch": 0.02, "learning_rate": 0.000168, "loss": 0.973, "step": 60 }, { "epoch": 0.02, "learning_rate": 0.000198, "loss": 0.8178, "step": 70 }, { "epoch": 0.03, "learning_rate": 0.00022799999999999999, "loss": 0.7901, "step": 80 }, { "epoch": 0.03, "learning_rate": 0.000258, "loss": 0.7289, "step": 90 }, { "epoch": 0.03, "learning_rate": 0.00028799999999999995, "loss": 0.7429, "step": 100 }, { "epoch": 0.04, "learning_rate": 0.0002998840579710145, "loss": 0.7631, "step": 110 }, { "epoch": 0.04, "learning_rate": 0.0002996908212560386, "loss": 0.7339, "step": 120 }, { "epoch": 0.04, "learning_rate": 0.00029949758454106277, "loss": 0.7435, "step": 130 }, { "epoch": 0.04, "learning_rate": 0.00029930434782608696, "loss": 0.7333, "step": 140 }, { "epoch": 0.05, "learning_rate": 0.0002991111111111111, "loss": 0.769, "step": 150 }, { "epoch": 0.05, "learning_rate": 0.00029891787439613525, "loss": 0.7644, "step": 160 }, { "epoch": 0.05, "learning_rate": 0.0002987246376811594, "loss": 0.7517, "step": 170 }, { "epoch": 0.06, "learning_rate": 0.0002985314009661836, "loss": 0.7212, "step": 180 }, { "epoch": 0.06, "learning_rate": 0.00029833816425120773, "loss": 0.745, "step": 190 }, { "epoch": 0.06, "learning_rate": 0.00029814492753623187, "loss": 0.7023, "step": 200 }, { "epoch": 0.06, "eval_loss": 0.7244767546653748, "eval_runtime": 150.1014, "eval_samples_per_second": 13.324, "eval_steps_per_second": 1.666, "step": 200 }, { "epoch": 0.07, "learning_rate": 0.000297951690821256, "loss": 0.6783, "step": 210 }, { "epoch": 0.07, "learning_rate": 0.00029775845410628016, "loss": 0.7327, "step": 220 }, { "epoch": 0.07, "learning_rate": 0.00029756521739130435, "loss": 0.69, "step": 230 }, { "epoch": 0.08, "learning_rate": 0.0002973719806763285, "loss": 0.7069, "step": 240 }, { "epoch": 0.08, "learning_rate": 0.00029717874396135264, "loss": 0.7276, "step": 250 }, { "epoch": 0.08, "learning_rate": 0.0002969855072463768, "loss": 0.7356, "step": 260 }, { "epoch": 0.09, "learning_rate": 0.000296792270531401, "loss": 0.7103, "step": 270 }, { "epoch": 0.09, "learning_rate": 0.0002965990338164251, "loss": 0.7224, "step": 280 }, { "epoch": 0.09, "learning_rate": 0.00029640579710144926, "loss": 0.6898, "step": 290 }, { "epoch": 0.1, "learning_rate": 0.0002962125603864734, "loss": 0.7222, "step": 300 }, { "epoch": 0.1, "learning_rate": 0.00029601932367149755, "loss": 0.685, "step": 310 }, { "epoch": 0.1, "learning_rate": 0.00029582608695652175, "loss": 0.7389, "step": 320 }, { "epoch": 0.11, "learning_rate": 0.0002956328502415459, "loss": 0.6956, "step": 330 }, { "epoch": 0.11, "learning_rate": 0.00029543961352657003, "loss": 0.7191, "step": 340 }, { "epoch": 0.11, "learning_rate": 0.0002952463768115942, "loss": 0.6938, "step": 350 }, { "epoch": 0.12, "learning_rate": 0.00029505314009661837, "loss": 0.695, "step": 360 }, { "epoch": 0.12, "learning_rate": 0.0002948599033816425, "loss": 0.7169, "step": 370 }, { "epoch": 0.12, "learning_rate": 0.00029466666666666666, "loss": 0.7313, "step": 380 }, { "epoch": 0.12, "learning_rate": 0.0002944734299516908, "loss": 0.7016, "step": 390 }, { "epoch": 0.13, "learning_rate": 0.00029428019323671494, "loss": 0.7149, "step": 400 }, { "epoch": 0.13, "eval_loss": 0.7104699611663818, "eval_runtime": 150.0015, "eval_samples_per_second": 13.333, "eval_steps_per_second": 1.667, "step": 400 }, { "epoch": 0.13, "learning_rate": 0.00029408695652173914, "loss": 0.7133, "step": 410 }, { "epoch": 0.13, "learning_rate": 0.0002938937198067633, "loss": 0.7568, "step": 420 }, { "epoch": 0.14, "learning_rate": 0.0002937004830917874, "loss": 0.7159, "step": 430 }, { "epoch": 0.14, "learning_rate": 0.00029350724637681156, "loss": 0.7356, "step": 440 }, { "epoch": 0.14, "learning_rate": 0.00029331400966183576, "loss": 0.665, "step": 450 }, { "epoch": 0.15, "learning_rate": 0.0002931207729468599, "loss": 0.7017, "step": 460 }, { "epoch": 0.15, "learning_rate": 0.00029292753623188405, "loss": 0.6979, "step": 470 }, { "epoch": 0.15, "learning_rate": 0.0002927342995169082, "loss": 0.7104, "step": 480 }, { "epoch": 0.16, "learning_rate": 0.00029254106280193233, "loss": 0.6907, "step": 490 }, { "epoch": 0.16, "learning_rate": 0.00029234782608695653, "loss": 0.7407, "step": 500 }, { "epoch": 0.16, "learning_rate": 0.00029215458937198067, "loss": 0.7028, "step": 510 }, { "epoch": 0.17, "learning_rate": 0.0002919613526570048, "loss": 0.7102, "step": 520 }, { "epoch": 0.17, "learning_rate": 0.00029176811594202896, "loss": 0.6956, "step": 530 }, { "epoch": 0.17, "learning_rate": 0.00029157487922705315, "loss": 0.6926, "step": 540 }, { "epoch": 0.18, "learning_rate": 0.0002913816425120773, "loss": 0.7114, "step": 550 }, { "epoch": 0.18, "learning_rate": 0.00029118840579710144, "loss": 0.7066, "step": 560 }, { "epoch": 0.18, "learning_rate": 0.0002909951690821256, "loss": 0.6853, "step": 570 }, { "epoch": 0.19, "learning_rate": 0.0002908019323671497, "loss": 0.7063, "step": 580 }, { "epoch": 0.19, "learning_rate": 0.0002906086956521739, "loss": 0.7064, "step": 590 }, { "epoch": 0.19, "learning_rate": 0.00029041545893719806, "loss": 0.729, "step": 600 }, { "epoch": 0.19, "eval_loss": 0.7055845856666565, "eval_runtime": 149.8446, "eval_samples_per_second": 13.347, "eval_steps_per_second": 1.668, "step": 600 }, { "epoch": 0.2, "learning_rate": 0.0002902222222222222, "loss": 0.7098, "step": 610 }, { "epoch": 0.2, "learning_rate": 0.00029002898550724635, "loss": 0.6363, "step": 620 }, { "epoch": 0.2, "learning_rate": 0.0002898357487922705, "loss": 0.7128, "step": 630 }, { "epoch": 0.2, "learning_rate": 0.0002896425120772947, "loss": 0.692, "step": 640 }, { "epoch": 0.21, "learning_rate": 0.00028944927536231883, "loss": 0.7068, "step": 650 }, { "epoch": 0.21, "learning_rate": 0.00028925603864734297, "loss": 0.7191, "step": 660 }, { "epoch": 0.21, "learning_rate": 0.0002890628019323671, "loss": 0.6865, "step": 670 }, { "epoch": 0.22, "learning_rate": 0.0002888695652173913, "loss": 0.7331, "step": 680 }, { "epoch": 0.22, "learning_rate": 0.00028867632850241545, "loss": 0.652, "step": 690 }, { "epoch": 0.22, "learning_rate": 0.0002884830917874396, "loss": 0.7188, "step": 700 }, { "epoch": 0.23, "learning_rate": 0.00028828985507246374, "loss": 0.6834, "step": 710 }, { "epoch": 0.23, "learning_rate": 0.0002880966183574879, "loss": 0.693, "step": 720 }, { "epoch": 0.23, "learning_rate": 0.0002879033816425121, "loss": 0.7445, "step": 730 }, { "epoch": 0.24, "learning_rate": 0.0002877101449275362, "loss": 0.6904, "step": 740 }, { "epoch": 0.24, "learning_rate": 0.00028751690821256036, "loss": 0.7547, "step": 750 }, { "epoch": 0.24, "learning_rate": 0.0002873236714975845, "loss": 0.7068, "step": 760 }, { "epoch": 0.25, "learning_rate": 0.0002871304347826087, "loss": 0.6677, "step": 770 }, { "epoch": 0.25, "learning_rate": 0.00028693719806763285, "loss": 0.6808, "step": 780 }, { "epoch": 0.25, "learning_rate": 0.000286743961352657, "loss": 0.7142, "step": 790 }, { "epoch": 0.26, "learning_rate": 0.00028655072463768113, "loss": 0.7126, "step": 800 }, { "epoch": 0.26, "eval_loss": 0.7015364766120911, "eval_runtime": 149.6299, "eval_samples_per_second": 13.366, "eval_steps_per_second": 1.671, "step": 800 }, { "epoch": 0.26, "learning_rate": 0.0002863574879227053, "loss": 0.69, "step": 810 }, { "epoch": 0.26, "learning_rate": 0.00028616425120772947, "loss": 0.6772, "step": 820 }, { "epoch": 0.27, "learning_rate": 0.0002859710144927536, "loss": 0.6881, "step": 830 }, { "epoch": 0.27, "learning_rate": 0.00028577777777777776, "loss": 0.6509, "step": 840 }, { "epoch": 0.27, "learning_rate": 0.0002855845410628019, "loss": 0.6924, "step": 850 }, { "epoch": 0.28, "learning_rate": 0.00028539130434782604, "loss": 0.7116, "step": 860 }, { "epoch": 0.28, "learning_rate": 0.00028519806763285024, "loss": 0.711, "step": 870 }, { "epoch": 0.28, "learning_rate": 0.0002850048309178744, "loss": 0.6897, "step": 880 }, { "epoch": 0.28, "learning_rate": 0.0002848115942028985, "loss": 0.6689, "step": 890 }, { "epoch": 0.29, "learning_rate": 0.00028461835748792266, "loss": 0.6891, "step": 900 }, { "epoch": 0.29, "learning_rate": 0.00028442512077294686, "loss": 0.6985, "step": 910 }, { "epoch": 0.29, "learning_rate": 0.000284231884057971, "loss": 0.6643, "step": 920 }, { "epoch": 0.3, "learning_rate": 0.00028403864734299515, "loss": 0.7277, "step": 930 }, { "epoch": 0.3, "learning_rate": 0.0002838454106280193, "loss": 0.703, "step": 940 }, { "epoch": 0.3, "learning_rate": 0.00028365217391304343, "loss": 0.6678, "step": 950 }, { "epoch": 0.31, "learning_rate": 0.00028345893719806763, "loss": 0.6836, "step": 960 }, { "epoch": 0.31, "learning_rate": 0.00028326570048309177, "loss": 0.7164, "step": 970 }, { "epoch": 0.31, "learning_rate": 0.0002830724637681159, "loss": 0.6382, "step": 980 }, { "epoch": 0.32, "learning_rate": 0.00028287922705314006, "loss": 0.7, "step": 990 }, { "epoch": 0.32, "learning_rate": 0.0002826859903381642, "loss": 0.6974, "step": 1000 }, { "epoch": 0.32, "eval_loss": 0.6978325843811035, "eval_runtime": 149.6863, "eval_samples_per_second": 13.361, "eval_steps_per_second": 1.67, "step": 1000 }, { "epoch": 0.32, "learning_rate": 0.0002824927536231884, "loss": 0.6644, "step": 1010 }, { "epoch": 0.33, "learning_rate": 0.00028229951690821254, "loss": 0.672, "step": 1020 }, { "epoch": 0.33, "learning_rate": 0.0002821062801932367, "loss": 0.7036, "step": 1030 }, { "epoch": 0.33, "learning_rate": 0.0002819130434782608, "loss": 0.7049, "step": 1040 }, { "epoch": 0.34, "learning_rate": 0.000281719806763285, "loss": 0.7033, "step": 1050 }, { "epoch": 0.34, "learning_rate": 0.00028152657004830916, "loss": 0.7059, "step": 1060 }, { "epoch": 0.34, "learning_rate": 0.0002813333333333333, "loss": 0.6915, "step": 1070 }, { "epoch": 0.35, "learning_rate": 0.00028114009661835745, "loss": 0.7609, "step": 1080 }, { "epoch": 0.35, "learning_rate": 0.0002809468599033816, "loss": 0.6756, "step": 1090 }, { "epoch": 0.35, "learning_rate": 0.0002807536231884058, "loss": 0.7127, "step": 1100 }, { "epoch": 0.36, "learning_rate": 0.00028056038647342993, "loss": 0.7212, "step": 1110 }, { "epoch": 0.36, "learning_rate": 0.00028036714975845407, "loss": 0.6974, "step": 1120 }, { "epoch": 0.36, "learning_rate": 0.0002801739130434782, "loss": 0.6948, "step": 1130 }, { "epoch": 0.36, "learning_rate": 0.0002799806763285024, "loss": 0.73, "step": 1140 }, { "epoch": 0.37, "learning_rate": 0.00027978743961352655, "loss": 0.7154, "step": 1150 }, { "epoch": 0.37, "learning_rate": 0.0002795942028985507, "loss": 0.7007, "step": 1160 }, { "epoch": 0.37, "learning_rate": 0.00027940096618357484, "loss": 0.6854, "step": 1170 }, { "epoch": 0.38, "learning_rate": 0.000279207729468599, "loss": 0.7075, "step": 1180 }, { "epoch": 0.38, "learning_rate": 0.0002790144927536232, "loss": 0.6942, "step": 1190 }, { "epoch": 0.38, "learning_rate": 0.0002788212560386473, "loss": 0.7389, "step": 1200 }, { "epoch": 0.38, "eval_loss": 0.6969788670539856, "eval_runtime": 149.7859, "eval_samples_per_second": 13.352, "eval_steps_per_second": 1.669, "step": 1200 }, { "epoch": 0.39, "learning_rate": 0.00027862801932367146, "loss": 0.6633, "step": 1210 }, { "epoch": 0.39, "learning_rate": 0.0002784347826086956, "loss": 0.6741, "step": 1220 }, { "epoch": 0.39, "learning_rate": 0.00027824154589371975, "loss": 0.702, "step": 1230 }, { "epoch": 0.4, "learning_rate": 0.00027804830917874395, "loss": 0.691, "step": 1240 }, { "epoch": 0.4, "learning_rate": 0.0002778550724637681, "loss": 0.7049, "step": 1250 }, { "epoch": 0.4, "learning_rate": 0.00027766183574879223, "loss": 0.7254, "step": 1260 }, { "epoch": 0.41, "learning_rate": 0.0002774685990338164, "loss": 0.6732, "step": 1270 }, { "epoch": 0.41, "learning_rate": 0.00027727536231884057, "loss": 0.6995, "step": 1280 }, { "epoch": 0.41, "learning_rate": 0.0002770821256038647, "loss": 0.687, "step": 1290 }, { "epoch": 0.42, "learning_rate": 0.00027688888888888885, "loss": 0.6831, "step": 1300 }, { "epoch": 0.42, "learning_rate": 0.000276695652173913, "loss": 0.7189, "step": 1310 }, { "epoch": 0.42, "learning_rate": 0.00027650241545893714, "loss": 0.6996, "step": 1320 }, { "epoch": 0.43, "learning_rate": 0.00027630917874396134, "loss": 0.696, "step": 1330 }, { "epoch": 0.43, "learning_rate": 0.0002761159420289855, "loss": 0.7237, "step": 1340 }, { "epoch": 0.43, "learning_rate": 0.0002759227053140096, "loss": 0.7266, "step": 1350 }, { "epoch": 0.44, "learning_rate": 0.00027572946859903376, "loss": 0.6745, "step": 1360 }, { "epoch": 0.44, "learning_rate": 0.0002755362318840579, "loss": 0.7102, "step": 1370 }, { "epoch": 0.44, "learning_rate": 0.0002753429951690821, "loss": 0.6922, "step": 1380 }, { "epoch": 0.44, "learning_rate": 0.00027514975845410625, "loss": 0.7059, "step": 1390 }, { "epoch": 0.45, "learning_rate": 0.0002749565217391304, "loss": 0.7198, "step": 1400 }, { "epoch": 0.45, "eval_loss": 0.6949622631072998, "eval_runtime": 150.1806, "eval_samples_per_second": 13.317, "eval_steps_per_second": 1.665, "step": 1400 }, { "epoch": 0.45, "learning_rate": 0.00027476328502415453, "loss": 0.6608, "step": 1410 }, { "epoch": 0.45, "learning_rate": 0.00027457004830917873, "loss": 0.6699, "step": 1420 }, { "epoch": 0.46, "learning_rate": 0.00027437681159420287, "loss": 0.6817, "step": 1430 }, { "epoch": 0.46, "learning_rate": 0.000274183574879227, "loss": 0.7019, "step": 1440 }, { "epoch": 0.46, "learning_rate": 0.00027399033816425116, "loss": 0.6839, "step": 1450 }, { "epoch": 0.47, "learning_rate": 0.0002737971014492753, "loss": 0.6725, "step": 1460 }, { "epoch": 0.47, "learning_rate": 0.0002736038647342995, "loss": 0.7065, "step": 1470 }, { "epoch": 0.47, "learning_rate": 0.00027341062801932364, "loss": 0.6728, "step": 1480 }, { "epoch": 0.48, "learning_rate": 0.0002732173913043478, "loss": 0.6449, "step": 1490 }, { "epoch": 0.48, "learning_rate": 0.0002730241545893719, "loss": 0.7094, "step": 1500 }, { "epoch": 0.48, "learning_rate": 0.0002728309178743961, "loss": 0.6881, "step": 1510 }, { "epoch": 0.49, "learning_rate": 0.00027263768115942026, "loss": 0.6804, "step": 1520 }, { "epoch": 0.49, "learning_rate": 0.0002724444444444444, "loss": 0.6822, "step": 1530 }, { "epoch": 0.49, "learning_rate": 0.00027225120772946855, "loss": 0.6816, "step": 1540 }, { "epoch": 0.5, "learning_rate": 0.0002720579710144927, "loss": 0.6615, "step": 1550 }, { "epoch": 0.5, "learning_rate": 0.0002718647342995169, "loss": 0.6945, "step": 1560 }, { "epoch": 0.5, "learning_rate": 0.00027167149758454103, "loss": 0.7249, "step": 1570 }, { "epoch": 0.51, "learning_rate": 0.00027147826086956517, "loss": 0.7061, "step": 1580 }, { "epoch": 0.51, "learning_rate": 0.0002712850241545893, "loss": 0.6909, "step": 1590 }, { "epoch": 0.51, "learning_rate": 0.0002710917874396135, "loss": 0.7214, "step": 1600 }, { "epoch": 0.51, "eval_loss": 0.6923746466636658, "eval_runtime": 150.1003, "eval_samples_per_second": 13.324, "eval_steps_per_second": 1.666, "step": 1600 }, { "epoch": 0.52, "learning_rate": 0.00027089855072463765, "loss": 0.7448, "step": 1610 }, { "epoch": 0.52, "learning_rate": 0.0002707053140096618, "loss": 0.6746, "step": 1620 }, { "epoch": 0.52, "learning_rate": 0.00027051207729468594, "loss": 0.6952, "step": 1630 }, { "epoch": 0.52, "learning_rate": 0.0002703188405797101, "loss": 0.6985, "step": 1640 }, { "epoch": 0.53, "learning_rate": 0.0002701256038647343, "loss": 0.706, "step": 1650 }, { "epoch": 0.53, "learning_rate": 0.0002699323671497584, "loss": 0.6838, "step": 1660 }, { "epoch": 0.53, "learning_rate": 0.00026973913043478256, "loss": 0.6809, "step": 1670 }, { "epoch": 0.54, "learning_rate": 0.0002695458937198067, "loss": 0.7066, "step": 1680 }, { "epoch": 0.54, "learning_rate": 0.0002693526570048309, "loss": 0.6828, "step": 1690 }, { "epoch": 0.54, "learning_rate": 0.00026915942028985505, "loss": 0.6653, "step": 1700 }, { "epoch": 0.55, "learning_rate": 0.0002689661835748792, "loss": 0.6772, "step": 1710 }, { "epoch": 0.55, "learning_rate": 0.00026877294685990333, "loss": 0.6798, "step": 1720 }, { "epoch": 0.55, "learning_rate": 0.00026857971014492753, "loss": 0.6838, "step": 1730 }, { "epoch": 0.56, "learning_rate": 0.00026838647342995167, "loss": 0.7115, "step": 1740 }, { "epoch": 0.56, "learning_rate": 0.0002681932367149758, "loss": 0.6907, "step": 1750 }, { "epoch": 0.56, "learning_rate": 0.00026799999999999995, "loss": 0.6587, "step": 1760 }, { "epoch": 0.57, "learning_rate": 0.0002678067632850241, "loss": 0.7089, "step": 1770 }, { "epoch": 0.57, "learning_rate": 0.0002676135265700483, "loss": 0.6947, "step": 1780 }, { "epoch": 0.57, "learning_rate": 0.00026742028985507244, "loss": 0.698, "step": 1790 }, { "epoch": 0.58, "learning_rate": 0.0002672270531400966, "loss": 0.7183, "step": 1800 }, { "epoch": 0.58, "eval_loss": 0.6911550164222717, "eval_runtime": 368.3262, "eval_samples_per_second": 5.43, "eval_steps_per_second": 0.679, "step": 1800 }, { "epoch": 0.58, "learning_rate": 0.0002670338164251207, "loss": 0.7501, "step": 1810 }, { "epoch": 0.58, "learning_rate": 0.0002668405797101449, "loss": 0.6472, "step": 1820 }, { "epoch": 0.59, "learning_rate": 0.00026664734299516906, "loss": 0.6773, "step": 1830 }, { "epoch": 0.59, "learning_rate": 0.0002664541062801932, "loss": 0.7195, "step": 1840 }, { "epoch": 0.59, "learning_rate": 0.00026626086956521735, "loss": 0.7111, "step": 1850 }, { "epoch": 0.6, "learning_rate": 0.0002660676328502415, "loss": 0.6921, "step": 1860 }, { "epoch": 0.6, "learning_rate": 0.0002658743961352657, "loss": 0.6776, "step": 1870 }, { "epoch": 0.6, "learning_rate": 0.00026568115942028983, "loss": 0.6911, "step": 1880 }, { "epoch": 0.6, "learning_rate": 0.00026548792270531397, "loss": 0.7253, "step": 1890 }, { "epoch": 0.61, "learning_rate": 0.0002652946859903381, "loss": 0.672, "step": 1900 }, { "epoch": 0.61, "learning_rate": 0.0002651014492753623, "loss": 0.7156, "step": 1910 }, { "epoch": 0.61, "learning_rate": 0.00026490821256038645, "loss": 0.6929, "step": 1920 }, { "epoch": 0.62, "learning_rate": 0.0002647149758454106, "loss": 0.714, "step": 1930 }, { "epoch": 0.62, "learning_rate": 0.00026452173913043474, "loss": 0.6834, "step": 1940 }, { "epoch": 0.62, "learning_rate": 0.0002643285024154589, "loss": 0.7413, "step": 1950 }, { "epoch": 0.63, "learning_rate": 0.0002641352657004831, "loss": 0.6369, "step": 1960 }, { "epoch": 0.63, "learning_rate": 0.0002639420289855072, "loss": 0.6581, "step": 1970 }, { "epoch": 0.63, "learning_rate": 0.00026374879227053136, "loss": 0.644, "step": 1980 }, { "epoch": 0.64, "learning_rate": 0.0002635555555555555, "loss": 0.675, "step": 1990 }, { "epoch": 0.64, "learning_rate": 0.0002633623188405797, "loss": 0.6935, "step": 2000 }, { "epoch": 0.64, "eval_loss": 0.6894997954368591, "eval_runtime": 384.4995, "eval_samples_per_second": 5.202, "eval_steps_per_second": 0.65, "step": 2000 }, { "epoch": 0.64, "learning_rate": 0.00026316908212560384, "loss": 0.6739, "step": 2010 }, { "epoch": 0.65, "learning_rate": 0.000262975845410628, "loss": 0.719, "step": 2020 }, { "epoch": 0.65, "learning_rate": 0.00026278260869565213, "loss": 0.6724, "step": 2030 }, { "epoch": 0.65, "learning_rate": 0.0002625893719806763, "loss": 0.6736, "step": 2040 }, { "epoch": 0.66, "learning_rate": 0.00026239613526570047, "loss": 0.675, "step": 2050 }, { "epoch": 0.66, "learning_rate": 0.0002622028985507246, "loss": 0.7289, "step": 2060 }, { "epoch": 0.66, "learning_rate": 0.00026200966183574875, "loss": 0.7081, "step": 2070 }, { "epoch": 0.67, "learning_rate": 0.0002618164251207729, "loss": 0.6529, "step": 2080 }, { "epoch": 0.67, "learning_rate": 0.0002616231884057971, "loss": 0.6659, "step": 2090 }, { "epoch": 0.67, "learning_rate": 0.00026142995169082124, "loss": 0.663, "step": 2100 }, { "epoch": 0.68, "learning_rate": 0.0002612367149758454, "loss": 0.7021, "step": 2110 }, { "epoch": 0.68, "learning_rate": 0.0002610434782608695, "loss": 0.6856, "step": 2120 }, { "epoch": 0.68, "learning_rate": 0.0002608502415458937, "loss": 0.6762, "step": 2130 }, { "epoch": 0.68, "learning_rate": 0.00026065700483091786, "loss": 0.6754, "step": 2140 }, { "epoch": 0.69, "learning_rate": 0.000260463768115942, "loss": 0.7019, "step": 2150 }, { "epoch": 0.69, "learning_rate": 0.00026027053140096615, "loss": 0.6847, "step": 2160 }, { "epoch": 0.69, "learning_rate": 0.0002600772946859903, "loss": 0.6971, "step": 2170 }, { "epoch": 0.7, "learning_rate": 0.0002598840579710145, "loss": 0.6819, "step": 2180 }, { "epoch": 0.7, "learning_rate": 0.0002596908212560386, "loss": 0.6898, "step": 2190 }, { "epoch": 0.7, "learning_rate": 0.00025949758454106277, "loss": 0.698, "step": 2200 }, { "epoch": 0.7, "eval_loss": 0.6893799304962158, "eval_runtime": 386.4657, "eval_samples_per_second": 5.175, "eval_steps_per_second": 0.647, "step": 2200 }, { "epoch": 0.71, "learning_rate": 0.0002593043478260869, "loss": 0.6876, "step": 2210 }, { "epoch": 0.71, "learning_rate": 0.0002591111111111111, "loss": 0.6798, "step": 2220 }, { "epoch": 0.71, "learning_rate": 0.00025891787439613525, "loss": 0.706, "step": 2230 }, { "epoch": 0.72, "learning_rate": 0.0002587246376811594, "loss": 0.6495, "step": 2240 }, { "epoch": 0.72, "learning_rate": 0.00025853140096618354, "loss": 0.6646, "step": 2250 }, { "epoch": 0.72, "learning_rate": 0.0002583381642512077, "loss": 0.6417, "step": 2260 }, { "epoch": 0.73, "learning_rate": 0.0002581449275362319, "loss": 0.6681, "step": 2270 }, { "epoch": 0.73, "learning_rate": 0.000257951690821256, "loss": 0.6689, "step": 2280 }, { "epoch": 0.73, "learning_rate": 0.00025775845410628016, "loss": 0.6837, "step": 2290 }, { "epoch": 0.74, "learning_rate": 0.0002575652173913043, "loss": 0.7031, "step": 2300 }, { "epoch": 0.74, "learning_rate": 0.0002573719806763285, "loss": 0.6746, "step": 2310 }, { "epoch": 0.74, "learning_rate": 0.00025717874396135264, "loss": 0.6951, "step": 2320 }, { "epoch": 0.75, "learning_rate": 0.0002569855072463768, "loss": 0.6988, "step": 2330 }, { "epoch": 0.75, "learning_rate": 0.00025679227053140093, "loss": 0.6541, "step": 2340 }, { "epoch": 0.75, "learning_rate": 0.0002565990338164251, "loss": 0.6366, "step": 2350 }, { "epoch": 0.76, "learning_rate": 0.00025640579710144927, "loss": 0.7011, "step": 2360 }, { "epoch": 0.76, "learning_rate": 0.0002562125603864734, "loss": 0.6935, "step": 2370 }, { "epoch": 0.76, "learning_rate": 0.00025601932367149755, "loss": 0.6931, "step": 2380 }, { "epoch": 0.76, "learning_rate": 0.0002558260869565217, "loss": 0.7004, "step": 2390 }, { "epoch": 0.77, "learning_rate": 0.0002556328502415459, "loss": 0.6556, "step": 2400 }, { "epoch": 0.77, "eval_loss": 0.6892030239105225, "eval_runtime": 391.4843, "eval_samples_per_second": 5.109, "eval_steps_per_second": 0.639, "step": 2400 }, { "epoch": 0.77, "learning_rate": 0.00025543961352657003, "loss": 0.664, "step": 2410 }, { "epoch": 0.77, "learning_rate": 0.0002552463768115942, "loss": 0.7162, "step": 2420 }, { "epoch": 0.78, "learning_rate": 0.0002550531400966183, "loss": 0.646, "step": 2430 }, { "epoch": 0.78, "learning_rate": 0.0002548599033816425, "loss": 0.6515, "step": 2440 }, { "epoch": 0.78, "learning_rate": 0.00025466666666666666, "loss": 0.6953, "step": 2450 }, { "epoch": 0.79, "learning_rate": 0.0002544734299516908, "loss": 0.6887, "step": 2460 }, { "epoch": 0.79, "learning_rate": 0.00025428019323671494, "loss": 0.6739, "step": 2470 }, { "epoch": 0.79, "learning_rate": 0.0002540869565217391, "loss": 0.6923, "step": 2480 }, { "epoch": 0.8, "learning_rate": 0.0002538937198067633, "loss": 0.6877, "step": 2490 }, { "epoch": 0.8, "learning_rate": 0.0002537004830917874, "loss": 0.6865, "step": 2500 }, { "epoch": 0.8, "learning_rate": 0.00025350724637681157, "loss": 0.6337, "step": 2510 }, { "epoch": 0.81, "learning_rate": 0.0002533140096618357, "loss": 0.7073, "step": 2520 }, { "epoch": 0.81, "learning_rate": 0.0002531207729468599, "loss": 0.6973, "step": 2530 }, { "epoch": 0.81, "learning_rate": 0.00025292753623188405, "loss": 0.6719, "step": 2540 }, { "epoch": 0.82, "learning_rate": 0.0002527342995169082, "loss": 0.6674, "step": 2550 }, { "epoch": 0.82, "learning_rate": 0.00025254106280193234, "loss": 0.6745, "step": 2560 }, { "epoch": 0.82, "learning_rate": 0.0002523478260869565, "loss": 0.6914, "step": 2570 }, { "epoch": 0.83, "learning_rate": 0.0002521545893719807, "loss": 0.6382, "step": 2580 }, { "epoch": 0.83, "learning_rate": 0.0002519613526570048, "loss": 0.6644, "step": 2590 }, { "epoch": 0.83, "learning_rate": 0.00025176811594202896, "loss": 0.6892, "step": 2600 }, { "epoch": 0.83, "eval_loss": 0.6873727440834045, "eval_runtime": 390.6367, "eval_samples_per_second": 5.12, "eval_steps_per_second": 0.64, "step": 2600 }, { "epoch": 0.84, "learning_rate": 0.0002515748792270531, "loss": 0.6592, "step": 2610 }, { "epoch": 0.84, "learning_rate": 0.0002513816425120773, "loss": 0.6827, "step": 2620 }, { "epoch": 0.84, "learning_rate": 0.00025118840579710144, "loss": 0.6436, "step": 2630 }, { "epoch": 0.84, "learning_rate": 0.0002509951690821256, "loss": 0.6969, "step": 2640 }, { "epoch": 0.85, "learning_rate": 0.0002508019323671497, "loss": 0.6747, "step": 2650 }, { "epoch": 0.85, "learning_rate": 0.0002506086956521739, "loss": 0.697, "step": 2660 }, { "epoch": 0.85, "learning_rate": 0.00025041545893719807, "loss": 0.7146, "step": 2670 }, { "epoch": 0.86, "learning_rate": 0.0002502222222222222, "loss": 0.689, "step": 2680 }, { "epoch": 0.86, "learning_rate": 0.00025002898550724635, "loss": 0.6957, "step": 2690 }, { "epoch": 0.86, "learning_rate": 0.0002498357487922705, "loss": 0.708, "step": 2700 }, { "epoch": 0.87, "learning_rate": 0.0002496425120772947, "loss": 0.7306, "step": 2710 }, { "epoch": 0.87, "learning_rate": 0.00024944927536231883, "loss": 0.6418, "step": 2720 }, { "epoch": 0.87, "learning_rate": 0.000249256038647343, "loss": 0.6888, "step": 2730 }, { "epoch": 0.88, "learning_rate": 0.0002490628019323671, "loss": 0.6573, "step": 2740 }, { "epoch": 0.88, "learning_rate": 0.0002488695652173913, "loss": 0.7008, "step": 2750 }, { "epoch": 0.88, "learning_rate": 0.00024867632850241546, "loss": 0.689, "step": 2760 }, { "epoch": 0.89, "learning_rate": 0.0002484830917874396, "loss": 0.6739, "step": 2770 }, { "epoch": 0.89, "learning_rate": 0.00024828985507246374, "loss": 0.6867, "step": 2780 }, { "epoch": 0.89, "learning_rate": 0.0002480966183574879, "loss": 0.6874, "step": 2790 }, { "epoch": 0.9, "learning_rate": 0.0002479033816425121, "loss": 0.6858, "step": 2800 }, { "epoch": 0.9, "eval_loss": 0.686144232749939, "eval_runtime": 396.2716, "eval_samples_per_second": 5.047, "eval_steps_per_second": 0.631, "step": 2800 }, { "epoch": 0.9, "learning_rate": 0.0002477101449275362, "loss": 0.659, "step": 2810 }, { "epoch": 0.9, "learning_rate": 0.00024751690821256037, "loss": 0.6537, "step": 2820 }, { "epoch": 0.91, "learning_rate": 0.0002473236714975845, "loss": 0.7331, "step": 2830 }, { "epoch": 0.91, "learning_rate": 0.0002471304347826087, "loss": 0.6855, "step": 2840 }, { "epoch": 0.91, "learning_rate": 0.00024693719806763285, "loss": 0.7252, "step": 2850 }, { "epoch": 0.92, "learning_rate": 0.000246743961352657, "loss": 0.7, "step": 2860 }, { "epoch": 0.92, "learning_rate": 0.00024655072463768113, "loss": 0.6917, "step": 2870 }, { "epoch": 0.92, "learning_rate": 0.0002463574879227053, "loss": 0.6828, "step": 2880 }, { "epoch": 0.92, "learning_rate": 0.0002461642512077295, "loss": 0.6747, "step": 2890 }, { "epoch": 0.93, "learning_rate": 0.0002459710144927536, "loss": 0.696, "step": 2900 }, { "epoch": 0.93, "learning_rate": 0.00024577777777777776, "loss": 0.6573, "step": 2910 }, { "epoch": 0.93, "learning_rate": 0.0002455845410628019, "loss": 0.6811, "step": 2920 }, { "epoch": 0.94, "learning_rate": 0.0002453913043478261, "loss": 0.6992, "step": 2930 }, { "epoch": 0.94, "learning_rate": 0.00024519806763285024, "loss": 0.6972, "step": 2940 }, { "epoch": 0.94, "learning_rate": 0.0002450048309178744, "loss": 0.6685, "step": 2950 }, { "epoch": 0.95, "learning_rate": 0.0002448115942028985, "loss": 0.6531, "step": 2960 }, { "epoch": 0.95, "learning_rate": 0.0002446183574879227, "loss": 0.6749, "step": 2970 }, { "epoch": 0.95, "learning_rate": 0.00024442512077294686, "loss": 0.6811, "step": 2980 }, { "epoch": 0.96, "learning_rate": 0.000244231884057971, "loss": 0.6904, "step": 2990 }, { "epoch": 0.96, "learning_rate": 0.00024403864734299515, "loss": 0.6819, "step": 3000 }, { "epoch": 0.96, "eval_loss": 0.6858677864074707, "eval_runtime": 418.7569, "eval_samples_per_second": 4.776, "eval_steps_per_second": 0.597, "step": 3000 }, { "epoch": 0.96, "learning_rate": 0.0002438454106280193, "loss": 0.6773, "step": 3010 }, { "epoch": 0.97, "learning_rate": 0.00024365217391304346, "loss": 0.7034, "step": 3020 }, { "epoch": 0.97, "learning_rate": 0.0002434589371980676, "loss": 0.6804, "step": 3030 }, { "epoch": 0.97, "learning_rate": 0.00024326570048309177, "loss": 0.6912, "step": 3040 }, { "epoch": 0.98, "learning_rate": 0.00024307246376811592, "loss": 0.683, "step": 3050 }, { "epoch": 0.98, "learning_rate": 0.0002428792270531401, "loss": 0.6767, "step": 3060 }, { "epoch": 0.98, "learning_rate": 0.00024268599033816423, "loss": 0.6683, "step": 3070 }, { "epoch": 0.99, "learning_rate": 0.00024249275362318837, "loss": 0.6777, "step": 3080 }, { "epoch": 0.99, "learning_rate": 0.00024229951690821254, "loss": 0.6813, "step": 3090 }, { "epoch": 0.99, "learning_rate": 0.00024210628019323668, "loss": 0.6878, "step": 3100 }, { "epoch": 1.0, "learning_rate": 0.00024191304347826085, "loss": 0.6885, "step": 3110 }, { "epoch": 1.0, "learning_rate": 0.000241719806763285, "loss": 0.6373, "step": 3120 }, { "epoch": 1.0, "learning_rate": 0.00024152657004830917, "loss": 0.6982, "step": 3130 }, { "epoch": 1.0, "learning_rate": 0.0002413333333333333, "loss": 0.6485, "step": 3140 }, { "epoch": 1.01, "learning_rate": 0.00024114009661835748, "loss": 0.6522, "step": 3150 }, { "epoch": 1.01, "learning_rate": 0.00024094685990338162, "loss": 0.6092, "step": 3160 }, { "epoch": 1.01, "learning_rate": 0.00024075362318840576, "loss": 0.6362, "step": 3170 }, { "epoch": 1.02, "learning_rate": 0.00024056038647342993, "loss": 0.6725, "step": 3180 }, { "epoch": 1.02, "learning_rate": 0.00024036714975845408, "loss": 0.6718, "step": 3190 }, { "epoch": 1.02, "learning_rate": 0.00024017391304347825, "loss": 0.6738, "step": 3200 }, { "epoch": 1.02, "eval_loss": 0.6860418915748596, "eval_runtime": 407.1637, "eval_samples_per_second": 4.912, "eval_steps_per_second": 0.614, "step": 3200 }, { "epoch": 1.03, "learning_rate": 0.0002399806763285024, "loss": 0.658, "step": 3210 }, { "epoch": 1.03, "learning_rate": 0.00023978743961352656, "loss": 0.6325, "step": 3220 }, { "epoch": 1.03, "learning_rate": 0.0002395942028985507, "loss": 0.6372, "step": 3230 }, { "epoch": 1.04, "learning_rate": 0.00023940096618357487, "loss": 0.6075, "step": 3240 }, { "epoch": 1.04, "learning_rate": 0.000239207729468599, "loss": 0.6212, "step": 3250 }, { "epoch": 1.04, "learning_rate": 0.00023901449275362315, "loss": 0.6373, "step": 3260 }, { "epoch": 1.05, "learning_rate": 0.00023882125603864732, "loss": 0.6799, "step": 3270 }, { "epoch": 1.05, "learning_rate": 0.0002386280193236715, "loss": 0.6769, "step": 3280 }, { "epoch": 1.05, "learning_rate": 0.00023843478260869564, "loss": 0.6265, "step": 3290 }, { "epoch": 1.06, "learning_rate": 0.00023824154589371978, "loss": 0.6627, "step": 3300 }, { "epoch": 1.06, "learning_rate": 0.00023804830917874392, "loss": 0.6602, "step": 3310 }, { "epoch": 1.06, "learning_rate": 0.0002378550724637681, "loss": 0.671, "step": 3320 }, { "epoch": 1.07, "learning_rate": 0.00023766183574879226, "loss": 0.6297, "step": 3330 }, { "epoch": 1.07, "learning_rate": 0.0002374685990338164, "loss": 0.662, "step": 3340 }, { "epoch": 1.07, "learning_rate": 0.00023727536231884055, "loss": 0.6224, "step": 3350 }, { "epoch": 1.08, "learning_rate": 0.00023708212560386472, "loss": 0.6527, "step": 3360 }, { "epoch": 1.08, "learning_rate": 0.00023688888888888889, "loss": 0.6388, "step": 3370 }, { "epoch": 1.08, "learning_rate": 0.00023669565217391303, "loss": 0.6626, "step": 3380 }, { "epoch": 1.08, "learning_rate": 0.00023650241545893717, "loss": 0.6522, "step": 3390 }, { "epoch": 1.09, "learning_rate": 0.0002363091787439613, "loss": 0.6514, "step": 3400 }, { "epoch": 1.09, "eval_loss": 0.6873291730880737, "eval_runtime": 392.2057, "eval_samples_per_second": 5.099, "eval_steps_per_second": 0.637, "step": 3400 }, { "epoch": 1.09, "learning_rate": 0.00023611594202898548, "loss": 0.6147, "step": 3410 }, { "epoch": 1.09, "learning_rate": 0.00023592270531400965, "loss": 0.6282, "step": 3420 }, { "epoch": 1.1, "learning_rate": 0.0002357294685990338, "loss": 0.6168, "step": 3430 }, { "epoch": 1.1, "learning_rate": 0.00023553623188405794, "loss": 0.6858, "step": 3440 }, { "epoch": 1.1, "learning_rate": 0.00023534299516908208, "loss": 0.6219, "step": 3450 }, { "epoch": 1.11, "learning_rate": 0.00023514975845410628, "loss": 0.6566, "step": 3460 }, { "epoch": 1.11, "learning_rate": 0.00023495652173913042, "loss": 0.6633, "step": 3470 }, { "epoch": 1.11, "learning_rate": 0.00023476328502415456, "loss": 0.6526, "step": 3480 }, { "epoch": 1.12, "learning_rate": 0.0002345700483091787, "loss": 0.6544, "step": 3490 }, { "epoch": 1.12, "learning_rate": 0.00023437681159420287, "loss": 0.6588, "step": 3500 }, { "epoch": 1.12, "learning_rate": 0.00023418357487922704, "loss": 0.6477, "step": 3510 }, { "epoch": 1.13, "learning_rate": 0.0002339903381642512, "loss": 0.6494, "step": 3520 }, { "epoch": 1.13, "learning_rate": 0.00023379710144927533, "loss": 0.6219, "step": 3530 }, { "epoch": 1.13, "learning_rate": 0.00023360386473429947, "loss": 0.6369, "step": 3540 }, { "epoch": 1.14, "learning_rate": 0.00023341062801932367, "loss": 0.6647, "step": 3550 }, { "epoch": 1.14, "learning_rate": 0.0002332173913043478, "loss": 0.6477, "step": 3560 }, { "epoch": 1.14, "learning_rate": 0.00023302415458937195, "loss": 0.6721, "step": 3570 }, { "epoch": 1.15, "learning_rate": 0.0002328309178743961, "loss": 0.6327, "step": 3580 }, { "epoch": 1.15, "learning_rate": 0.0002326376811594203, "loss": 0.6668, "step": 3590 }, { "epoch": 1.15, "learning_rate": 0.00023244444444444444, "loss": 0.6422, "step": 3600 }, { "epoch": 1.15, "eval_loss": 0.6869779229164124, "eval_runtime": 396.5319, "eval_samples_per_second": 5.044, "eval_steps_per_second": 0.63, "step": 3600 }, { "epoch": 1.16, "learning_rate": 0.00023225120772946858, "loss": 0.6774, "step": 3610 }, { "epoch": 1.16, "learning_rate": 0.00023205797101449272, "loss": 0.6663, "step": 3620 }, { "epoch": 1.16, "learning_rate": 0.00023186473429951686, "loss": 0.6903, "step": 3630 }, { "epoch": 1.16, "learning_rate": 0.00023167149758454106, "loss": 0.6362, "step": 3640 }, { "epoch": 1.17, "learning_rate": 0.0002314782608695652, "loss": 0.6187, "step": 3650 }, { "epoch": 1.17, "learning_rate": 0.00023128502415458935, "loss": 0.6256, "step": 3660 }, { "epoch": 1.17, "learning_rate": 0.0002310917874396135, "loss": 0.6493, "step": 3670 }, { "epoch": 1.18, "learning_rate": 0.00023089855072463768, "loss": 0.635, "step": 3680 }, { "epoch": 1.18, "learning_rate": 0.00023070531400966183, "loss": 0.6973, "step": 3690 }, { "epoch": 1.18, "learning_rate": 0.00023051207729468597, "loss": 0.6562, "step": 3700 }, { "epoch": 1.19, "learning_rate": 0.0002303188405797101, "loss": 0.6327, "step": 3710 }, { "epoch": 1.19, "learning_rate": 0.00023012560386473425, "loss": 0.6736, "step": 3720 }, { "epoch": 1.19, "learning_rate": 0.00022993236714975845, "loss": 0.6178, "step": 3730 }, { "epoch": 1.2, "learning_rate": 0.0002297391304347826, "loss": 0.6574, "step": 3740 }, { "epoch": 1.2, "learning_rate": 0.00022954589371980674, "loss": 0.6848, "step": 3750 }, { "epoch": 1.2, "learning_rate": 0.00022935265700483088, "loss": 0.6478, "step": 3760 }, { "epoch": 1.21, "learning_rate": 0.00022915942028985508, "loss": 0.6144, "step": 3770 }, { "epoch": 1.21, "learning_rate": 0.00022896618357487922, "loss": 0.6609, "step": 3780 }, { "epoch": 1.21, "learning_rate": 0.00022877294685990336, "loss": 0.6744, "step": 3790 }, { "epoch": 1.22, "learning_rate": 0.0002285797101449275, "loss": 0.636, "step": 3800 }, { "epoch": 1.22, "eval_loss": 0.6865532398223877, "eval_runtime": 393.5432, "eval_samples_per_second": 5.082, "eval_steps_per_second": 0.635, "step": 3800 }, { "epoch": 1.22, "learning_rate": 0.00022838647342995165, "loss": 0.654, "step": 3810 }, { "epoch": 1.22, "learning_rate": 0.00022819323671497584, "loss": 0.6442, "step": 3820 }, { "epoch": 1.23, "learning_rate": 0.00022799999999999999, "loss": 0.6835, "step": 3830 }, { "epoch": 1.23, "learning_rate": 0.00022780676328502413, "loss": 0.6339, "step": 3840 }, { "epoch": 1.23, "learning_rate": 0.00022761352657004827, "loss": 0.6512, "step": 3850 }, { "epoch": 1.24, "learning_rate": 0.00022742028985507247, "loss": 0.622, "step": 3860 }, { "epoch": 1.24, "learning_rate": 0.0002272270531400966, "loss": 0.6802, "step": 3870 }, { "epoch": 1.24, "learning_rate": 0.00022703381642512075, "loss": 0.6911, "step": 3880 }, { "epoch": 1.24, "learning_rate": 0.0002268405797101449, "loss": 0.6815, "step": 3890 }, { "epoch": 1.25, "learning_rate": 0.0002266473429951691, "loss": 0.6569, "step": 3900 }, { "epoch": 1.25, "learning_rate": 0.00022645410628019323, "loss": 0.6363, "step": 3910 }, { "epoch": 1.25, "learning_rate": 0.00022626086956521738, "loss": 0.6371, "step": 3920 }, { "epoch": 1.26, "learning_rate": 0.00022606763285024152, "loss": 0.6458, "step": 3930 }, { "epoch": 1.26, "learning_rate": 0.00022587439613526566, "loss": 0.668, "step": 3940 }, { "epoch": 1.26, "learning_rate": 0.00022568115942028986, "loss": 0.6676, "step": 3950 }, { "epoch": 1.27, "learning_rate": 0.000225487922705314, "loss": 0.6692, "step": 3960 }, { "epoch": 1.27, "learning_rate": 0.00022529468599033814, "loss": 0.6502, "step": 3970 }, { "epoch": 1.27, "learning_rate": 0.0002251014492753623, "loss": 0.6387, "step": 3980 }, { "epoch": 1.28, "learning_rate": 0.00022490821256038646, "loss": 0.6475, "step": 3990 }, { "epoch": 1.28, "learning_rate": 0.00022471497584541063, "loss": 0.6627, "step": 4000 }, { "epoch": 1.28, "eval_loss": 0.6856961846351624, "eval_runtime": 387.1458, "eval_samples_per_second": 5.166, "eval_steps_per_second": 0.646, "step": 4000 }, { "epoch": 1.28, "learning_rate": 0.00022452173913043477, "loss": 0.6346, "step": 4010 }, { "epoch": 1.29, "learning_rate": 0.0002243285024154589, "loss": 0.6593, "step": 4020 }, { "epoch": 1.29, "learning_rate": 0.00022413526570048305, "loss": 0.6397, "step": 4030 }, { "epoch": 1.29, "learning_rate": 0.00022394202898550725, "loss": 0.641, "step": 4040 }, { "epoch": 1.3, "learning_rate": 0.0002237487922705314, "loss": 0.6766, "step": 4050 }, { "epoch": 1.3, "learning_rate": 0.00022355555555555554, "loss": 0.6931, "step": 4060 }, { "epoch": 1.3, "learning_rate": 0.00022336231884057968, "loss": 0.6618, "step": 4070 }, { "epoch": 1.31, "learning_rate": 0.00022316908212560385, "loss": 0.6655, "step": 4080 }, { "epoch": 1.31, "learning_rate": 0.00022297584541062802, "loss": 0.647, "step": 4090 }, { "epoch": 1.31, "learning_rate": 0.00022278260869565216, "loss": 0.6337, "step": 4100 }, { "epoch": 1.32, "learning_rate": 0.0002225893719806763, "loss": 0.6957, "step": 4110 }, { "epoch": 1.32, "learning_rate": 0.00022239613526570044, "loss": 0.6435, "step": 4120 }, { "epoch": 1.32, "learning_rate": 0.00022220289855072464, "loss": 0.6272, "step": 4130 }, { "epoch": 1.32, "learning_rate": 0.00022200966183574878, "loss": 0.6502, "step": 4140 }, { "epoch": 1.33, "learning_rate": 0.00022181642512077293, "loss": 0.6514, "step": 4150 }, { "epoch": 1.33, "learning_rate": 0.00022162318840579707, "loss": 0.6625, "step": 4160 }, { "epoch": 1.33, "learning_rate": 0.00022142995169082124, "loss": 0.6284, "step": 4170 }, { "epoch": 1.34, "learning_rate": 0.0002212367149758454, "loss": 0.6503, "step": 4180 }, { "epoch": 1.34, "learning_rate": 0.00022104347826086955, "loss": 0.6425, "step": 4190 }, { "epoch": 1.34, "learning_rate": 0.0002208502415458937, "loss": 0.6818, "step": 4200 }, { "epoch": 1.34, "eval_loss": 0.6859603524208069, "eval_runtime": 382.589, "eval_samples_per_second": 5.228, "eval_steps_per_second": 0.653, "step": 4200 }, { "epoch": 1.35, "learning_rate": 0.00022065700483091784, "loss": 0.6299, "step": 4210 }, { "epoch": 1.35, "learning_rate": 0.000220463768115942, "loss": 0.6508, "step": 4220 }, { "epoch": 1.35, "learning_rate": 0.00022027053140096618, "loss": 0.6572, "step": 4230 }, { "epoch": 1.36, "learning_rate": 0.00022007729468599032, "loss": 0.6781, "step": 4240 }, { "epoch": 1.36, "learning_rate": 0.00021988405797101446, "loss": 0.6806, "step": 4250 }, { "epoch": 1.36, "learning_rate": 0.00021969082125603863, "loss": 0.632, "step": 4260 }, { "epoch": 1.37, "learning_rate": 0.0002194975845410628, "loss": 0.6631, "step": 4270 }, { "epoch": 1.37, "learning_rate": 0.00021930434782608694, "loss": 0.6729, "step": 4280 }, { "epoch": 1.37, "learning_rate": 0.00021911111111111109, "loss": 0.6618, "step": 4290 }, { "epoch": 1.38, "learning_rate": 0.00021891787439613525, "loss": 0.6132, "step": 4300 }, { "epoch": 1.38, "learning_rate": 0.0002187246376811594, "loss": 0.6457, "step": 4310 }, { "epoch": 1.38, "learning_rate": 0.00021853140096618357, "loss": 0.6443, "step": 4320 }, { "epoch": 1.39, "learning_rate": 0.0002183381642512077, "loss": 0.653, "step": 4330 }, { "epoch": 1.39, "learning_rate": 0.00021814492753623185, "loss": 0.6706, "step": 4340 }, { "epoch": 1.39, "learning_rate": 0.00021795169082125602, "loss": 0.6684, "step": 4350 }, { "epoch": 1.4, "learning_rate": 0.00021775845410628016, "loss": 0.6597, "step": 4360 }, { "epoch": 1.4, "learning_rate": 0.00021756521739130433, "loss": 0.6478, "step": 4370 }, { "epoch": 1.4, "learning_rate": 0.00021737198067632848, "loss": 0.6411, "step": 4380 }, { "epoch": 1.4, "learning_rate": 0.00021717874396135265, "loss": 0.657, "step": 4390 }, { "epoch": 1.41, "learning_rate": 0.0002169855072463768, "loss": 0.663, "step": 4400 }, { "epoch": 1.41, "eval_loss": 0.6874400973320007, "eval_runtime": 387.4088, "eval_samples_per_second": 5.163, "eval_steps_per_second": 0.645, "step": 4400 }, { "epoch": 1.41, "learning_rate": 0.00021679227053140096, "loss": 0.6833, "step": 4410 }, { "epoch": 1.41, "learning_rate": 0.0002165990338164251, "loss": 0.6827, "step": 4420 }, { "epoch": 1.42, "learning_rate": 0.00021640579710144924, "loss": 0.6789, "step": 4430 }, { "epoch": 1.42, "learning_rate": 0.0002162125603864734, "loss": 0.6582, "step": 4440 }, { "epoch": 1.42, "learning_rate": 0.00021601932367149756, "loss": 0.6222, "step": 4450 }, { "epoch": 1.43, "learning_rate": 0.00021582608695652173, "loss": 0.6314, "step": 4460 }, { "epoch": 1.43, "learning_rate": 0.00021563285024154587, "loss": 0.6466, "step": 4470 }, { "epoch": 1.43, "learning_rate": 0.00021543961352657004, "loss": 0.6734, "step": 4480 }, { "epoch": 1.44, "learning_rate": 0.00021524637681159418, "loss": 0.6552, "step": 4490 }, { "epoch": 1.44, "learning_rate": 0.00021505314009661835, "loss": 0.7156, "step": 4500 }, { "epoch": 1.44, "learning_rate": 0.0002148599033816425, "loss": 0.6548, "step": 4510 }, { "epoch": 1.45, "learning_rate": 0.00021466666666666664, "loss": 0.7265, "step": 4520 }, { "epoch": 1.45, "learning_rate": 0.0002144734299516908, "loss": 0.6757, "step": 4530 }, { "epoch": 1.45, "learning_rate": 0.00021428019323671495, "loss": 0.6914, "step": 4540 }, { "epoch": 1.46, "learning_rate": 0.00021408695652173912, "loss": 0.6746, "step": 4550 }, { "epoch": 1.46, "learning_rate": 0.00021389371980676326, "loss": 0.7085, "step": 4560 }, { "epoch": 1.46, "learning_rate": 0.00021370048309178743, "loss": 0.7058, "step": 4570 }, { "epoch": 1.47, "learning_rate": 0.00021350724637681157, "loss": 0.6599, "step": 4580 }, { "epoch": 1.47, "learning_rate": 0.00021331400966183571, "loss": 0.6653, "step": 4590 }, { "epoch": 1.47, "learning_rate": 0.00021312077294685988, "loss": 0.6757, "step": 4600 }, { "epoch": 1.47, "eval_loss": 0.6850671172142029, "eval_runtime": 410.1023, "eval_samples_per_second": 4.877, "eval_steps_per_second": 0.61, "step": 4600 }, { "epoch": 1.48, "learning_rate": 0.00021292753623188405, "loss": 0.6644, "step": 4610 }, { "epoch": 1.48, "learning_rate": 0.0002127342995169082, "loss": 0.6444, "step": 4620 }, { "epoch": 1.48, "learning_rate": 0.00021254106280193234, "loss": 0.6548, "step": 4630 }, { "epoch": 1.48, "learning_rate": 0.0002123478260869565, "loss": 0.6361, "step": 4640 }, { "epoch": 1.49, "learning_rate": 0.00021215458937198065, "loss": 0.6726, "step": 4650 }, { "epoch": 1.49, "learning_rate": 0.00021196135265700482, "loss": 0.6451, "step": 4660 }, { "epoch": 1.49, "learning_rate": 0.00021176811594202896, "loss": 0.6939, "step": 4670 }, { "epoch": 1.5, "learning_rate": 0.0002115748792270531, "loss": 0.6569, "step": 4680 }, { "epoch": 1.5, "learning_rate": 0.00021138164251207728, "loss": 0.6405, "step": 4690 }, { "epoch": 1.5, "learning_rate": 0.00021118840579710145, "loss": 0.6261, "step": 4700 }, { "epoch": 1.51, "learning_rate": 0.0002109951690821256, "loss": 0.6544, "step": 4710 }, { "epoch": 1.51, "learning_rate": 0.00021080193236714973, "loss": 0.6367, "step": 4720 }, { "epoch": 1.51, "learning_rate": 0.0002106086956521739, "loss": 0.6691, "step": 4730 }, { "epoch": 1.52, "learning_rate": 0.00021041545893719804, "loss": 0.6228, "step": 4740 }, { "epoch": 1.52, "learning_rate": 0.0002102222222222222, "loss": 0.6628, "step": 4750 }, { "epoch": 1.52, "learning_rate": 0.00021002898550724635, "loss": 0.6678, "step": 4760 }, { "epoch": 1.53, "learning_rate": 0.0002098357487922705, "loss": 0.6699, "step": 4770 }, { "epoch": 1.53, "learning_rate": 0.00020964251207729467, "loss": 0.683, "step": 4780 }, { "epoch": 1.53, "learning_rate": 0.00020944927536231884, "loss": 0.688, "step": 4790 }, { "epoch": 1.54, "learning_rate": 0.00020925603864734298, "loss": 0.6661, "step": 4800 }, { "epoch": 1.54, "eval_loss": 0.6855675578117371, "eval_runtime": 385.7747, "eval_samples_per_second": 5.184, "eval_steps_per_second": 0.648, "step": 4800 }, { "epoch": 1.54, "learning_rate": 0.00020906280193236712, "loss": 0.6605, "step": 4810 }, { "epoch": 1.54, "learning_rate": 0.00020886956521739126, "loss": 0.7119, "step": 4820 }, { "epoch": 1.55, "learning_rate": 0.00020867632850241543, "loss": 0.6181, "step": 4830 }, { "epoch": 1.55, "learning_rate": 0.0002084830917874396, "loss": 0.6484, "step": 4840 }, { "epoch": 1.55, "learning_rate": 0.00020828985507246375, "loss": 0.6747, "step": 4850 }, { "epoch": 1.56, "learning_rate": 0.0002080966183574879, "loss": 0.6455, "step": 4860 }, { "epoch": 1.56, "learning_rate": 0.00020790338164251206, "loss": 0.6591, "step": 4870 }, { "epoch": 1.56, "learning_rate": 0.00020771014492753623, "loss": 0.6898, "step": 4880 }, { "epoch": 1.56, "learning_rate": 0.00020751690821256037, "loss": 0.6491, "step": 4890 }, { "epoch": 1.57, "learning_rate": 0.0002073236714975845, "loss": 0.66, "step": 4900 }, { "epoch": 1.57, "learning_rate": 0.00020713043478260866, "loss": 0.6423, "step": 4910 }, { "epoch": 1.57, "learning_rate": 0.00020693719806763285, "loss": 0.6102, "step": 4920 }, { "epoch": 1.58, "learning_rate": 0.000206743961352657, "loss": 0.6709, "step": 4930 }, { "epoch": 1.58, "learning_rate": 0.00020655072463768114, "loss": 0.6432, "step": 4940 }, { "epoch": 1.58, "learning_rate": 0.00020635748792270528, "loss": 0.6388, "step": 4950 }, { "epoch": 1.59, "learning_rate": 0.00020616425120772942, "loss": 0.6574, "step": 4960 }, { "epoch": 1.59, "learning_rate": 0.00020597101449275362, "loss": 0.6687, "step": 4970 }, { "epoch": 1.59, "learning_rate": 0.00020577777777777776, "loss": 0.674, "step": 4980 }, { "epoch": 1.6, "learning_rate": 0.0002055845410628019, "loss": 0.618, "step": 4990 }, { "epoch": 1.6, "learning_rate": 0.00020539130434782605, "loss": 0.689, "step": 5000 }, { "epoch": 1.6, "eval_loss": 0.6850703358650208, "eval_runtime": 372.4881, "eval_samples_per_second": 5.369, "eval_steps_per_second": 0.671, "step": 5000 }, { "epoch": 1.6, "learning_rate": 0.00020519806763285024, "loss": 0.6865, "step": 5010 }, { "epoch": 1.61, "learning_rate": 0.0002050048309178744, "loss": 0.6529, "step": 5020 }, { "epoch": 1.61, "learning_rate": 0.00020481159420289853, "loss": 0.6495, "step": 5030 }, { "epoch": 1.61, "learning_rate": 0.00020461835748792267, "loss": 0.6517, "step": 5040 }, { "epoch": 1.62, "learning_rate": 0.00020442512077294681, "loss": 0.6684, "step": 5050 }, { "epoch": 1.62, "learning_rate": 0.000204231884057971, "loss": 0.6412, "step": 5060 }, { "epoch": 1.62, "learning_rate": 0.00020403864734299515, "loss": 0.6463, "step": 5070 }, { "epoch": 1.63, "learning_rate": 0.0002038454106280193, "loss": 0.633, "step": 5080 }, { "epoch": 1.63, "learning_rate": 0.00020365217391304344, "loss": 0.6473, "step": 5090 }, { "epoch": 1.63, "learning_rate": 0.00020345893719806764, "loss": 0.6102, "step": 5100 }, { "epoch": 1.64, "learning_rate": 0.00020326570048309178, "loss": 0.6816, "step": 5110 }, { "epoch": 1.64, "learning_rate": 0.00020307246376811592, "loss": 0.6743, "step": 5120 }, { "epoch": 1.64, "learning_rate": 0.00020287922705314006, "loss": 0.6745, "step": 5130 }, { "epoch": 1.64, "learning_rate": 0.0002026859903381642, "loss": 0.6435, "step": 5140 }, { "epoch": 1.65, "learning_rate": 0.0002024927536231884, "loss": 0.6675, "step": 5150 }, { "epoch": 1.65, "learning_rate": 0.00020229951690821255, "loss": 0.6547, "step": 5160 }, { "epoch": 1.65, "learning_rate": 0.0002021062801932367, "loss": 0.6464, "step": 5170 }, { "epoch": 1.66, "learning_rate": 0.00020191304347826083, "loss": 0.6894, "step": 5180 }, { "epoch": 1.66, "learning_rate": 0.00020171980676328503, "loss": 0.6613, "step": 5190 }, { "epoch": 1.66, "learning_rate": 0.00020152657004830917, "loss": 0.6362, "step": 5200 }, { "epoch": 1.66, "eval_loss": 0.6850407719612122, "eval_runtime": 377.0156, "eval_samples_per_second": 5.305, "eval_steps_per_second": 0.663, "step": 5200 }, { "epoch": 1.67, "learning_rate": 0.0002013333333333333, "loss": 0.636, "step": 5210 }, { "epoch": 1.67, "learning_rate": 0.00020114009661835745, "loss": 0.6731, "step": 5220 }, { "epoch": 1.67, "learning_rate": 0.00020094685990338165, "loss": 0.6429, "step": 5230 }, { "epoch": 1.68, "learning_rate": 0.0002007536231884058, "loss": 0.6808, "step": 5240 }, { "epoch": 1.68, "learning_rate": 0.00020056038647342994, "loss": 0.6728, "step": 5250 }, { "epoch": 1.68, "learning_rate": 0.00020036714975845408, "loss": 0.6377, "step": 5260 }, { "epoch": 1.69, "learning_rate": 0.00020017391304347822, "loss": 0.6616, "step": 5270 }, { "epoch": 1.69, "learning_rate": 0.00019998067632850242, "loss": 0.6571, "step": 5280 }, { "epoch": 1.69, "learning_rate": 0.00019978743961352656, "loss": 0.6765, "step": 5290 }, { "epoch": 1.7, "learning_rate": 0.0001995942028985507, "loss": 0.6636, "step": 5300 }, { "epoch": 1.7, "learning_rate": 0.00019940096618357485, "loss": 0.6621, "step": 5310 }, { "epoch": 1.7, "learning_rate": 0.00019920772946859904, "loss": 0.6788, "step": 5320 }, { "epoch": 1.71, "learning_rate": 0.00019901449275362319, "loss": 0.6491, "step": 5330 }, { "epoch": 1.71, "learning_rate": 0.00019882125603864733, "loss": 0.6413, "step": 5340 }, { "epoch": 1.71, "learning_rate": 0.00019862801932367147, "loss": 0.6287, "step": 5350 }, { "epoch": 1.72, "learning_rate": 0.0001984347826086956, "loss": 0.6624, "step": 5360 }, { "epoch": 1.72, "learning_rate": 0.0001982415458937198, "loss": 0.6657, "step": 5370 }, { "epoch": 1.72, "learning_rate": 0.00019804830917874395, "loss": 0.6617, "step": 5380 }, { "epoch": 1.72, "learning_rate": 0.0001978550724637681, "loss": 0.6791, "step": 5390 }, { "epoch": 1.73, "learning_rate": 0.00019766183574879224, "loss": 0.6537, "step": 5400 }, { "epoch": 1.73, "eval_loss": 0.683772623538971, "eval_runtime": 380.6447, "eval_samples_per_second": 5.254, "eval_steps_per_second": 0.657, "step": 5400 }, { "epoch": 1.73, "learning_rate": 0.00019746859903381643, "loss": 0.5705, "step": 5410 }, { "epoch": 1.73, "learning_rate": 0.00019727536231884058, "loss": 0.634, "step": 5420 }, { "epoch": 1.74, "learning_rate": 0.00019708212560386472, "loss": 0.6558, "step": 5430 }, { "epoch": 1.74, "learning_rate": 0.00019688888888888886, "loss": 0.657, "step": 5440 }, { "epoch": 1.74, "learning_rate": 0.000196695652173913, "loss": 0.6615, "step": 5450 }, { "epoch": 1.75, "learning_rate": 0.0001965024154589372, "loss": 0.6656, "step": 5460 }, { "epoch": 1.75, "learning_rate": 0.00019630917874396134, "loss": 0.6286, "step": 5470 }, { "epoch": 1.75, "learning_rate": 0.0001961159420289855, "loss": 0.657, "step": 5480 }, { "epoch": 1.76, "learning_rate": 0.00019592270531400963, "loss": 0.6891, "step": 5490 }, { "epoch": 1.76, "learning_rate": 0.0001957294685990338, "loss": 0.6539, "step": 5500 }, { "epoch": 1.76, "learning_rate": 0.00019553623188405797, "loss": 0.6335, "step": 5510 }, { "epoch": 1.77, "learning_rate": 0.0001953429951690821, "loss": 0.6625, "step": 5520 }, { "epoch": 1.77, "learning_rate": 0.00019514975845410625, "loss": 0.6597, "step": 5530 }, { "epoch": 1.77, "learning_rate": 0.00019495652173913042, "loss": 0.6753, "step": 5540 }, { "epoch": 1.78, "learning_rate": 0.0001947632850241546, "loss": 0.6644, "step": 5550 }, { "epoch": 1.78, "learning_rate": 0.00019457004830917874, "loss": 0.6434, "step": 5560 }, { "epoch": 1.78, "learning_rate": 0.00019437681159420288, "loss": 0.6604, "step": 5570 }, { "epoch": 1.79, "learning_rate": 0.00019418357487922702, "loss": 0.6538, "step": 5580 }, { "epoch": 1.79, "learning_rate": 0.0001939903381642512, "loss": 0.639, "step": 5590 }, { "epoch": 1.79, "learning_rate": 0.00019379710144927536, "loss": 0.6668, "step": 5600 }, { "epoch": 1.79, "eval_loss": 0.6843588352203369, "eval_runtime": 378.4765, "eval_samples_per_second": 5.284, "eval_steps_per_second": 0.661, "step": 5600 }, { "epoch": 1.8, "learning_rate": 0.0001936038647342995, "loss": 0.6375, "step": 5610 }, { "epoch": 1.8, "learning_rate": 0.00019341062801932364, "loss": 0.6455, "step": 5620 }, { "epoch": 1.8, "learning_rate": 0.00019321739130434781, "loss": 0.6856, "step": 5630 }, { "epoch": 1.8, "learning_rate": 0.00019302415458937198, "loss": 0.648, "step": 5640 }, { "epoch": 1.81, "learning_rate": 0.00019283091787439613, "loss": 0.6005, "step": 5650 }, { "epoch": 1.81, "learning_rate": 0.00019263768115942027, "loss": 0.6719, "step": 5660 }, { "epoch": 1.81, "learning_rate": 0.0001924444444444444, "loss": 0.6588, "step": 5670 }, { "epoch": 1.82, "learning_rate": 0.00019225120772946858, "loss": 0.6517, "step": 5680 }, { "epoch": 1.82, "learning_rate": 0.00019205797101449275, "loss": 0.6489, "step": 5690 }, { "epoch": 1.82, "learning_rate": 0.0001918647342995169, "loss": 0.6283, "step": 5700 }, { "epoch": 1.83, "learning_rate": 0.00019167149758454104, "loss": 0.6523, "step": 5710 }, { "epoch": 1.83, "learning_rate": 0.0001914782608695652, "loss": 0.5914, "step": 5720 }, { "epoch": 1.83, "learning_rate": 0.00019128502415458935, "loss": 0.6403, "step": 5730 }, { "epoch": 1.84, "learning_rate": 0.00019109178743961352, "loss": 0.6426, "step": 5740 }, { "epoch": 1.84, "learning_rate": 0.00019089855072463766, "loss": 0.6609, "step": 5750 }, { "epoch": 1.84, "learning_rate": 0.0001907053140096618, "loss": 0.6394, "step": 5760 }, { "epoch": 1.85, "learning_rate": 0.00019051207729468597, "loss": 0.6546, "step": 5770 }, { "epoch": 1.85, "learning_rate": 0.00019031884057971014, "loss": 0.6879, "step": 5780 }, { "epoch": 1.85, "learning_rate": 0.00019012560386473429, "loss": 0.6116, "step": 5790 }, { "epoch": 1.86, "learning_rate": 0.00018993236714975843, "loss": 0.6477, "step": 5800 }, { "epoch": 1.86, "eval_loss": 0.6840969920158386, "eval_runtime": 385.7288, "eval_samples_per_second": 5.185, "eval_steps_per_second": 0.648, "step": 5800 }, { "epoch": 1.86, "learning_rate": 0.0001897391304347826, "loss": 0.6657, "step": 5810 }, { "epoch": 1.86, "learning_rate": 0.00018954589371980674, "loss": 0.6399, "step": 5820 }, { "epoch": 1.87, "learning_rate": 0.0001893526570048309, "loss": 0.6587, "step": 5830 }, { "epoch": 1.87, "learning_rate": 0.00018915942028985505, "loss": 0.6333, "step": 5840 }, { "epoch": 1.87, "learning_rate": 0.00018896618357487922, "loss": 0.6327, "step": 5850 }, { "epoch": 1.88, "learning_rate": 0.00018877294685990336, "loss": 0.6788, "step": 5860 }, { "epoch": 1.88, "learning_rate": 0.0001885797101449275, "loss": 0.6765, "step": 5870 }, { "epoch": 1.88, "learning_rate": 0.00018838647342995168, "loss": 0.6742, "step": 5880 }, { "epoch": 1.88, "learning_rate": 0.00018819323671497582, "loss": 0.6462, "step": 5890 }, { "epoch": 1.89, "learning_rate": 0.000188, "loss": 0.6907, "step": 5900 }, { "epoch": 1.89, "learning_rate": 0.00018780676328502413, "loss": 0.6645, "step": 5910 }, { "epoch": 1.89, "learning_rate": 0.0001876135265700483, "loss": 0.6719, "step": 5920 }, { "epoch": 1.9, "learning_rate": 0.00018742028985507244, "loss": 0.6417, "step": 5930 }, { "epoch": 1.9, "learning_rate": 0.0001872270531400966, "loss": 0.6383, "step": 5940 }, { "epoch": 1.9, "learning_rate": 0.00018703381642512076, "loss": 0.6342, "step": 5950 }, { "epoch": 1.91, "learning_rate": 0.0001868405797101449, "loss": 0.657, "step": 5960 }, { "epoch": 1.91, "learning_rate": 0.00018664734299516907, "loss": 0.6832, "step": 5970 }, { "epoch": 1.91, "learning_rate": 0.0001864541062801932, "loss": 0.6658, "step": 5980 }, { "epoch": 1.92, "learning_rate": 0.00018626086956521738, "loss": 0.6614, "step": 5990 }, { "epoch": 1.92, "learning_rate": 0.00018606763285024152, "loss": 0.6878, "step": 6000 }, { "epoch": 1.92, "eval_loss": 0.6828343272209167, "eval_runtime": 392.2472, "eval_samples_per_second": 5.099, "eval_steps_per_second": 0.637, "step": 6000 }, { "epoch": 1.92, "learning_rate": 0.0001858743961352657, "loss": 0.6408, "step": 6010 }, { "epoch": 1.93, "learning_rate": 0.00018568115942028984, "loss": 0.6498, "step": 6020 }, { "epoch": 1.93, "learning_rate": 0.000185487922705314, "loss": 0.6598, "step": 6030 }, { "epoch": 1.93, "learning_rate": 0.00018529468599033815, "loss": 0.6231, "step": 6040 }, { "epoch": 1.94, "learning_rate": 0.0001851014492753623, "loss": 0.6906, "step": 6050 }, { "epoch": 1.94, "learning_rate": 0.00018490821256038646, "loss": 0.6467, "step": 6060 }, { "epoch": 1.94, "learning_rate": 0.0001847149758454106, "loss": 0.6355, "step": 6070 }, { "epoch": 1.95, "learning_rate": 0.00018452173913043477, "loss": 0.6794, "step": 6080 }, { "epoch": 1.95, "learning_rate": 0.00018432850241545891, "loss": 0.6475, "step": 6090 }, { "epoch": 1.95, "learning_rate": 0.00018413526570048306, "loss": 0.6165, "step": 6100 }, { "epoch": 1.96, "learning_rate": 0.00018394202898550723, "loss": 0.6281, "step": 6110 }, { "epoch": 1.96, "learning_rate": 0.0001837487922705314, "loss": 0.6473, "step": 6120 }, { "epoch": 1.96, "learning_rate": 0.00018355555555555554, "loss": 0.6415, "step": 6130 }, { "epoch": 1.96, "learning_rate": 0.00018336231884057968, "loss": 0.659, "step": 6140 }, { "epoch": 1.97, "learning_rate": 0.00018316908212560385, "loss": 0.6821, "step": 6150 }, { "epoch": 1.97, "learning_rate": 0.00018297584541062802, "loss": 0.6631, "step": 6160 }, { "epoch": 1.97, "learning_rate": 0.00018278260869565216, "loss": 0.6332, "step": 6170 }, { "epoch": 1.98, "learning_rate": 0.0001825893719806763, "loss": 0.6561, "step": 6180 }, { "epoch": 1.98, "learning_rate": 0.00018239613526570045, "loss": 0.6654, "step": 6190 }, { "epoch": 1.98, "learning_rate": 0.00018220289855072462, "loss": 0.6656, "step": 6200 }, { "epoch": 1.98, "eval_loss": 0.6827040314674377, "eval_runtime": 411.107, "eval_samples_per_second": 4.865, "eval_steps_per_second": 0.608, "step": 6200 } ], "logging_steps": 10, "max_steps": 15625, "num_train_epochs": 5, "save_steps": 200, "total_flos": 9.076709518992998e+17, "trial_name": null, "trial_params": null }