diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,4864 @@ +{ + "best_metric": 0.9796821008984106, + "best_model_checkpoint": "test/checkpoint-1500", + "epoch": 13.377926421404682, + "global_step": 4000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.03, + "eval_accuracy": 0.8922687113634554, + "eval_f1": 0.0, + "eval_loss": 1.3138163089752197, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 42.6447, + "eval_samples_per_second": 10.06, + "eval_steps_per_second": 1.688, + "step": 10 + }, + { + "epoch": 0.07, + "eval_accuracy": 0.8922687113634554, + "eval_f1": 0.0, + "eval_loss": 0.7134402394294739, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 44.6528, + "eval_samples_per_second": 9.607, + "eval_steps_per_second": 1.612, + "step": 20 + }, + { + "epoch": 0.1, + "eval_accuracy": 0.8922687113634554, + "eval_f1": 0.0, + "eval_loss": 0.6043053865432739, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 44.2765, + "eval_samples_per_second": 9.689, + "eval_steps_per_second": 1.626, + "step": 30 + }, + { + "epoch": 0.13, + "eval_accuracy": 0.8922687113634554, + "eval_f1": 0.0, + "eval_loss": 0.5121276378631592, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 45.3257, + "eval_samples_per_second": 9.465, + "eval_steps_per_second": 1.589, + "step": 40 + }, + { + "epoch": 0.17, + "eval_accuracy": 0.8922687113634554, + "eval_f1": 0.0, + "eval_loss": 0.4018501937389374, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 44.7272, + "eval_samples_per_second": 9.591, + "eval_steps_per_second": 1.61, + "step": 50 + }, + { + "epoch": 0.2, + "eval_accuracy": 0.908479477859436, + "eval_f1": 0.0, + "eval_loss": 0.3501987159252167, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 44.7765, + "eval_samples_per_second": 9.581, + "eval_steps_per_second": 1.608, + "step": 60 + }, + { + "epoch": 0.23, + "eval_accuracy": 0.9237218434108938, + "eval_f1": 0.0, + "eval_loss": 0.29354768991470337, + "eval_precision": 0.0, + "eval_recall": 0.0, + "eval_runtime": 44.776, + "eval_samples_per_second": 9.581, + "eval_steps_per_second": 1.608, + "step": 70 + }, + { + "epoch": 0.27, + "eval_accuracy": 0.9264413255153751, + "eval_f1": 0.0008230452674897119, + "eval_loss": 0.24890266358852386, + "eval_precision": 0.001594896331738437, + "eval_recall": 0.0005546311702717693, + "eval_runtime": 44.807, + "eval_samples_per_second": 9.574, + "eval_steps_per_second": 1.607, + "step": 80 + }, + { + "epoch": 0.3, + "eval_accuracy": 0.9392427900559815, + "eval_f1": 0.013897161008536827, + "eval_loss": 0.2203027307987213, + "eval_precision": 0.02445842068483578, + "eval_recall": 0.009706045479755962, + "eval_runtime": 44.7926, + "eval_samples_per_second": 9.577, + "eval_steps_per_second": 1.607, + "step": 90 + }, + { + "epoch": 0.33, + "eval_accuracy": 0.935727361969701, + "eval_f1": 0.00045840018336007336, + "eval_loss": 0.19940191507339478, + "eval_precision": 0.001321003963011889, + "eval_recall": 0.00027731558513588466, + "eval_runtime": 44.7253, + "eval_samples_per_second": 9.592, + "eval_steps_per_second": 1.61, + "step": 100 + }, + { + "epoch": 0.37, + "eval_accuracy": 0.943779682152238, + "eval_f1": 0.07579030041233065, + "eval_loss": 0.1813557893037796, + "eval_precision": 0.12979152656355078, + "eval_recall": 0.053521907931225736, + "eval_runtime": 44.6764, + "eval_samples_per_second": 9.602, + "eval_steps_per_second": 1.612, + "step": 110 + }, + { + "epoch": 0.4, + "eval_accuracy": 0.9479053354912313, + "eval_f1": 0.11372837688627163, + "eval_loss": 0.16338485479354858, + "eval_precision": 0.16903719912472648, + "eval_recall": 0.08569051580698835, + "eval_runtime": 44.7776, + "eval_samples_per_second": 9.581, + "eval_steps_per_second": 1.608, + "step": 120 + }, + { + "epoch": 0.43, + "eval_accuracy": 0.9628027911172429, + "eval_f1": 0.33931930904737473, + "eval_loss": 0.14094215631484985, + "eval_precision": 0.4426595269968764, + "eval_recall": 0.27509706045479754, + "eval_runtime": 44.9683, + "eval_samples_per_second": 9.54, + "eval_steps_per_second": 1.601, + "step": 130 + }, + { + "epoch": 0.47, + "eval_accuracy": 0.9663580165026133, + "eval_f1": 0.34743875278396436, + "eval_loss": 0.12451943010091782, + "eval_precision": 0.40746268656716417, + "eval_recall": 0.30282861896838603, + "eval_runtime": 44.682, + "eval_samples_per_second": 9.601, + "eval_steps_per_second": 1.611, + "step": 140 + }, + { + "epoch": 0.5, + "eval_accuracy": 0.9711734896924995, + "eval_f1": 0.47246835443037977, + "eval_loss": 0.11197753250598907, + "eval_precision": 0.5501105379513633, + "eval_recall": 0.4140321686078758, + "eval_runtime": 44.7253, + "eval_samples_per_second": 9.592, + "eval_steps_per_second": 1.61, + "step": 150 + }, + { + "epoch": 0.54, + "eval_accuracy": 0.9736674537688043, + "eval_f1": 0.5392111368909513, + "eval_loss": 0.09976229071617126, + "eval_precision": 0.6096537250786989, + "eval_recall": 0.48336106489184694, + "eval_runtime": 44.7165, + "eval_samples_per_second": 9.594, + "eval_steps_per_second": 1.61, + "step": 160 + }, + { + "epoch": 0.57, + "eval_accuracy": 0.9746623862460534, + "eval_f1": 0.5685945368533496, + "eval_loss": 0.09186050295829773, + "eval_precision": 0.6321683067526298, + "eval_recall": 0.5166389351081531, + "eval_runtime": 44.7738, + "eval_samples_per_second": 9.582, + "eval_steps_per_second": 1.608, + "step": 170 + }, + { + "epoch": 0.6, + "eval_accuracy": 0.9771696160887214, + "eval_f1": 0.5888754534461911, + "eval_loss": 0.0845290869474411, + "eval_precision": 0.6471760797342193, + "eval_recall": 0.5402107598447032, + "eval_runtime": 44.7399, + "eval_samples_per_second": 9.589, + "eval_steps_per_second": 1.609, + "step": 180 + }, + { + "epoch": 0.64, + "eval_accuracy": 0.980194210819559, + "eval_f1": 0.6471658078205517, + "eval_loss": 0.07714465260505676, + "eval_precision": 0.7135695187165776, + "eval_recall": 0.5920687742651137, + "eval_runtime": 44.734, + "eval_samples_per_second": 9.59, + "eval_steps_per_second": 1.61, + "step": 190 + }, + { + "epoch": 0.67, + "eval_accuracy": 0.9795839855668462, + "eval_f1": 0.7072773084333779, + "eval_loss": 0.07666940242052078, + "eval_precision": 0.7596306908627826, + "eval_recall": 0.6616749861342207, + "eval_runtime": 44.7886, + "eval_samples_per_second": 9.578, + "eval_steps_per_second": 1.608, + "step": 200 + }, + { + "epoch": 0.7, + "eval_accuracy": 0.9832586028494866, + "eval_f1": 0.7367480643240022, + "eval_loss": 0.06706634908914566, + "eval_precision": 0.7954983922829582, + "eval_recall": 0.6860787576261785, + "eval_runtime": 44.9641, + "eval_samples_per_second": 9.541, + "eval_steps_per_second": 1.601, + "step": 210 + }, + { + "epoch": 0.74, + "eval_accuracy": 0.9847576344485421, + "eval_f1": 0.7383826191913097, + "eval_loss": 0.06665363162755966, + "eval_precision": 0.8097286565188617, + "eval_recall": 0.6785912368275097, + "eval_runtime": 44.8134, + "eval_samples_per_second": 9.573, + "eval_steps_per_second": 1.607, + "step": 220 + }, + { + "epoch": 0.77, + "eval_accuracy": 0.9845321164203656, + "eval_f1": 0.752096513167574, + "eval_loss": 0.06071800738573074, + "eval_precision": 0.8010028204324663, + "eval_recall": 0.7088186356073212, + "eval_runtime": 44.7268, + "eval_samples_per_second": 9.592, + "eval_steps_per_second": 1.61, + "step": 230 + }, + { + "epoch": 0.8, + "eval_accuracy": 0.986482184075774, + "eval_f1": 0.7756542193472508, + "eval_loss": 0.057662855833768845, + "eval_precision": 0.8254067584480601, + "eval_recall": 0.7315585135884637, + "eval_runtime": 44.6791, + "eval_samples_per_second": 9.602, + "eval_steps_per_second": 1.611, + "step": 240 + }, + { + "epoch": 0.84, + "eval_accuracy": 0.9871454723939402, + "eval_f1": 0.7851227662356531, + "eval_loss": 0.054133981466293335, + "eval_precision": 0.8245346353371986, + "eval_recall": 0.7493067110371603, + "eval_runtime": 44.7641, + "eval_samples_per_second": 9.584, + "eval_steps_per_second": 1.608, + "step": 250 + }, + { + "epoch": 0.87, + "eval_accuracy": 0.9884985805629991, + "eval_f1": 0.8150755242704208, + "eval_loss": 0.05026474595069885, + "eval_precision": 0.8649237472766884, + "eval_recall": 0.7706600110926234, + "eval_runtime": 44.9736, + "eval_samples_per_second": 9.539, + "eval_steps_per_second": 1.601, + "step": 260 + }, + { + "epoch": 0.9, + "eval_accuracy": 0.9881934679366426, + "eval_f1": 0.801503541997976, + "eval_loss": 0.04910367354750633, + "eval_precision": 0.8372093023255814, + "eval_recall": 0.7687188019966722, + "eval_runtime": 44.8146, + "eval_samples_per_second": 9.573, + "eval_steps_per_second": 1.607, + "step": 270 + }, + { + "epoch": 0.94, + "eval_accuracy": 0.987875089543923, + "eval_f1": 0.8106457242582897, + "eval_loss": 0.04733191058039665, + "eval_precision": 0.8522935779816514, + "eval_recall": 0.7728785357737105, + "eval_runtime": 44.8863, + "eval_samples_per_second": 9.557, + "eval_steps_per_second": 1.604, + "step": 280 + }, + { + "epoch": 0.97, + "eval_accuracy": 0.9887904274229923, + "eval_f1": 0.8112208387004436, + "eval_loss": 0.046026937663555145, + "eval_precision": 0.8382135462880804, + "eval_recall": 0.785912368275097, + "eval_runtime": 44.8674, + "eval_samples_per_second": 9.562, + "eval_steps_per_second": 1.605, + "step": 290 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.9894404499747951, + "eval_f1": 0.8500071766901106, + "eval_loss": 0.0429680198431015, + "eval_precision": 0.880987801249628, + "eval_recall": 0.8211314475873545, + "eval_runtime": 45.1867, + "eval_samples_per_second": 9.494, + "eval_steps_per_second": 1.593, + "step": 300 + }, + { + "epoch": 1.04, + "eval_accuracy": 0.9901170040593245, + "eval_f1": 0.8402282453637661, + "eval_loss": 0.04167770594358444, + "eval_precision": 0.8651586368977673, + "eval_recall": 0.8166943982251803, + "eval_runtime": 44.8739, + "eval_samples_per_second": 9.56, + "eval_steps_per_second": 1.604, + "step": 310 + }, + { + "epoch": 1.07, + "eval_accuracy": 0.9908333554429439, + "eval_f1": 0.8512689635616051, + "eval_loss": 0.039857033640146255, + "eval_precision": 0.8709022338265158, + "eval_recall": 0.8325013865779257, + "eval_runtime": 44.8161, + "eval_samples_per_second": 9.572, + "eval_steps_per_second": 1.607, + "step": 320 + }, + { + "epoch": 1.1, + "eval_accuracy": 0.9919211482847364, + "eval_f1": 0.8664764621968616, + "eval_loss": 0.03763346001505852, + "eval_precision": 0.8921856639247944, + "eval_recall": 0.8422074320576817, + "eval_runtime": 45.0124, + "eval_samples_per_second": 9.531, + "eval_steps_per_second": 1.6, + "step": 330 + }, + { + "epoch": 1.14, + "eval_accuracy": 0.992106869013823, + "eval_f1": 0.8503758332151468, + "eval_loss": 0.035481277853250504, + "eval_precision": 0.8702467343976777, + "eval_recall": 0.8313921242373822, + "eval_runtime": 44.8357, + "eval_samples_per_second": 9.568, + "eval_steps_per_second": 1.606, + "step": 340 + }, + { + "epoch": 1.17, + "eval_accuracy": 0.9925048420047227, + "eval_f1": 0.8676303854875284, + "eval_loss": 0.03401191905140877, + "eval_precision": 0.8872463768115942, + "eval_recall": 0.8488630061009429, + "eval_runtime": 44.9137, + "eval_samples_per_second": 9.552, + "eval_steps_per_second": 1.603, + "step": 350 + }, + { + "epoch": 1.2, + "eval_accuracy": 0.9926905627338092, + "eval_f1": 0.8718825708992448, + "eval_loss": 0.032262638211250305, + "eval_precision": 0.8968044561712107, + "eval_recall": 0.8483083749306711, + "eval_runtime": 44.876, + "eval_samples_per_second": 9.56, + "eval_steps_per_second": 1.604, + "step": 360 + }, + { + "epoch": 1.24, + "eval_accuracy": 0.9930487384256188, + "eval_f1": 0.8813077469793889, + "eval_loss": 0.03254028782248497, + "eval_precision": 0.9040536599591718, + "eval_recall": 0.8596783139212424, + "eval_runtime": 44.7683, + "eval_samples_per_second": 9.583, + "eval_steps_per_second": 1.608, + "step": 370 + }, + { + "epoch": 1.27, + "eval_accuracy": 0.9927038285001725, + "eval_f1": 0.8627060830017056, + "eval_loss": 0.030650299042463303, + "eval_precision": 0.8848396501457726, + "eval_recall": 0.8416528008874099, + "eval_runtime": 44.7059, + "eval_samples_per_second": 9.596, + "eval_steps_per_second": 1.611, + "step": 380 + }, + { + "epoch": 1.3, + "eval_accuracy": 0.9934467114165185, + "eval_f1": 0.8900583131844688, + "eval_loss": 0.029178058728575706, + "eval_precision": 0.9135766423357664, + "eval_recall": 0.867720465890183, + "eval_runtime": 44.67, + "eval_samples_per_second": 9.604, + "eval_steps_per_second": 1.612, + "step": 390 + }, + { + "epoch": 1.34, + "eval_accuracy": 0.993300787986522, + "eval_f1": 0.8887311950042579, + "eval_loss": 0.02753848023712635, + "eval_precision": 0.9101744186046512, + "eval_recall": 0.8682750970604548, + "eval_runtime": 44.9312, + "eval_samples_per_second": 9.548, + "eval_steps_per_second": 1.602, + "step": 400 + }, + { + "epoch": 1.37, + "eval_accuracy": 0.9936456979119683, + "eval_f1": 0.894469525959368, + "eval_loss": 0.027282511815428734, + "eval_precision": 0.9103963239517519, + "eval_recall": 0.8790904048807543, + "eval_runtime": 44.6914, + "eval_samples_per_second": 9.599, + "eval_steps_per_second": 1.611, + "step": 410 + }, + { + "epoch": 1.4, + "eval_accuracy": 0.9922925897429095, + "eval_f1": 0.8725379853685987, + "eval_loss": 0.02893451787531376, + "eval_precision": 0.8854940034266133, + "eval_recall": 0.8599556295063783, + "eval_runtime": 44.7493, + "eval_samples_per_second": 9.587, + "eval_steps_per_second": 1.609, + "step": 420 + }, + { + "epoch": 1.44, + "eval_accuracy": 0.9938712159401448, + "eval_f1": 0.8959954719117023, + "eval_loss": 0.024990031495690346, + "eval_precision": 0.9147645189251662, + "eval_recall": 0.8779811425402108, + "eval_runtime": 44.6669, + "eval_samples_per_second": 9.604, + "eval_steps_per_second": 1.612, + "step": 430 + }, + { + "epoch": 1.47, + "eval_accuracy": 0.9938181528746916, + "eval_f1": 0.8929327126534067, + "eval_loss": 0.026491543278098106, + "eval_precision": 0.9086993970714901, + "eval_recall": 0.8777038269550749, + "eval_runtime": 44.7596, + "eval_samples_per_second": 9.585, + "eval_steps_per_second": 1.609, + "step": 440 + }, + { + "epoch": 1.51, + "eval_accuracy": 0.9945610357910376, + "eval_f1": 0.9141895681435781, + "eval_loss": 0.0247773639857769, + "eval_precision": 0.9245604083947816, + "eval_recall": 0.9040488075429839, + "eval_runtime": 44.7459, + "eval_samples_per_second": 9.587, + "eval_steps_per_second": 1.609, + "step": 450 + }, + { + "epoch": 1.54, + "eval_accuracy": 0.9943753150619511, + "eval_f1": 0.9076814658210007, + "eval_loss": 0.0243705864995718, + "eval_precision": 0.9229005445686443, + "eval_recall": 0.8929561841375485, + "eval_runtime": 44.7374, + "eval_samples_per_second": 9.589, + "eval_steps_per_second": 1.609, + "step": 460 + }, + { + "epoch": 1.57, + "eval_accuracy": 0.994534504258311, + "eval_f1": 0.9149444678757205, + "eval_loss": 0.023830989375710487, + "eval_precision": 0.9278585685771314, + "eval_recall": 0.9023849140321686, + "eval_runtime": 44.7741, + "eval_samples_per_second": 9.581, + "eval_steps_per_second": 1.608, + "step": 470 + }, + { + "epoch": 1.61, + "eval_accuracy": 0.9947069592210342, + "eval_f1": 0.9177099451399635, + "eval_loss": 0.022406980395317078, + "eval_precision": 0.931201827005424, + "eval_recall": 0.9046034387132557, + "eval_runtime": 44.7025, + "eval_samples_per_second": 9.597, + "eval_steps_per_second": 1.611, + "step": 480 + }, + { + "epoch": 1.64, + "eval_accuracy": 0.9947069592210342, + "eval_f1": 0.9264129826524903, + "eval_loss": 0.023025579750537872, + "eval_precision": 0.9347826086956522, + "eval_recall": 0.918191902384914, + "eval_runtime": 45.0649, + "eval_samples_per_second": 9.52, + "eval_steps_per_second": 1.598, + "step": 490 + }, + { + "epoch": 1.67, + "learning_rate": 9.5e-06, + "loss": 0.1784, + "step": 500 + }, + { + "epoch": 1.67, + "eval_accuracy": 0.9953702475392003, + "eval_f1": 0.9400780379041248, + "eval_loss": 0.022061465308070183, + "eval_precision": 0.9448179271708683, + "eval_recall": 0.9353854686633388, + "eval_runtime": 44.7675, + "eval_samples_per_second": 9.583, + "eval_steps_per_second": 1.608, + "step": 500 + }, + { + "epoch": 1.71, + "eval_accuracy": 0.9954100448382903, + "eval_f1": 0.9358616843279419, + "eval_loss": 0.02172040194272995, + "eval_precision": 0.9411104879416713, + "eval_recall": 0.9306711037160288, + "eval_runtime": 44.9669, + "eval_samples_per_second": 9.54, + "eval_steps_per_second": 1.601, + "step": 510 + }, + { + "epoch": 1.74, + "eval_accuracy": 0.9952243241092038, + "eval_f1": 0.9297780259667735, + "eval_loss": 0.021418150514364243, + "eval_precision": 0.9361821759910036, + "eval_recall": 0.9234608985024958, + "eval_runtime": 44.7829, + "eval_samples_per_second": 9.58, + "eval_steps_per_second": 1.608, + "step": 520 + }, + { + "epoch": 1.77, + "eval_accuracy": 0.9950651349128439, + "eval_f1": 0.9294002516426674, + "eval_loss": 0.02137417532503605, + "eval_precision": 0.937129968987877, + "eval_recall": 0.9217970049916805, + "eval_runtime": 44.837, + "eval_samples_per_second": 9.568, + "eval_steps_per_second": 1.606, + "step": 530 + }, + { + "epoch": 1.81, + "eval_accuracy": 0.9954100448382903, + "eval_f1": 0.9379098646574578, + "eval_loss": 0.02044159732758999, + "eval_precision": 0.94383600112328, + "eval_recall": 0.9320576816417082, + "eval_runtime": 44.8548, + "eval_samples_per_second": 9.564, + "eval_steps_per_second": 1.605, + "step": 540 + }, + { + "epoch": 1.84, + "eval_accuracy": 0.9959274097264599, + "eval_f1": 0.9561111111111111, + "eval_loss": 0.020166810601949692, + "eval_precision": 0.9577072899276572, + "eval_recall": 0.954520244037715, + "eval_runtime": 44.8166, + "eval_samples_per_second": 9.572, + "eval_steps_per_second": 1.607, + "step": 550 + }, + { + "epoch": 1.87, + "eval_accuracy": 0.9952375898755671, + "eval_f1": 0.9289403881055422, + "eval_loss": 0.020729683339595795, + "eval_precision": 0.935338768625246, + "eval_recall": 0.9226289517470881, + "eval_runtime": 44.8369, + "eval_samples_per_second": 9.568, + "eval_steps_per_second": 1.606, + "step": 560 + }, + { + "epoch": 1.91, + "eval_accuracy": 0.9960600673900931, + "eval_f1": 0.9606211869107044, + "eval_loss": 0.019887683913111687, + "eval_precision": 0.9606211869107044, + "eval_recall": 0.9606211869107044, + "eval_runtime": 44.8366, + "eval_samples_per_second": 9.568, + "eval_steps_per_second": 1.606, + "step": 570 + }, + { + "epoch": 1.94, + "eval_accuracy": 0.9956753601655568, + "eval_f1": 0.9422407794015311, + "eval_loss": 0.021609965711832047, + "eval_precision": 0.945794914780665, + "eval_recall": 0.9387132556849695, + "eval_runtime": 44.9147, + "eval_samples_per_second": 9.551, + "eval_steps_per_second": 1.603, + "step": 580 + }, + { + "epoch": 1.97, + "eval_accuracy": 0.9960468016237298, + "eval_f1": 0.9564975677553856, + "eval_loss": 0.019716205075383186, + "eval_precision": 0.9587628865979382, + "eval_recall": 0.9542429284525791, + "eval_runtime": 45.3593, + "eval_samples_per_second": 9.458, + "eval_steps_per_second": 1.587, + "step": 590 + }, + { + "epoch": 2.01, + "eval_accuracy": 0.9957549547637367, + "eval_f1": 0.9439577249339453, + "eval_loss": 0.020143885165452957, + "eval_precision": 0.9467224546722455, + "eval_recall": 0.9412090959511925, + "eval_runtime": 45.0377, + "eval_samples_per_second": 9.525, + "eval_steps_per_second": 1.599, + "step": 600 + }, + { + "epoch": 2.04, + "eval_accuracy": 0.9955029052028336, + "eval_f1": 0.9359207036158034, + "eval_loss": 0.019947798922657967, + "eval_precision": 0.9423671633398931, + "eval_recall": 0.9295618413754853, + "eval_runtime": 45.1113, + "eval_samples_per_second": 9.51, + "eval_steps_per_second": 1.596, + "step": 610 + }, + { + "epoch": 2.07, + "eval_accuracy": 0.9961131304555464, + "eval_f1": 0.9537564227190668, + "eval_loss": 0.01930239051580429, + "eval_precision": 0.9552155771905424, + "eval_recall": 0.9523017193566279, + "eval_runtime": 45.0399, + "eval_samples_per_second": 9.525, + "eval_steps_per_second": 1.599, + "step": 620 + }, + { + "epoch": 2.11, + "eval_accuracy": 0.9959141439600966, + "eval_f1": 0.9510664993726474, + "eval_loss": 0.020125582814216614, + "eval_precision": 0.9562657695542472, + "eval_recall": 0.9459234608985025, + "eval_runtime": 44.9443, + "eval_samples_per_second": 9.545, + "eval_steps_per_second": 1.602, + "step": 630 + }, + { + "epoch": 2.14, + "eval_accuracy": 0.9965376349791727, + "eval_f1": 0.9709141274238227, + "eval_loss": 0.01933199167251587, + "eval_precision": 0.9698395130049806, + "eval_recall": 0.9719911259012757, + "eval_runtime": 44.9595, + "eval_samples_per_second": 9.542, + "eval_steps_per_second": 1.601, + "step": 640 + }, + { + "epoch": 2.17, + "eval_accuracy": 0.9962988511846329, + "eval_f1": 0.9642262895174709, + "eval_loss": 0.0191953107714653, + "eval_precision": 0.9642262895174709, + "eval_recall": 0.9642262895174709, + "eval_runtime": 44.8109, + "eval_samples_per_second": 9.574, + "eval_steps_per_second": 1.607, + "step": 650 + }, + { + "epoch": 2.21, + "eval_accuracy": 0.9957814862964633, + "eval_f1": 0.947662247034194, + "eval_loss": 0.019274834543466568, + "eval_precision": 0.9539196403484125, + "eval_recall": 0.9414864115363284, + "eval_runtime": 45.2879, + "eval_samples_per_second": 9.473, + "eval_steps_per_second": 1.59, + "step": 660 + }, + { + "epoch": 2.24, + "eval_accuracy": 0.9965906980446261, + "eval_f1": 0.9721568084222192, + "eval_loss": 0.018348801881074905, + "eval_precision": 0.9712150567395517, + "eval_recall": 0.9731003882418192, + "eval_runtime": 44.979, + "eval_samples_per_second": 9.538, + "eval_steps_per_second": 1.601, + "step": 670 + }, + { + "epoch": 2.27, + "eval_accuracy": 0.9968029503064392, + "eval_f1": 0.975387168141593, + "eval_loss": 0.018433524295687675, + "eval_precision": 0.9726971869829013, + "eval_recall": 0.9780920687742651, + "eval_runtime": 44.9845, + "eval_samples_per_second": 9.537, + "eval_steps_per_second": 1.601, + "step": 680 + }, + { + "epoch": 2.31, + "eval_accuracy": 0.9967631530073492, + "eval_f1": 0.9695880564003317, + "eval_loss": 0.018039193004369736, + "eval_precision": 0.9666482910694597, + "eval_recall": 0.9725457570715474, + "eval_runtime": 44.9711, + "eval_samples_per_second": 9.539, + "eval_steps_per_second": 1.601, + "step": 690 + }, + { + "epoch": 2.34, + "eval_accuracy": 0.9968029503064392, + "eval_f1": 0.9738625363020329, + "eval_loss": 0.01683180034160614, + "eval_precision": 0.9713103448275862, + "eval_recall": 0.9764281752634498, + "eval_runtime": 44.9102, + "eval_samples_per_second": 9.552, + "eval_steps_per_second": 1.603, + "step": 700 + }, + { + "epoch": 2.37, + "eval_accuracy": 0.9969223422037091, + "eval_f1": 0.9755152856550007, + "eval_loss": 0.01656004600226879, + "eval_precision": 0.9732266077836047, + "eval_recall": 0.9778147531891292, + "eval_runtime": 44.9636, + "eval_samples_per_second": 9.541, + "eval_steps_per_second": 1.601, + "step": 710 + }, + { + "epoch": 2.41, + "eval_accuracy": 0.9967498872409859, + "eval_f1": 0.973655019412091, + "eval_loss": 0.016868896782398224, + "eval_precision": 0.973655019412091, + "eval_recall": 0.973655019412091, + "eval_runtime": 44.8793, + "eval_samples_per_second": 9.559, + "eval_steps_per_second": 1.604, + "step": 720 + }, + { + "epoch": 2.44, + "eval_accuracy": 0.9968427476055292, + "eval_f1": 0.975765129483451, + "eval_loss": 0.01695641689002514, + "eval_precision": 0.9745504840940525, + "eval_recall": 0.9769828064337216, + "eval_runtime": 44.9769, + "eval_samples_per_second": 9.538, + "eval_steps_per_second": 1.601, + "step": 730 + }, + { + "epoch": 2.47, + "eval_accuracy": 0.9966968241755326, + "eval_f1": 0.9696128763701957, + "eval_loss": 0.017096424475312233, + "eval_precision": 0.9702860316578729, + "eval_recall": 0.968940654464781, + "eval_runtime": 45.2584, + "eval_samples_per_second": 9.479, + "eval_steps_per_second": 1.591, + "step": 740 + }, + { + "epoch": 2.51, + "eval_accuracy": 0.9968294818391659, + "eval_f1": 0.9747992245915259, + "eval_loss": 0.016900014132261276, + "eval_precision": 0.9734513274336283, + "eval_recall": 0.9761508596783139, + "eval_runtime": 44.9939, + "eval_samples_per_second": 9.535, + "eval_steps_per_second": 1.6, + "step": 750 + }, + { + "epoch": 2.54, + "eval_accuracy": 0.9969090764373458, + "eval_f1": 0.9747992245915259, + "eval_loss": 0.01711142621934414, + "eval_precision": 0.9734513274336283, + "eval_recall": 0.9761508596783139, + "eval_runtime": 44.9916, + "eval_samples_per_second": 9.535, + "eval_steps_per_second": 1.6, + "step": 760 + }, + { + "epoch": 2.58, + "eval_accuracy": 0.9968029503064392, + "eval_f1": 0.9726957726957727, + "eval_loss": 0.017322950065135956, + "eval_precision": 0.972291493488501, + "eval_recall": 0.9731003882418192, + "eval_runtime": 44.9667, + "eval_samples_per_second": 9.54, + "eval_steps_per_second": 1.601, + "step": 770 + }, + { + "epoch": 2.61, + "eval_accuracy": 0.9970019368018891, + "eval_f1": 0.9776853776853778, + "eval_loss": 0.015976430848240852, + "eval_precision": 0.9772790246605708, + "eval_recall": 0.9780920687742651, + "eval_runtime": 44.9823, + "eval_samples_per_second": 9.537, + "eval_steps_per_second": 1.601, + "step": 780 + }, + { + "epoch": 2.64, + "eval_accuracy": 0.9968427476055292, + "eval_f1": 0.9725685785536159, + "eval_loss": 0.016221042722463608, + "eval_precision": 0.9717607973421927, + "eval_recall": 0.9733777038269551, + "eval_runtime": 45.0356, + "eval_samples_per_second": 9.526, + "eval_steps_per_second": 1.599, + "step": 790 + }, + { + "epoch": 2.68, + "eval_accuracy": 0.9967764187737126, + "eval_f1": 0.9714760454167821, + "eval_loss": 0.016350209712982178, + "eval_precision": 0.9701327433628318, + "eval_recall": 0.9728230726566833, + "eval_runtime": 44.9594, + "eval_samples_per_second": 9.542, + "eval_steps_per_second": 1.601, + "step": 800 + }, + { + "epoch": 2.71, + "eval_accuracy": 0.9969223422037091, + "eval_f1": 0.9753325942350333, + "eval_loss": 0.0164735559374094, + "eval_precision": 0.974792243767313, + "eval_recall": 0.9758735440931781, + "eval_runtime": 44.8809, + "eval_samples_per_second": 9.559, + "eval_steps_per_second": 1.604, + "step": 810 + }, + { + "epoch": 2.74, + "eval_accuracy": 0.9968692791382558, + "eval_f1": 0.9754541672444876, + "eval_loss": 0.01638418808579445, + "eval_precision": 0.9755894590846047, + "eval_recall": 0.9753189129229063, + "eval_runtime": 45.2809, + "eval_samples_per_second": 9.474, + "eval_steps_per_second": 1.59, + "step": 820 + }, + { + "epoch": 2.78, + "eval_accuracy": 0.9951712610437505, + "eval_f1": 0.9239864864864865, + "eval_loss": 0.0193793848156929, + "eval_precision": 0.9382504288164666, + "eval_recall": 0.9101497504159733, + "eval_runtime": 44.9984, + "eval_samples_per_second": 9.534, + "eval_steps_per_second": 1.6, + "step": 830 + }, + { + "epoch": 2.81, + "eval_accuracy": 0.9971213286991589, + "eval_f1": 0.979071379071379, + "eval_loss": 0.015317755751311779, + "eval_precision": 0.9786644499861458, + "eval_recall": 0.9794786466999446, + "eval_runtime": 44.8121, + "eval_samples_per_second": 9.573, + "eval_steps_per_second": 1.607, + "step": 840 + }, + { + "epoch": 2.84, + "eval_accuracy": 0.9968825449046191, + "eval_f1": 0.9747712780704185, + "eval_loss": 0.016130488365888596, + "eval_precision": 0.9745011086474501, + "eval_recall": 0.9750415973377704, + "eval_runtime": 44.9246, + "eval_samples_per_second": 9.549, + "eval_steps_per_second": 1.603, + "step": 850 + }, + { + "epoch": 2.88, + "eval_accuracy": 0.9965111034464461, + "eval_f1": 0.9585304759254106, + "eval_loss": 0.017094574868679047, + "eval_precision": 0.9620111731843576, + "eval_recall": 0.9550748752079867, + "eval_runtime": 44.9689, + "eval_samples_per_second": 9.54, + "eval_steps_per_second": 1.601, + "step": 860 + }, + { + "epoch": 2.91, + "eval_accuracy": 0.997041734100979, + "eval_f1": 0.9767634854771784, + "eval_loss": 0.016324549913406372, + "eval_precision": 0.9743377483443708, + "eval_recall": 0.9792013311148087, + "eval_runtime": 44.9212, + "eval_samples_per_second": 9.55, + "eval_steps_per_second": 1.603, + "step": 870 + }, + { + "epoch": 2.94, + "eval_accuracy": 0.9970284683346157, + "eval_f1": 0.9753394292047659, + "eval_loss": 0.016577888280153275, + "eval_precision": 0.9745293466223699, + "eval_recall": 0.9761508596783139, + "eval_runtime": 45.0626, + "eval_samples_per_second": 9.52, + "eval_steps_per_second": 1.598, + "step": 880 + }, + { + "epoch": 2.98, + "eval_accuracy": 0.9968560133718924, + "eval_f1": 0.9743589743589743, + "eval_loss": 0.01666262373328209, + "eval_precision": 0.973954003879191, + "eval_recall": 0.9747642817526345, + "eval_runtime": 44.929, + "eval_samples_per_second": 9.548, + "eval_steps_per_second": 1.603, + "step": 890 + }, + { + "epoch": 3.01, + "eval_accuracy": 0.9966570268764426, + "eval_f1": 0.9715396362626684, + "eval_loss": 0.01778605580329895, + "eval_precision": 0.9727550736725049, + "eval_recall": 0.9703272323904604, + "eval_runtime": 44.9166, + "eval_samples_per_second": 9.551, + "eval_steps_per_second": 1.603, + "step": 900 + }, + { + "epoch": 3.04, + "eval_accuracy": 0.9970815314000689, + "eval_f1": 0.9786407766990292, + "eval_loss": 0.016635755077004433, + "eval_precision": 0.978912319644839, + "eval_recall": 0.978369384359401, + "eval_runtime": 45.1716, + "eval_samples_per_second": 9.497, + "eval_steps_per_second": 1.594, + "step": 910 + }, + { + "epoch": 3.08, + "eval_accuracy": 0.9971478602318856, + "eval_f1": 0.9811529933481152, + "eval_loss": 0.01662967912852764, + "eval_precision": 0.9806094182825484, + "eval_recall": 0.9816971713810316, + "eval_runtime": 44.9112, + "eval_samples_per_second": 9.552, + "eval_steps_per_second": 1.603, + "step": 920 + }, + { + "epoch": 3.11, + "eval_accuracy": 0.9969488737364357, + "eval_f1": 0.9753052164261932, + "eval_loss": 0.016620052978396416, + "eval_precision": 0.975846751804553, + "eval_recall": 0.9747642817526345, + "eval_runtime": 44.9363, + "eval_samples_per_second": 9.547, + "eval_steps_per_second": 1.602, + "step": 930 + }, + { + "epoch": 3.14, + "eval_accuracy": 0.9972407205964289, + "eval_f1": 0.9815508392287419, + "eval_loss": 0.015619627200067043, + "eval_precision": 0.9819594782126007, + "eval_recall": 0.9811425402107599, + "eval_runtime": 44.9174, + "eval_samples_per_second": 9.551, + "eval_steps_per_second": 1.603, + "step": 940 + }, + { + "epoch": 3.18, + "eval_accuracy": 0.9970947971664323, + "eval_f1": 0.9779442363712026, + "eval_loss": 0.015341303311288357, + "eval_precision": 0.9783513738551207, + "eval_recall": 0.9775374376039934, + "eval_runtime": 45.0123, + "eval_samples_per_second": 9.531, + "eval_steps_per_second": 1.6, + "step": 950 + }, + { + "epoch": 3.21, + "eval_accuracy": 0.9968692791382558, + "eval_f1": 0.9764281752634498, + "eval_loss": 0.01650950312614441, + "eval_precision": 0.9764281752634498, + "eval_recall": 0.9764281752634498, + "eval_runtime": 44.93, + "eval_samples_per_second": 9.548, + "eval_steps_per_second": 1.602, + "step": 960 + }, + { + "epoch": 3.24, + "eval_accuracy": 0.9968692791382558, + "eval_f1": 0.9774144381321879, + "eval_loss": 0.01659621112048626, + "eval_precision": 0.9767377457767932, + "eval_recall": 0.9780920687742651, + "eval_runtime": 44.9897, + "eval_samples_per_second": 9.536, + "eval_steps_per_second": 1.6, + "step": 970 + }, + { + "epoch": 3.28, + "eval_accuracy": 0.9967498872409859, + "eval_f1": 0.9746572496884087, + "eval_loss": 0.016826625913381577, + "eval_precision": 0.9734439834024896, + "eval_recall": 0.9758735440931781, + "eval_runtime": 45.2137, + "eval_samples_per_second": 9.488, + "eval_steps_per_second": 1.592, + "step": 980 + }, + { + "epoch": 3.31, + "eval_accuracy": 0.9968958106709824, + "eval_f1": 0.978146611341632, + "eval_loss": 0.016263196244835854, + "eval_precision": 0.9757174392935982, + "eval_recall": 0.9805879090404881, + "eval_runtime": 44.9271, + "eval_samples_per_second": 9.549, + "eval_steps_per_second": 1.603, + "step": 990 + }, + { + "epoch": 3.34, + "learning_rate": 9e-06, + "loss": 0.0154, + "step": 1000 + }, + { + "epoch": 3.34, + "eval_accuracy": 0.9968294818391659, + "eval_f1": 0.9775809576529201, + "eval_loss": 0.016901057213544846, + "eval_precision": 0.9756906077348066, + "eval_recall": 0.9794786466999446, + "eval_runtime": 44.996, + "eval_samples_per_second": 9.534, + "eval_steps_per_second": 1.6, + "step": 1000 + }, + { + "epoch": 3.38, + "eval_accuracy": 0.997041734100979, + "eval_f1": 0.9803269603768356, + "eval_loss": 0.01609906367957592, + "eval_precision": 0.9795127353266888, + "eval_recall": 0.9811425402107599, + "eval_runtime": 45.3202, + "eval_samples_per_second": 9.466, + "eval_steps_per_second": 1.589, + "step": 1010 + }, + { + "epoch": 3.41, + "eval_accuracy": 0.9968692791382558, + "eval_f1": 0.977993079584775, + "eval_loss": 0.016519052907824516, + "eval_precision": 0.9762365294280188, + "eval_recall": 0.9797559622850804, + "eval_runtime": 45.0574, + "eval_samples_per_second": 9.521, + "eval_steps_per_second": 1.598, + "step": 1020 + }, + { + "epoch": 3.44, + "eval_accuracy": 0.9967233557082593, + "eval_f1": 0.9722530521642619, + "eval_loss": 0.01666153408586979, + "eval_precision": 0.9727928928373126, + "eval_recall": 0.9717138103161398, + "eval_runtime": 45.0201, + "eval_samples_per_second": 9.529, + "eval_steps_per_second": 1.599, + "step": 1030 + }, + { + "epoch": 3.48, + "eval_accuracy": 0.9969090764373458, + "eval_f1": 0.9761640798226164, + "eval_loss": 0.015780288726091385, + "eval_precision": 0.9756232686980609, + "eval_recall": 0.9767054908485857, + "eval_runtime": 45.2347, + "eval_samples_per_second": 9.484, + "eval_steps_per_second": 1.592, + "step": 1040 + }, + { + "epoch": 3.51, + "eval_accuracy": 0.9968958106709824, + "eval_f1": 0.9766284054764209, + "eval_loss": 0.015381171368062496, + "eval_precision": 0.9740689655172414, + "eval_recall": 0.9792013311148087, + "eval_runtime": 45.2769, + "eval_samples_per_second": 9.475, + "eval_steps_per_second": 1.59, + "step": 1050 + }, + { + "epoch": 3.55, + "eval_accuracy": 0.9959804727919132, + "eval_f1": 0.9448555074689375, + "eval_loss": 0.01652899943292141, + "eval_precision": 0.9513635085746416, + "eval_recall": 0.9384359400998337, + "eval_runtime": 45.1375, + "eval_samples_per_second": 9.504, + "eval_steps_per_second": 1.595, + "step": 1060 + }, + { + "epoch": 3.58, + "eval_accuracy": 0.9968825449046191, + "eval_f1": 0.9749619587771476, + "eval_loss": 0.015719007700681686, + "eval_precision": 0.9726745790781121, + "eval_recall": 0.9772601220188575, + "eval_runtime": 45.0215, + "eval_samples_per_second": 9.529, + "eval_steps_per_second": 1.599, + "step": 1070 + }, + { + "epoch": 3.61, + "eval_accuracy": 0.9968692791382558, + "eval_f1": 0.9782458085076902, + "eval_loss": 0.01600920408964157, + "eval_precision": 0.977568540570479, + "eval_recall": 0.9789240155296728, + "eval_runtime": 45.1658, + "eval_samples_per_second": 9.498, + "eval_steps_per_second": 1.594, + "step": 1080 + }, + { + "epoch": 3.65, + "eval_accuracy": 0.99580801782919, + "eval_f1": 0.9429732380552053, + "eval_loss": 0.01786983013153076, + "eval_precision": 0.9529878221467006, + "eval_recall": 0.9331669439822518, + "eval_runtime": 45.14, + "eval_samples_per_second": 9.504, + "eval_steps_per_second": 1.595, + "step": 1090 + }, + { + "epoch": 3.68, + "eval_accuracy": 0.9968692791382558, + "eval_f1": 0.977439446366782, + "eval_loss": 0.015765171498060226, + "eval_precision": 0.9756838905775076, + "eval_recall": 0.9792013311148087, + "eval_runtime": 45.1016, + "eval_samples_per_second": 9.512, + "eval_steps_per_second": 1.596, + "step": 1100 + }, + { + "epoch": 3.71, + "eval_accuracy": 0.9968958106709824, + "eval_f1": 0.977703919124775, + "eval_loss": 0.015863990411162376, + "eval_precision": 0.9764868603042877, + "eval_recall": 0.9789240155296728, + "eval_runtime": 45.1212, + "eval_samples_per_second": 9.508, + "eval_steps_per_second": 1.596, + "step": 1110 + }, + { + "epoch": 3.75, + "eval_accuracy": 0.9969621395027991, + "eval_f1": 0.975609756097561, + "eval_loss": 0.01584913209080696, + "eval_precision": 0.9750692520775623, + "eval_recall": 0.9761508596783139, + "eval_runtime": 45.0612, + "eval_samples_per_second": 9.52, + "eval_steps_per_second": 1.598, + "step": 1120 + }, + { + "epoch": 3.78, + "eval_accuracy": 0.9969488737364357, + "eval_f1": 0.9744869661674986, + "eval_loss": 0.01588474027812481, + "eval_precision": 0.9744869661674986, + "eval_recall": 0.9744869661674986, + "eval_runtime": 45.3578, + "eval_samples_per_second": 9.458, + "eval_steps_per_second": 1.587, + "step": 1130 + }, + { + "epoch": 3.81, + "eval_accuracy": 0.9968958106709824, + "eval_f1": 0.9752846431546792, + "eval_loss": 0.016261184588074684, + "eval_precision": 0.9766407119021134, + "eval_recall": 0.9739323349972269, + "eval_runtime": 45.1022, + "eval_samples_per_second": 9.512, + "eval_steps_per_second": 1.596, + "step": 1140 + }, + { + "epoch": 3.85, + "eval_accuracy": 0.9967100899418959, + "eval_f1": 0.9723145071982281, + "eval_loss": 0.016805831342935562, + "eval_precision": 0.970702045328911, + "eval_recall": 0.9739323349972269, + "eval_runtime": 45.1109, + "eval_samples_per_second": 9.51, + "eval_steps_per_second": 1.596, + "step": 1150 + }, + { + "epoch": 3.88, + "eval_accuracy": 0.9970152025682524, + "eval_f1": 0.9805771365149833, + "eval_loss": 0.01488415990024805, + "eval_precision": 0.9811215991116047, + "eval_recall": 0.9800332778702163, + "eval_runtime": 45.1408, + "eval_samples_per_second": 9.504, + "eval_steps_per_second": 1.595, + "step": 1160 + }, + { + "epoch": 3.91, + "eval_accuracy": 0.9970152025682524, + "eval_f1": 0.9792531120331951, + "eval_loss": 0.014714999124407768, + "eval_precision": 0.9768211920529801, + "eval_recall": 0.9816971713810316, + "eval_runtime": 45.0991, + "eval_samples_per_second": 9.512, + "eval_steps_per_second": 1.596, + "step": 1170 + }, + { + "epoch": 3.95, + "eval_accuracy": 0.9969754052691624, + "eval_f1": 0.9757986447241046, + "eval_loss": 0.01566295139491558, + "eval_precision": 0.9732413793103448, + "eval_recall": 0.978369384359401, + "eval_runtime": 45.1028, + "eval_samples_per_second": 9.512, + "eval_steps_per_second": 1.596, + "step": 1180 + }, + { + "epoch": 3.98, + "eval_accuracy": 0.9969886710355257, + "eval_f1": 0.9786880708552449, + "eval_loss": 0.015323741361498833, + "eval_precision": 0.9767955801104973, + "eval_recall": 0.9805879090404881, + "eval_runtime": 45.1524, + "eval_samples_per_second": 9.501, + "eval_steps_per_second": 1.595, + "step": 1190 + }, + { + "epoch": 4.01, + "eval_accuracy": 0.9968692791382558, + "eval_f1": 0.9747642817526345, + "eval_loss": 0.015690365806221962, + "eval_precision": 0.9747642817526345, + "eval_recall": 0.9747642817526345, + "eval_runtime": 45.1208, + "eval_samples_per_second": 9.508, + "eval_steps_per_second": 1.596, + "step": 1200 + }, + { + "epoch": 4.05, + "eval_accuracy": 0.9970549998673424, + "eval_f1": 0.9797783933518006, + "eval_loss": 0.015430403873324394, + "eval_precision": 0.978693967902601, + "eval_recall": 0.980865224625624, + "eval_runtime": 45.032, + "eval_samples_per_second": 9.527, + "eval_steps_per_second": 1.599, + "step": 1210 + }, + { + "epoch": 4.08, + "eval_accuracy": 0.997041734100979, + "eval_f1": 0.9780052566053397, + "eval_loss": 0.0156533382833004, + "eval_precision": 0.9757107369583218, + "eval_recall": 0.9803105934553522, + "eval_runtime": 45.2988, + "eval_samples_per_second": 9.47, + "eval_steps_per_second": 1.589, + "step": 1220 + }, + { + "epoch": 4.11, + "eval_accuracy": 0.9968958106709824, + "eval_f1": 0.9735493698933665, + "eval_loss": 0.016326196491718292, + "eval_precision": 0.9723374827109267, + "eval_recall": 0.9747642817526345, + "eval_runtime": 45.0756, + "eval_samples_per_second": 9.517, + "eval_steps_per_second": 1.597, + "step": 1230 + }, + { + "epoch": 4.15, + "eval_accuracy": 0.997041734100979, + "eval_f1": 0.9778699861687413, + "eval_loss": 0.015600275248289108, + "eval_precision": 0.9754415011037527, + "eval_recall": 0.9803105934553522, + "eval_runtime": 44.9579, + "eval_samples_per_second": 9.542, + "eval_steps_per_second": 1.601, + "step": 1240 + }, + { + "epoch": 4.18, + "eval_accuracy": 0.9970152025682524, + "eval_f1": 0.9774331995015921, + "eval_loss": 0.015862880274653435, + "eval_precision": 0.9759469173348079, + "eval_recall": 0.9789240155296728, + "eval_runtime": 45.0784, + "eval_samples_per_second": 9.517, + "eval_steps_per_second": 1.597, + "step": 1250 + }, + { + "epoch": 4.21, + "eval_accuracy": 0.9970284683346157, + "eval_f1": 0.9802076124567475, + "eval_loss": 0.01617247611284256, + "eval_precision": 0.9784470848300636, + "eval_recall": 0.9819744869661675, + "eval_runtime": 45.0516, + "eval_samples_per_second": 9.522, + "eval_steps_per_second": 1.598, + "step": 1260 + }, + { + "epoch": 4.25, + "eval_accuracy": 0.9970815314000689, + "eval_f1": 0.978146611341632, + "eval_loss": 0.016013789921998978, + "eval_precision": 0.9757174392935982, + "eval_recall": 0.9805879090404881, + "eval_runtime": 45.1173, + "eval_samples_per_second": 9.509, + "eval_steps_per_second": 1.596, + "step": 1270 + }, + { + "epoch": 4.28, + "eval_accuracy": 0.9968560133718924, + "eval_f1": 0.9744157101369105, + "eval_loss": 0.01655399613082409, + "eval_precision": 0.9718620689655172, + "eval_recall": 0.9769828064337216, + "eval_runtime": 45.0171, + "eval_samples_per_second": 9.53, + "eval_steps_per_second": 1.599, + "step": 1280 + }, + { + "epoch": 4.31, + "eval_accuracy": 0.9971080629327956, + "eval_f1": 0.9793600221637346, + "eval_loss": 0.015200940892100334, + "eval_precision": 0.9784112925546637, + "eval_recall": 0.9803105934553522, + "eval_runtime": 45.0877, + "eval_samples_per_second": 9.515, + "eval_steps_per_second": 1.597, + "step": 1290 + }, + { + "epoch": 4.35, + "eval_accuracy": 0.9968294818391659, + "eval_f1": 0.9751175006911805, + "eval_loss": 0.016674669459462166, + "eval_precision": 0.9721609702315325, + "eval_recall": 0.9780920687742651, + "eval_runtime": 45.3184, + "eval_samples_per_second": 9.466, + "eval_steps_per_second": 1.589, + "step": 1300 + }, + { + "epoch": 4.38, + "eval_accuracy": 0.9968294818391659, + "eval_f1": 0.9766413268832067, + "eval_loss": 0.01657554879784584, + "eval_precision": 0.9735464315238358, + "eval_recall": 0.9797559622850804, + "eval_runtime": 45.1144, + "eval_samples_per_second": 9.509, + "eval_steps_per_second": 1.596, + "step": 1310 + }, + { + "epoch": 4.41, + "eval_accuracy": 0.9968958106709824, + "eval_f1": 0.9781586950511474, + "eval_loss": 0.016078708693385124, + "eval_precision": 0.9751929437706726, + "eval_recall": 0.9811425402107599, + "eval_runtime": 45.1076, + "eval_samples_per_second": 9.511, + "eval_steps_per_second": 1.596, + "step": 1320 + }, + { + "epoch": 4.45, + "eval_accuracy": 0.9967764187737126, + "eval_f1": 0.9739251040221916, + "eval_loss": 0.015717538073658943, + "eval_precision": 0.9741953385127636, + "eval_recall": 0.973655019412091, + "eval_runtime": 45.0205, + "eval_samples_per_second": 9.529, + "eval_steps_per_second": 1.599, + "step": 1330 + }, + { + "epoch": 4.48, + "eval_accuracy": 0.9968692791382558, + "eval_f1": 0.9795127353266887, + "eval_loss": 0.015196431428194046, + "eval_precision": 0.9778883360972913, + "eval_recall": 0.9811425402107599, + "eval_runtime": 45.4378, + "eval_samples_per_second": 9.441, + "eval_steps_per_second": 1.585, + "step": 1340 + }, + { + "epoch": 4.52, + "eval_accuracy": 0.9967233557082593, + "eval_f1": 0.971064654575661, + "eval_loss": 0.015703538432717323, + "eval_precision": 0.9695880564003317, + "eval_recall": 0.9725457570715474, + "eval_runtime": 45.4022, + "eval_samples_per_second": 9.449, + "eval_steps_per_second": 1.586, + "step": 1350 + }, + { + "epoch": 4.55, + "eval_accuracy": 0.9968162160728026, + "eval_f1": 0.9755828390122776, + "eval_loss": 0.015339327044785023, + "eval_precision": 0.9706286027998902, + "eval_recall": 0.9805879090404881, + "eval_runtime": 45.4342, + "eval_samples_per_second": 9.442, + "eval_steps_per_second": 1.585, + "step": 1360 + }, + { + "epoch": 4.58, + "eval_accuracy": 0.9968692791382558, + "eval_f1": 0.9758587391364325, + "eval_loss": 0.015390865504741669, + "eval_precision": 0.9709031018391435, + "eval_recall": 0.980865224625624, + "eval_runtime": 45.7337, + "eval_samples_per_second": 9.38, + "eval_steps_per_second": 1.574, + "step": 1370 + }, + { + "epoch": 4.62, + "eval_accuracy": 0.9969886710355257, + "eval_f1": 0.9785822854774078, + "eval_loss": 0.015035979449748993, + "eval_precision": 0.97521343982374, + "eval_recall": 0.9819744869661675, + "eval_runtime": 45.4663, + "eval_samples_per_second": 9.436, + "eval_steps_per_second": 1.584, + "step": 1380 + }, + { + "epoch": 4.65, + "eval_accuracy": 0.9970682656337057, + "eval_f1": 0.9809234171965718, + "eval_loss": 0.014896390959620476, + "eval_precision": 0.9779492833517089, + "eval_recall": 0.9839156960621187, + "eval_runtime": 45.0989, + "eval_samples_per_second": 9.512, + "eval_steps_per_second": 1.596, + "step": 1390 + }, + { + "epoch": 4.68, + "eval_accuracy": 0.9968560133718924, + "eval_f1": 0.9768339768339769, + "eval_loss": 0.015806537121534348, + "eval_precision": 0.9714755896873286, + "eval_recall": 0.9822518025513034, + "eval_runtime": 45.0416, + "eval_samples_per_second": 9.525, + "eval_steps_per_second": 1.599, + "step": 1400 + }, + { + "epoch": 4.72, + "eval_accuracy": 0.997041734100979, + "eval_f1": 0.9778761061946902, + "eval_loss": 0.015140415169298649, + "eval_precision": 0.9751792608935466, + "eval_recall": 0.9805879090404881, + "eval_runtime": 45.6611, + "eval_samples_per_second": 9.395, + "eval_steps_per_second": 1.577, + "step": 1410 + }, + { + "epoch": 4.75, + "eval_accuracy": 0.9970549998673424, + "eval_f1": 0.9795353982300885, + "eval_loss": 0.015615841373801231, + "eval_precision": 0.9768339768339769, + "eval_recall": 0.9822518025513034, + "eval_runtime": 45.3516, + "eval_samples_per_second": 9.459, + "eval_steps_per_second": 1.588, + "step": 1420 + }, + { + "epoch": 4.78, + "eval_accuracy": 0.9971876575309756, + "eval_f1": 0.9802240354031254, + "eval_loss": 0.015609286725521088, + "eval_precision": 0.9776551724137931, + "eval_recall": 0.9828064337215752, + "eval_runtime": 45.8817, + "eval_samples_per_second": 9.35, + "eval_steps_per_second": 1.569, + "step": 1430 + }, + { + "epoch": 4.82, + "eval_accuracy": 0.997041734100979, + "eval_f1": 0.980777209238003, + "eval_loss": 0.015975775197148323, + "eval_precision": 0.9782068965517241, + "eval_recall": 0.9833610648918469, + "eval_runtime": 45.1602, + "eval_samples_per_second": 9.5, + "eval_steps_per_second": 1.594, + "step": 1440 + }, + { + "epoch": 4.85, + "eval_accuracy": 0.9970682656337057, + "eval_f1": 0.9810642709053213, + "eval_loss": 0.015114562585949898, + "eval_precision": 0.9779553596031965, + "eval_recall": 0.9841930116472546, + "eval_runtime": 45.0898, + "eval_samples_per_second": 9.514, + "eval_steps_per_second": 1.597, + "step": 1450 + }, + { + "epoch": 4.88, + "eval_accuracy": 0.9970549998673424, + "eval_f1": 0.9806576402321082, + "eval_loss": 0.01497586164623499, + "eval_precision": 0.977147577092511, + "eval_recall": 0.9841930116472546, + "eval_runtime": 47.111, + "eval_samples_per_second": 9.106, + "eval_steps_per_second": 1.528, + "step": 1460 + }, + { + "epoch": 4.92, + "eval_accuracy": 0.9970284683346157, + "eval_f1": 0.9779005524861878, + "eval_loss": 0.014804758131504059, + "eval_precision": 0.9741331865712713, + "eval_recall": 0.9816971713810316, + "eval_runtime": 45.7757, + "eval_samples_per_second": 9.372, + "eval_steps_per_second": 1.573, + "step": 1470 + }, + { + "epoch": 4.95, + "eval_accuracy": 0.9969621395027991, + "eval_f1": 0.9767634854771784, + "eval_loss": 0.015325279906392097, + "eval_precision": 0.9743377483443708, + "eval_recall": 0.9792013311148087, + "eval_runtime": 45.788, + "eval_samples_per_second": 9.369, + "eval_steps_per_second": 1.572, + "step": 1480 + }, + { + "epoch": 4.98, + "eval_accuracy": 0.9970947971664323, + "eval_f1": 0.9820143884892086, + "eval_loss": 0.014990455470979214, + "eval_precision": 0.9798453892876864, + "eval_recall": 0.9841930116472546, + "eval_runtime": 45.1213, + "eval_samples_per_second": 9.508, + "eval_steps_per_second": 1.596, + "step": 1490 + }, + { + "epoch": 5.02, + "learning_rate": 8.5e-06, + "loss": 0.0081, + "step": 1500 + }, + { + "epoch": 5.02, + "eval_accuracy": 0.9970947971664323, + "eval_f1": 0.9796821008984106, + "eval_loss": 0.01517151016741991, + "eval_precision": 0.9765775695783963, + "eval_recall": 0.9828064337215752, + "eval_runtime": 45.193, + "eval_samples_per_second": 9.493, + "eval_steps_per_second": 1.593, + "step": 1500 + }, + { + "epoch": 5.05, + "eval_accuracy": 0.9967366214746226, + "eval_f1": 0.9682408174537421, + "eval_loss": 0.01580970361828804, + "eval_precision": 0.9642464246424642, + "eval_recall": 0.9722684414864116, + "eval_runtime": 45.4381, + "eval_samples_per_second": 9.441, + "eval_steps_per_second": 1.585, + "step": 1510 + }, + { + "epoch": 5.08, + "eval_accuracy": 0.9966702926428059, + "eval_f1": 0.9666759233546237, + "eval_loss": 0.01570785790681839, + "eval_precision": 0.9680200222469411, + "eval_recall": 0.9653355518580145, + "eval_runtime": 45.4491, + "eval_samples_per_second": 9.439, + "eval_steps_per_second": 1.584, + "step": 1520 + }, + { + "epoch": 5.12, + "eval_accuracy": 0.9969886710355257, + "eval_f1": 0.9800388134183532, + "eval_loss": 0.015114962123334408, + "eval_precision": 0.9797671840354767, + "eval_recall": 0.9803105934553522, + "eval_runtime": 45.1961, + "eval_samples_per_second": 9.492, + "eval_steps_per_second": 1.593, + "step": 1530 + }, + { + "epoch": 5.15, + "eval_accuracy": 0.9970815314000689, + "eval_f1": 0.9818835569077582, + "eval_loss": 0.01481586042791605, + "eval_precision": 0.9793103448275862, + "eval_recall": 0.9844703272323905, + "eval_runtime": 45.6531, + "eval_samples_per_second": 9.397, + "eval_steps_per_second": 1.577, + "step": 1540 + }, + { + "epoch": 5.18, + "eval_accuracy": 0.9971478602318856, + "eval_f1": 0.9807878369039392, + "eval_loss": 0.014275978319346905, + "eval_precision": 0.9776798015982364, + "eval_recall": 0.9839156960621187, + "eval_runtime": 45.7198, + "eval_samples_per_second": 9.383, + "eval_steps_per_second": 1.575, + "step": 1550 + }, + { + "epoch": 5.22, + "eval_accuracy": 0.9970682656337057, + "eval_f1": 0.9792703150912107, + "eval_loss": 0.01468308549374342, + "eval_precision": 0.9760330578512396, + "eval_recall": 0.9825291181364393, + "eval_runtime": 45.5952, + "eval_samples_per_second": 9.409, + "eval_steps_per_second": 1.579, + "step": 1560 + }, + { + "epoch": 5.25, + "eval_accuracy": 0.9970947971664323, + "eval_f1": 0.9810695039380959, + "eval_loss": 0.014422405511140823, + "eval_precision": 0.977692095841366, + "eval_recall": 0.9844703272323905, + "eval_runtime": 45.732, + "eval_samples_per_second": 9.381, + "eval_steps_per_second": 1.574, + "step": 1570 + }, + { + "epoch": 5.28, + "eval_accuracy": 0.9970947971664323, + "eval_f1": 0.9809075816270061, + "eval_loss": 0.01434118952602148, + "eval_precision": 0.9787410270568746, + "eval_recall": 0.983083749306711, + "eval_runtime": 45.5148, + "eval_samples_per_second": 9.425, + "eval_steps_per_second": 1.582, + "step": 1580 + }, + { + "epoch": 5.32, + "eval_accuracy": 0.9970019368018891, + "eval_f1": 0.9791350006908941, + "eval_loss": 0.014856048859655857, + "eval_precision": 0.9757642522721014, + "eval_recall": 0.9825291181364393, + "eval_runtime": 46.2893, + "eval_samples_per_second": 9.268, + "eval_steps_per_second": 1.555, + "step": 1590 + }, + { + "epoch": 5.35, + "eval_accuracy": 0.9970549998673424, + "eval_f1": 0.9803541781959049, + "eval_loss": 0.015461008064448833, + "eval_precision": 0.9781888459414688, + "eval_recall": 0.9825291181364393, + "eval_runtime": 45.2253, + "eval_samples_per_second": 9.486, + "eval_steps_per_second": 1.592, + "step": 1600 + }, + { + "epoch": 5.38, + "eval_accuracy": 0.9970549998673424, + "eval_f1": 0.9800884955752212, + "eval_loss": 0.01574764773249626, + "eval_precision": 0.9773855488141202, + "eval_recall": 0.9828064337215752, + "eval_runtime": 45.056, + "eval_samples_per_second": 9.521, + "eval_steps_per_second": 1.598, + "step": 1610 + }, + { + "epoch": 5.42, + "eval_accuracy": 0.997041734100979, + "eval_f1": 0.9792645839093173, + "eval_loss": 0.015540325082838535, + "eval_precision": 0.976295479603087, + "eval_recall": 0.9822518025513034, + "eval_runtime": 45.1752, + "eval_samples_per_second": 9.496, + "eval_steps_per_second": 1.594, + "step": 1620 + }, + { + "epoch": 5.45, + "eval_accuracy": 0.9969621395027991, + "eval_f1": 0.9781526548672567, + "eval_loss": 0.015259744599461555, + "eval_precision": 0.9754550468836183, + "eval_recall": 0.980865224625624, + "eval_runtime": 45.477, + "eval_samples_per_second": 9.433, + "eval_steps_per_second": 1.583, + "step": 1630 + }, + { + "epoch": 5.48, + "eval_accuracy": 0.997041734100979, + "eval_f1": 0.9809075816270061, + "eval_loss": 0.01531192846596241, + "eval_precision": 0.9787410270568746, + "eval_recall": 0.983083749306711, + "eval_runtime": 45.3307, + "eval_samples_per_second": 9.464, + "eval_steps_per_second": 1.588, + "step": 1640 + }, + { + "epoch": 5.52, + "eval_accuracy": 0.9970152025682524, + "eval_f1": 0.9804790253357331, + "eval_loss": 0.015362209640443325, + "eval_precision": 0.9789881116947746, + "eval_recall": 0.9819744869661675, + "eval_runtime": 45.5289, + "eval_samples_per_second": 9.423, + "eval_steps_per_second": 1.581, + "step": 1650 + }, + { + "epoch": 5.55, + "eval_accuracy": 0.9968825449046191, + "eval_f1": 0.9736151402127364, + "eval_loss": 0.016267232596874237, + "eval_precision": 0.9699972474538948, + "eval_recall": 0.9772601220188575, + "eval_runtime": 45.4269, + "eval_samples_per_second": 9.444, + "eval_steps_per_second": 1.585, + "step": 1660 + }, + { + "epoch": 5.59, + "eval_accuracy": 0.9970152025682524, + "eval_f1": 0.9776181265542968, + "eval_loss": 0.01579289324581623, + "eval_precision": 0.9741189427312775, + "eval_recall": 0.9811425402107599, + "eval_runtime": 46.1943, + "eval_samples_per_second": 9.287, + "eval_steps_per_second": 1.559, + "step": 1670 + }, + { + "epoch": 5.62, + "eval_accuracy": 0.9970284683346157, + "eval_f1": 0.9785407725321887, + "eval_loss": 0.01532789133489132, + "eval_precision": 0.9770528061929776, + "eval_recall": 0.9800332778702163, + "eval_runtime": 46.2745, + "eval_samples_per_second": 9.271, + "eval_steps_per_second": 1.556, + "step": 1680 + }, + { + "epoch": 5.65, + "eval_accuracy": 0.9970549998673424, + "eval_f1": 0.979665237238899, + "eval_loss": 0.01536885742098093, + "eval_precision": 0.9773668230747999, + "eval_recall": 0.9819744869661675, + "eval_runtime": 45.136, + "eval_samples_per_second": 9.505, + "eval_steps_per_second": 1.595, + "step": 1690 + }, + { + "epoch": 5.69, + "eval_accuracy": 0.9969488737364357, + "eval_f1": 0.9772445179975174, + "eval_loss": 0.01578596606850624, + "eval_precision": 0.9720164609053498, + "eval_recall": 0.9825291181364393, + "eval_runtime": 45.0296, + "eval_samples_per_second": 9.527, + "eval_steps_per_second": 1.599, + "step": 1700 + }, + { + "epoch": 5.72, + "eval_accuracy": 0.9968560133718924, + "eval_f1": 0.9757040309221425, + "eval_loss": 0.01535722240805626, + "eval_precision": 0.971412864211105, + "eval_recall": 0.9800332778702163, + "eval_runtime": 45.2636, + "eval_samples_per_second": 9.478, + "eval_steps_per_second": 1.591, + "step": 1710 + }, + { + "epoch": 5.75, + "eval_accuracy": 0.9970947971664323, + "eval_f1": 0.9769496204278812, + "eval_loss": 0.01525976974517107, + "eval_precision": 0.9725199230557846, + "eval_recall": 0.9814198557958957, + "eval_runtime": 45.5287, + "eval_samples_per_second": 9.423, + "eval_steps_per_second": 1.581, + "step": 1720 + }, + { + "epoch": 5.79, + "eval_accuracy": 0.9970682656337057, + "eval_f1": 0.9776181265542968, + "eval_loss": 0.014473304152488708, + "eval_precision": 0.9741189427312775, + "eval_recall": 0.9811425402107599, + "eval_runtime": 45.6773, + "eval_samples_per_second": 9.392, + "eval_steps_per_second": 1.576, + "step": 1730 + }, + { + "epoch": 5.82, + "eval_accuracy": 0.9971345944655223, + "eval_f1": 0.9802349689011749, + "eval_loss": 0.013995842076838017, + "eval_precision": 0.9771286855883163, + "eval_recall": 0.9833610648918469, + "eval_runtime": 45.5912, + "eval_samples_per_second": 9.41, + "eval_steps_per_second": 1.579, + "step": 1740 + }, + { + "epoch": 5.85, + "eval_accuracy": 0.9970947971664323, + "eval_f1": 0.9808864265927978, + "eval_loss": 0.014489100314676762, + "eval_precision": 0.9798007747648035, + "eval_recall": 0.9819744869661675, + "eval_runtime": 45.3576, + "eval_samples_per_second": 9.458, + "eval_steps_per_second": 1.587, + "step": 1750 + }, + { + "epoch": 5.89, + "eval_accuracy": 0.9972274548300656, + "eval_f1": 0.9808864265927978, + "eval_loss": 0.014342778362333775, + "eval_precision": 0.9798007747648035, + "eval_recall": 0.9819744869661675, + "eval_runtime": 45.5077, + "eval_samples_per_second": 9.427, + "eval_steps_per_second": 1.582, + "step": 1760 + }, + { + "epoch": 5.92, + "eval_accuracy": 0.9972805178955189, + "eval_f1": 0.9800774764803543, + "eval_loss": 0.014782003127038479, + "eval_precision": 0.9779127553837659, + "eval_recall": 0.9822518025513034, + "eval_runtime": 45.3932, + "eval_samples_per_second": 9.451, + "eval_steps_per_second": 1.586, + "step": 1770 + }, + { + "epoch": 5.95, + "eval_accuracy": 0.9972009232973389, + "eval_f1": 0.978852798894264, + "eval_loss": 0.014779850840568542, + "eval_precision": 0.9757508955635161, + "eval_recall": 0.9819744869661675, + "eval_runtime": 45.6592, + "eval_samples_per_second": 9.396, + "eval_steps_per_second": 1.577, + "step": 1780 + }, + { + "epoch": 5.99, + "eval_accuracy": 0.9971213286991589, + "eval_f1": 0.9799141155284665, + "eval_loss": 0.015209286473691463, + "eval_precision": 0.9789648491558262, + "eval_recall": 0.980865224625624, + "eval_runtime": 46.4662, + "eval_samples_per_second": 9.233, + "eval_steps_per_second": 1.55, + "step": 1790 + }, + { + "epoch": 6.02, + "eval_accuracy": 0.9971876575309756, + "eval_f1": 0.981011781011781, + "eval_loss": 0.015130845829844475, + "eval_precision": 0.9806040454419507, + "eval_recall": 0.9814198557958957, + "eval_runtime": 45.3314, + "eval_samples_per_second": 9.464, + "eval_steps_per_second": 1.588, + "step": 1800 + }, + { + "epoch": 6.05, + "eval_accuracy": 0.9971213286991589, + "eval_f1": 0.9795297372060858, + "eval_loss": 0.015158111229538918, + "eval_precision": 0.9770971302428256, + "eval_recall": 0.9819744869661675, + "eval_runtime": 45.4446, + "eval_samples_per_second": 9.44, + "eval_steps_per_second": 1.584, + "step": 1810 + }, + { + "epoch": 6.09, + "eval_accuracy": 0.9970947971664323, + "eval_f1": 0.9782638792745397, + "eval_loss": 0.015132046304643154, + "eval_precision": 0.9767763339784352, + "eval_recall": 0.9797559622850804, + "eval_runtime": 45.4433, + "eval_samples_per_second": 9.44, + "eval_steps_per_second": 1.584, + "step": 1820 + }, + { + "epoch": 6.12, + "eval_accuracy": 0.9968825449046191, + "eval_f1": 0.9705516383243468, + "eval_loss": 0.01593286544084549, + "eval_precision": 0.967741935483871, + "eval_recall": 0.9733777038269551, + "eval_runtime": 45.4009, + "eval_samples_per_second": 9.449, + "eval_steps_per_second": 1.586, + "step": 1830 + }, + { + "epoch": 6.15, + "eval_accuracy": 0.9969356079700724, + "eval_f1": 0.9730774540936076, + "eval_loss": 0.014840428717434406, + "eval_precision": 0.9689304371734946, + "eval_recall": 0.9772601220188575, + "eval_runtime": 45.3952, + "eval_samples_per_second": 9.45, + "eval_steps_per_second": 1.586, + "step": 1840 + }, + { + "epoch": 6.19, + "eval_accuracy": 0.9966570268764426, + "eval_f1": 0.9764868603042877, + "eval_loss": 0.016662631183862686, + "eval_precision": 0.9740618101545254, + "eval_recall": 0.9789240155296728, + "eval_runtime": 45.439, + "eval_samples_per_second": 9.441, + "eval_steps_per_second": 1.585, + "step": 1850 + }, + { + "epoch": 6.22, + "eval_accuracy": 0.9965376349791727, + "eval_f1": 0.9750761561894212, + "eval_loss": 0.01773080602288246, + "eval_precision": 0.9737278761061947, + "eval_recall": 0.9764281752634498, + "eval_runtime": 45.6526, + "eval_samples_per_second": 9.397, + "eval_steps_per_second": 1.577, + "step": 1860 + }, + { + "epoch": 6.25, + "eval_accuracy": 0.9964182430819029, + "eval_f1": 0.9697221070095395, + "eval_loss": 0.018397442996501923, + "eval_precision": 0.9669148056244831, + "eval_recall": 0.9725457570715474, + "eval_runtime": 45.4014, + "eval_samples_per_second": 9.449, + "eval_steps_per_second": 1.586, + "step": 1870 + }, + { + "epoch": 6.29, + "eval_accuracy": 0.9966968241755326, + "eval_f1": 0.9773292784075199, + "eval_loss": 0.0170457661151886, + "eval_precision": 0.9743660418963617, + "eval_recall": 0.9803105934553522, + "eval_runtime": 45.6124, + "eval_samples_per_second": 9.405, + "eval_steps_per_second": 1.579, + "step": 1880 + }, + { + "epoch": 6.32, + "eval_accuracy": 0.9968029503064392, + "eval_f1": 0.9793046357615895, + "eval_loss": 0.01652824692428112, + "eval_precision": 0.9744645799011532, + "eval_recall": 0.9841930116472546, + "eval_runtime": 45.4345, + "eval_samples_per_second": 9.442, + "eval_steps_per_second": 1.585, + "step": 1890 + }, + { + "epoch": 6.35, + "eval_accuracy": 0.9967233557082593, + "eval_f1": 0.978476821192053, + "eval_loss": 0.016787514090538025, + "eval_precision": 0.9736408566721582, + "eval_recall": 0.9833610648918469, + "eval_runtime": 45.5818, + "eval_samples_per_second": 9.412, + "eval_steps_per_second": 1.58, + "step": 1900 + }, + { + "epoch": 6.39, + "eval_accuracy": 0.9965641665118994, + "eval_f1": 0.975320557010892, + "eval_loss": 0.01782037876546383, + "eval_precision": 0.9698382231971483, + "eval_recall": 0.980865224625624, + "eval_runtime": 45.9556, + "eval_samples_per_second": 9.335, + "eval_steps_per_second": 1.567, + "step": 1910 + }, + { + "epoch": 6.42, + "eval_accuracy": 0.9966702926428059, + "eval_f1": 0.9768211920529801, + "eval_loss": 0.017588861286640167, + "eval_precision": 0.9719934102141681, + "eval_recall": 0.9816971713810316, + "eval_runtime": 45.1589, + "eval_samples_per_second": 9.5, + "eval_steps_per_second": 1.594, + "step": 1920 + }, + { + "epoch": 6.45, + "eval_accuracy": 0.9969488737364357, + "eval_f1": 0.9810799613313078, + "eval_loss": 0.016702750697731972, + "eval_precision": 0.9771664374140303, + "eval_recall": 0.9850249584026622, + "eval_runtime": 45.117, + "eval_samples_per_second": 9.509, + "eval_steps_per_second": 1.596, + "step": 1930 + }, + { + "epoch": 6.49, + "eval_accuracy": 0.9968029503064392, + "eval_f1": 0.9783179118906229, + "eval_loss": 0.016243569552898407, + "eval_precision": 0.9744154057771665, + "eval_recall": 0.9822518025513034, + "eval_runtime": 45.1078, + "eval_samples_per_second": 9.511, + "eval_steps_per_second": 1.596, + "step": 1940 + }, + { + "epoch": 6.52, + "eval_accuracy": 0.9968692791382558, + "eval_f1": 0.9773418071290412, + "eval_loss": 0.01623663119971752, + "eval_precision": 0.9738436123348018, + "eval_recall": 0.980865224625624, + "eval_runtime": 45.7949, + "eval_samples_per_second": 9.368, + "eval_steps_per_second": 1.572, + "step": 1950 + }, + { + "epoch": 6.56, + "eval_accuracy": 0.9967896845400759, + "eval_f1": 0.9769496204278812, + "eval_loss": 0.016976099461317062, + "eval_precision": 0.9725199230557846, + "eval_recall": 0.9814198557958957, + "eval_runtime": 45.3661, + "eval_samples_per_second": 9.456, + "eval_steps_per_second": 1.587, + "step": 1960 + }, + { + "epoch": 6.59, + "eval_accuracy": 0.9967233557082593, + "eval_f1": 0.9759933774834438, + "eval_loss": 0.017524730414152145, + "eval_precision": 0.971169686985173, + "eval_recall": 0.980865224625624, + "eval_runtime": 45.7337, + "eval_samples_per_second": 9.38, + "eval_steps_per_second": 1.574, + "step": 1970 + }, + { + "epoch": 6.62, + "eval_accuracy": 0.9968029503064392, + "eval_f1": 0.9794397681799365, + "eval_loss": 0.016901282593607903, + "eval_precision": 0.9747322164240593, + "eval_recall": 0.9841930116472546, + "eval_runtime": 45.4807, + "eval_samples_per_second": 9.433, + "eval_steps_per_second": 1.583, + "step": 1980 + }, + { + "epoch": 6.66, + "eval_accuracy": 0.9967896845400759, + "eval_f1": 0.9788761562888305, + "eval_loss": 0.01667814515531063, + "eval_precision": 0.9747044267253231, + "eval_recall": 0.983083749306711, + "eval_runtime": 45.6668, + "eval_samples_per_second": 9.394, + "eval_steps_per_second": 1.577, + "step": 1990 + }, + { + "epoch": 6.69, + "learning_rate": 8.000000000000001e-06, + "loss": 0.0064, + "step": 2000 + }, + { + "epoch": 6.69, + "eval_accuracy": 0.9967100899418959, + "eval_f1": 0.9776428374275461, + "eval_loss": 0.016720809042453766, + "eval_precision": 0.9730769230769231, + "eval_recall": 0.9822518025513034, + "eval_runtime": 45.4912, + "eval_samples_per_second": 9.43, + "eval_steps_per_second": 1.583, + "step": 2000 + }, + { + "epoch": 6.72, + "eval_accuracy": 0.9966172295773527, + "eval_f1": 0.9770781552057443, + "eval_loss": 0.017004678025841713, + "eval_precision": 0.9730473047304731, + "eval_recall": 0.9811425402107599, + "eval_runtime": 45.7222, + "eval_samples_per_second": 9.383, + "eval_steps_per_second": 1.575, + "step": 2010 + }, + { + "epoch": 6.76, + "eval_accuracy": 0.9966172295773527, + "eval_f1": 0.9774892970584174, + "eval_loss": 0.016945617273449898, + "eval_precision": 0.9735900962861073, + "eval_recall": 0.9814198557958957, + "eval_runtime": 45.3731, + "eval_samples_per_second": 9.455, + "eval_steps_per_second": 1.587, + "step": 2020 + }, + { + "epoch": 6.79, + "eval_accuracy": 0.9967100899418959, + "eval_f1": 0.9788994621431527, + "eval_loss": 0.016928063705563545, + "eval_precision": 0.9736625514403292, + "eval_recall": 0.9841930116472546, + "eval_runtime": 45.8513, + "eval_samples_per_second": 9.356, + "eval_steps_per_second": 1.57, + "step": 2030 + }, + { + "epoch": 6.82, + "eval_accuracy": 0.9967896845400759, + "eval_f1": 0.9779675020655467, + "eval_loss": 0.01666436158120632, + "eval_precision": 0.9712800875273523, + "eval_recall": 0.9847476428175264, + "eval_runtime": 45.4258, + "eval_samples_per_second": 9.444, + "eval_steps_per_second": 1.585, + "step": 2040 + }, + { + "epoch": 6.86, + "eval_accuracy": 0.9968560133718924, + "eval_f1": 0.9770130763936682, + "eval_loss": 0.016234010457992554, + "eval_precision": 0.969937141295436, + "eval_recall": 0.9841930116472546, + "eval_runtime": 45.6745, + "eval_samples_per_second": 9.393, + "eval_steps_per_second": 1.576, + "step": 2050 + }, + { + "epoch": 6.89, + "eval_accuracy": 0.9968029503064392, + "eval_f1": 0.9758986365514392, + "eval_loss": 0.015693064779043198, + "eval_precision": 0.9693570451436389, + "eval_recall": 0.9825291181364393, + "eval_runtime": 46.507, + "eval_samples_per_second": 9.224, + "eval_steps_per_second": 1.548, + "step": 2060 + }, + { + "epoch": 6.92, + "eval_accuracy": 0.9968825449046191, + "eval_f1": 0.978476821192053, + "eval_loss": 0.015247814357280731, + "eval_precision": 0.9736408566721582, + "eval_recall": 0.9833610648918469, + "eval_runtime": 45.4327, + "eval_samples_per_second": 9.443, + "eval_steps_per_second": 1.585, + "step": 2070 + }, + { + "epoch": 6.96, + "eval_accuracy": 0.9968692791382558, + "eval_f1": 0.9795636564484949, + "eval_loss": 0.01587117277085781, + "eval_precision": 0.9755225522552256, + "eval_recall": 0.9836383804769828, + "eval_runtime": 45.1551, + "eval_samples_per_second": 9.501, + "eval_steps_per_second": 1.595, + "step": 2080 + }, + { + "epoch": 6.99, + "eval_accuracy": 0.9967100899418959, + "eval_f1": 0.9783776339347198, + "eval_loss": 0.017061389982700348, + "eval_precision": 0.9718194254445964, + "eval_recall": 0.9850249584026622, + "eval_runtime": 45.1957, + "eval_samples_per_second": 9.492, + "eval_steps_per_second": 1.593, + "step": 2090 + }, + { + "epoch": 7.02, + "eval_accuracy": 0.9965906980446261, + "eval_f1": 0.9775821757667446, + "eval_loss": 0.018233157694339752, + "eval_precision": 0.9697135061391542, + "eval_recall": 0.985579589572934, + "eval_runtime": 45.2503, + "eval_samples_per_second": 9.481, + "eval_steps_per_second": 1.591, + "step": 2100 + }, + { + "epoch": 7.06, + "eval_accuracy": 0.9970019368018891, + "eval_f1": 0.9790113228389947, + "eval_loss": 0.01590120792388916, + "eval_precision": 0.974972497249725, + "eval_recall": 0.983083749306711, + "eval_runtime": 45.4565, + "eval_samples_per_second": 9.438, + "eval_steps_per_second": 1.584, + "step": 2110 + }, + { + "epoch": 7.09, + "eval_accuracy": 0.9968692791382558, + "eval_f1": 0.9770971302428255, + "eval_loss": 0.015739668160676956, + "eval_precision": 0.972267984623833, + "eval_recall": 0.9819744869661675, + "eval_runtime": 46.0012, + "eval_samples_per_second": 9.326, + "eval_steps_per_second": 1.565, + "step": 2120 + }, + { + "epoch": 7.12, + "eval_accuracy": 0.9969223422037091, + "eval_f1": 0.9790113228389947, + "eval_loss": 0.015736253932118416, + "eval_precision": 0.974972497249725, + "eval_recall": 0.983083749306711, + "eval_runtime": 45.4255, + "eval_samples_per_second": 9.444, + "eval_steps_per_second": 1.585, + "step": 2130 + }, + { + "epoch": 7.16, + "eval_accuracy": 0.9968825449046191, + "eval_f1": 0.9784827586206897, + "eval_loss": 0.016040045768022537, + "eval_precision": 0.9733809001097695, + "eval_recall": 0.9836383804769828, + "eval_runtime": 45.5296, + "eval_samples_per_second": 9.422, + "eval_steps_per_second": 1.581, + "step": 2140 + }, + { + "epoch": 7.19, + "eval_accuracy": 0.9969223422037091, + "eval_f1": 0.9786413118368471, + "eval_loss": 0.01620173640549183, + "eval_precision": 0.9726102437688304, + "eval_recall": 0.9847476428175264, + "eval_runtime": 45.6034, + "eval_samples_per_second": 9.407, + "eval_steps_per_second": 1.579, + "step": 2150 + }, + { + "epoch": 7.22, + "eval_accuracy": 0.9969886710355257, + "eval_f1": 0.9798565121412803, + "eval_loss": 0.0158588457852602, + "eval_precision": 0.9750137287204832, + "eval_recall": 0.9847476428175264, + "eval_runtime": 45.789, + "eval_samples_per_second": 9.369, + "eval_steps_per_second": 1.572, + "step": 2160 + }, + { + "epoch": 7.26, + "eval_accuracy": 0.9969488737364357, + "eval_f1": 0.9791522849647936, + "eval_loss": 0.015738798305392265, + "eval_precision": 0.9749793786087435, + "eval_recall": 0.9833610648918469, + "eval_runtime": 45.511, + "eval_samples_per_second": 9.426, + "eval_steps_per_second": 1.582, + "step": 2170 + }, + { + "epoch": 7.29, + "eval_accuracy": 0.9970549998673424, + "eval_f1": 0.9794113582976371, + "eval_loss": 0.015642931684851646, + "eval_precision": 0.976039658496282, + "eval_recall": 0.9828064337215752, + "eval_runtime": 45.1132, + "eval_samples_per_second": 9.509, + "eval_steps_per_second": 1.596, + "step": 2180 + }, + { + "epoch": 7.32, + "eval_accuracy": 0.9971611259982489, + "eval_f1": 0.9811998894111142, + "eval_loss": 0.015118683688342571, + "eval_precision": 0.9782249173098125, + "eval_recall": 0.9841930116472546, + "eval_runtime": 45.0629, + "eval_samples_per_second": 9.52, + "eval_steps_per_second": 1.598, + "step": 2190 + }, + { + "epoch": 7.36, + "eval_accuracy": 0.9973601124936987, + "eval_f1": 0.982160143825197, + "eval_loss": 0.015025600790977478, + "eval_precision": 0.9795862068965517, + "eval_recall": 0.9847476428175264, + "eval_runtime": 45.3715, + "eval_samples_per_second": 9.455, + "eval_steps_per_second": 1.587, + "step": 2200 + }, + { + "epoch": 7.39, + "eval_accuracy": 0.9972539863627922, + "eval_f1": 0.9786821705426357, + "eval_loss": 0.014340460300445557, + "eval_precision": 0.9770591487009398, + "eval_recall": 0.9803105934553522, + "eval_runtime": 45.1287, + "eval_samples_per_second": 9.506, + "eval_steps_per_second": 1.595, + "step": 2210 + }, + { + "epoch": 7.42, + "eval_accuracy": 0.9973335809609721, + "eval_f1": 0.9813097051086805, + "eval_loss": 0.0135923121124506, + "eval_precision": 0.979817528338402, + "eval_recall": 0.9828064337215752, + "eval_runtime": 45.1943, + "eval_samples_per_second": 9.492, + "eval_steps_per_second": 1.593, + "step": 2220 + }, + { + "epoch": 7.46, + "eval_accuracy": 0.9972009232973389, + "eval_f1": 0.978570441034149, + "eval_loss": 0.014164491556584835, + "eval_precision": 0.9757375241246209, + "eval_recall": 0.9814198557958957, + "eval_runtime": 45.309, + "eval_samples_per_second": 9.468, + "eval_steps_per_second": 1.589, + "step": 2230 + }, + { + "epoch": 7.49, + "eval_accuracy": 0.9972672521291555, + "eval_f1": 0.9806308799114555, + "eval_loss": 0.01402602344751358, + "eval_precision": 0.9784649364991718, + "eval_recall": 0.9828064337215752, + "eval_runtime": 45.5204, + "eval_samples_per_second": 9.424, + "eval_steps_per_second": 1.582, + "step": 2240 + }, + { + "epoch": 7.53, + "eval_accuracy": 0.9972539863627922, + "eval_f1": 0.9809181415929205, + "eval_loss": 0.014084520749747753, + "eval_precision": 0.9782129067843354, + "eval_recall": 0.9836383804769828, + "eval_runtime": 45.1882, + "eval_samples_per_second": 9.494, + "eval_steps_per_second": 1.593, + "step": 2250 + }, + { + "epoch": 7.56, + "eval_accuracy": 0.9972937836618821, + "eval_f1": 0.9835111542192047, + "eval_loss": 0.014140544459223747, + "eval_precision": 0.9828302409304902, + "eval_recall": 0.9841930116472546, + "eval_runtime": 45.1162, + "eval_samples_per_second": 9.509, + "eval_steps_per_second": 1.596, + "step": 2260 + }, + { + "epoch": 7.59, + "eval_accuracy": 0.9971213286991589, + "eval_f1": 0.980484429065744, + "eval_loss": 0.015128599479794502, + "eval_precision": 0.9787234042553191, + "eval_recall": 0.9822518025513034, + "eval_runtime": 45.1622, + "eval_samples_per_second": 9.499, + "eval_steps_per_second": 1.594, + "step": 2270 + }, + { + "epoch": 7.63, + "eval_accuracy": 0.9971743917646122, + "eval_f1": 0.9838017444275232, + "eval_loss": 0.014916467480361462, + "eval_precision": 0.9823057782692839, + "eval_recall": 0.9853022739877981, + "eval_runtime": 45.3078, + "eval_samples_per_second": 9.469, + "eval_steps_per_second": 1.589, + "step": 2280 + }, + { + "epoch": 7.66, + "eval_accuracy": 0.9971876575309756, + "eval_f1": 0.9826989619377162, + "eval_loss": 0.014806441031396389, + "eval_precision": 0.980933959657364, + "eval_recall": 0.9844703272323905, + "eval_runtime": 45.215, + "eval_samples_per_second": 9.488, + "eval_steps_per_second": 1.592, + "step": 2290 + }, + { + "epoch": 7.69, + "eval_accuracy": 0.9973335809609721, + "eval_f1": 0.9828444936358606, + "eval_loss": 0.014511052519083023, + "eval_precision": 0.9806736609607951, + "eval_recall": 0.9850249584026622, + "eval_runtime": 45.1721, + "eval_samples_per_second": 9.497, + "eval_steps_per_second": 1.594, + "step": 2300 + }, + { + "epoch": 7.73, + "eval_accuracy": 0.9973070494282454, + "eval_f1": 0.9824124082537044, + "eval_loss": 0.014819289557635784, + "eval_precision": 0.9811894882434301, + "eval_recall": 0.9836383804769828, + "eval_runtime": 45.484, + "eval_samples_per_second": 9.432, + "eval_steps_per_second": 1.583, + "step": 2310 + }, + { + "epoch": 7.76, + "eval_accuracy": 0.9971611259982489, + "eval_f1": 0.9818935729094679, + "eval_loss": 0.015233664773404598, + "eval_precision": 0.9787820336180766, + "eval_recall": 0.9850249584026622, + "eval_runtime": 45.245, + "eval_samples_per_second": 9.482, + "eval_steps_per_second": 1.591, + "step": 2320 + }, + { + "epoch": 7.79, + "eval_accuracy": 0.9970549998673424, + "eval_f1": 0.9814865985078752, + "eval_loss": 0.01592710055410862, + "eval_precision": 0.9779735682819384, + "eval_recall": 0.9850249584026622, + "eval_runtime": 45.1068, + "eval_samples_per_second": 9.511, + "eval_steps_per_second": 1.596, + "step": 2330 + }, + { + "epoch": 7.83, + "eval_accuracy": 0.9970019368018891, + "eval_f1": 0.9807984528249758, + "eval_loss": 0.01635568030178547, + "eval_precision": 0.9771538673272777, + "eval_recall": 0.9844703272323905, + "eval_runtime": 45.3054, + "eval_samples_per_second": 9.469, + "eval_steps_per_second": 1.589, + "step": 2340 + }, + { + "epoch": 7.86, + "eval_accuracy": 0.9971213286991589, + "eval_f1": 0.9810747340792927, + "eval_loss": 0.015507887117564678, + "eval_precision": 0.9774291219377924, + "eval_recall": 0.9847476428175264, + "eval_runtime": 45.3049, + "eval_samples_per_second": 9.469, + "eval_steps_per_second": 1.589, + "step": 2350 + }, + { + "epoch": 7.89, + "eval_accuracy": 0.9971743917646122, + "eval_f1": 0.9814814814814814, + "eval_loss": 0.01488608680665493, + "eval_precision": 0.978236914600551, + "eval_recall": 0.9847476428175264, + "eval_runtime": 45.3669, + "eval_samples_per_second": 9.456, + "eval_steps_per_second": 1.587, + "step": 2360 + }, + { + "epoch": 7.93, + "eval_accuracy": 0.9969090764373458, + "eval_f1": 0.9790055248618784, + "eval_loss": 0.016140291467308998, + "eval_precision": 0.9752339020363237, + "eval_recall": 0.9828064337215752, + "eval_runtime": 45.5755, + "eval_samples_per_second": 9.413, + "eval_steps_per_second": 1.58, + "step": 2370 + }, + { + "epoch": 7.96, + "eval_accuracy": 0.9968427476055292, + "eval_f1": 0.980511402902557, + "eval_loss": 0.01678432524204254, + "eval_precision": 0.9774042435932764, + "eval_recall": 0.9836383804769828, + "eval_runtime": 45.3412, + "eval_samples_per_second": 9.462, + "eval_steps_per_second": 1.588, + "step": 2380 + }, + { + "epoch": 7.99, + "eval_accuracy": 0.9968162160728026, + "eval_f1": 0.9803704727674868, + "eval_loss": 0.016629330813884735, + "eval_precision": 0.9773980154355016, + "eval_recall": 0.9833610648918469, + "eval_runtime": 45.5099, + "eval_samples_per_second": 9.427, + "eval_steps_per_second": 1.582, + "step": 2390 + }, + { + "epoch": 8.03, + "eval_accuracy": 0.9968560133718924, + "eval_f1": 0.9798119469026549, + "eval_loss": 0.01662250980734825, + "eval_precision": 0.9771097628240485, + "eval_recall": 0.9825291181364393, + "eval_runtime": 45.337, + "eval_samples_per_second": 9.462, + "eval_steps_per_second": 1.588, + "step": 2400 + }, + { + "epoch": 8.06, + "eval_accuracy": 0.9970019368018891, + "eval_f1": 0.9813355454168395, + "eval_loss": 0.0163425225764513, + "eval_precision": 0.978494623655914, + "eval_recall": 0.9841930116472546, + "eval_runtime": 45.4794, + "eval_samples_per_second": 9.433, + "eval_steps_per_second": 1.583, + "step": 2410 + }, + { + "epoch": 8.09, + "eval_accuracy": 0.9970947971664323, + "eval_f1": 0.9811842833425568, + "eval_loss": 0.016381612047553062, + "eval_precision": 0.9790171176145775, + "eval_recall": 0.9833610648918469, + "eval_runtime": 45.1726, + "eval_samples_per_second": 9.497, + "eval_steps_per_second": 1.594, + "step": 2420 + }, + { + "epoch": 8.13, + "eval_accuracy": 0.9971213286991589, + "eval_f1": 0.9817528338401991, + "eval_loss": 0.016340401023626328, + "eval_precision": 0.9787761852260198, + "eval_recall": 0.9847476428175264, + "eval_runtime": 45.3524, + "eval_samples_per_second": 9.459, + "eval_steps_per_second": 1.588, + "step": 2430 + }, + { + "epoch": 8.16, + "eval_accuracy": 0.9970019368018891, + "eval_f1": 0.980771890994605, + "eval_loss": 0.016326354816555977, + "eval_precision": 0.9784708804857852, + "eval_recall": 0.983083749306711, + "eval_runtime": 45.2927, + "eval_samples_per_second": 9.472, + "eval_steps_per_second": 1.59, + "step": 2440 + }, + { + "epoch": 8.19, + "eval_accuracy": 0.9970682656337057, + "eval_f1": 0.9817275747508305, + "eval_loss": 0.015510362572968006, + "eval_precision": 0.9800995024875622, + "eval_recall": 0.9833610648918469, + "eval_runtime": 45.5096, + "eval_samples_per_second": 9.427, + "eval_steps_per_second": 1.582, + "step": 2450 + }, + { + "epoch": 8.23, + "eval_accuracy": 0.9971345944655223, + "eval_f1": 0.9824464409122322, + "eval_loss": 0.015236412174999714, + "eval_precision": 0.9793331496279967, + "eval_recall": 0.985579589572934, + "eval_runtime": 45.3704, + "eval_samples_per_second": 9.455, + "eval_steps_per_second": 1.587, + "step": 2460 + }, + { + "epoch": 8.26, + "eval_accuracy": 0.9971345944655223, + "eval_f1": 0.9814865985078752, + "eval_loss": 0.015579747967422009, + "eval_precision": 0.9779735682819384, + "eval_recall": 0.9850249584026622, + "eval_runtime": 45.2219, + "eval_samples_per_second": 9.487, + "eval_steps_per_second": 1.592, + "step": 2470 + }, + { + "epoch": 8.29, + "eval_accuracy": 0.9972009232973389, + "eval_f1": 0.9789590254706533, + "eval_loss": 0.014976629987359047, + "eval_precision": 0.9773355444997236, + "eval_recall": 0.9805879090404881, + "eval_runtime": 45.5521, + "eval_samples_per_second": 9.418, + "eval_steps_per_second": 1.581, + "step": 2480 + }, + { + "epoch": 8.33, + "eval_accuracy": 0.9973335809609721, + "eval_f1": 0.9824318716281643, + "eval_loss": 0.014180959202349186, + "eval_precision": 0.9801269666022633, + "eval_recall": 0.9847476428175264, + "eval_runtime": 45.6388, + "eval_samples_per_second": 9.4, + "eval_steps_per_second": 1.578, + "step": 2490 + }, + { + "epoch": 8.36, + "learning_rate": 7.500000000000001e-06, + "loss": 0.0051, + "step": 2500 + }, + { + "epoch": 8.36, + "eval_accuracy": 0.9972274548300656, + "eval_f1": 0.9791522849647936, + "eval_loss": 0.014656171202659607, + "eval_precision": 0.9749793786087435, + "eval_recall": 0.9833610648918469, + "eval_runtime": 45.2727, + "eval_samples_per_second": 9.476, + "eval_steps_per_second": 1.59, + "step": 2500 + }, + { + "epoch": 8.39, + "eval_accuracy": 0.9971213286991589, + "eval_f1": 0.9780477702609416, + "eval_loss": 0.014881155453622341, + "eval_precision": 0.9738795710750618, + "eval_recall": 0.9822518025513034, + "eval_runtime": 45.7124, + "eval_samples_per_second": 9.385, + "eval_steps_per_second": 1.575, + "step": 2510 + }, + { + "epoch": 8.43, + "eval_accuracy": 0.9972141890637022, + "eval_f1": 0.9806629834254144, + "eval_loss": 0.014929546974599361, + "eval_precision": 0.976884975233902, + "eval_recall": 0.9844703272323905, + "eval_runtime": 45.3886, + "eval_samples_per_second": 9.452, + "eval_steps_per_second": 1.586, + "step": 2520 + }, + { + "epoch": 8.46, + "eval_accuracy": 0.9972009232973389, + "eval_f1": 0.9802404311178665, + "eval_loss": 0.014953644014894962, + "eval_precision": 0.976865877168824, + "eval_recall": 0.9836383804769828, + "eval_runtime": 45.8505, + "eval_samples_per_second": 9.356, + "eval_steps_per_second": 1.57, + "step": 2530 + }, + { + "epoch": 8.49, + "eval_accuracy": 0.9971345944655223, + "eval_f1": 0.9813252178724583, + "eval_loss": 0.014914426021277905, + "eval_precision": 0.979022909191278, + "eval_recall": 0.9836383804769828, + "eval_runtime": 45.5694, + "eval_samples_per_second": 9.414, + "eval_steps_per_second": 1.58, + "step": 2540 + }, + { + "epoch": 8.53, + "eval_accuracy": 0.9972672521291555, + "eval_f1": 0.9826941713969264, + "eval_loss": 0.014766544103622437, + "eval_precision": 0.9811998894111141, + "eval_recall": 0.9841930116472546, + "eval_runtime": 45.2509, + "eval_samples_per_second": 9.48, + "eval_steps_per_second": 1.591, + "step": 2550 + }, + { + "epoch": 8.56, + "eval_accuracy": 0.9972407205964289, + "eval_f1": 0.9839468585662884, + "eval_loss": 0.015172240324318409, + "eval_precision": 0.9820441988950276, + "eval_recall": 0.9858569051580699, + "eval_runtime": 45.7001, + "eval_samples_per_second": 9.387, + "eval_steps_per_second": 1.575, + "step": 2560 + }, + { + "epoch": 8.6, + "eval_accuracy": 0.9971345944655223, + "eval_f1": 0.9829993089149965, + "eval_loss": 0.015841683372855186, + "eval_precision": 0.9798842656379168, + "eval_recall": 0.9861342207432058, + "eval_runtime": 45.6884, + "eval_samples_per_second": 9.39, + "eval_steps_per_second": 1.576, + "step": 2570 + }, + { + "epoch": 8.63, + "eval_accuracy": 0.997041734100979, + "eval_f1": 0.9813407049067036, + "eval_loss": 0.016232503578066826, + "eval_precision": 0.9782309176081565, + "eval_recall": 0.9844703272323905, + "eval_runtime": 45.3554, + "eval_samples_per_second": 9.459, + "eval_steps_per_second": 1.587, + "step": 2580 + }, + { + "epoch": 8.66, + "eval_accuracy": 0.9969621395027991, + "eval_f1": 0.9763714246234628, + "eval_loss": 0.015989486128091812, + "eval_precision": 0.9730101900302947, + "eval_recall": 0.9797559622850804, + "eval_runtime": 45.3338, + "eval_samples_per_second": 9.463, + "eval_steps_per_second": 1.588, + "step": 2590 + }, + { + "epoch": 8.7, + "eval_accuracy": 0.9969223422037091, + "eval_f1": 0.9746922970543493, + "eval_loss": 0.015164612792432308, + "eval_precision": 0.9721379310344828, + "eval_recall": 0.9772601220188575, + "eval_runtime": 45.3618, + "eval_samples_per_second": 9.457, + "eval_steps_per_second": 1.587, + "step": 2600 + }, + { + "epoch": 8.73, + "eval_accuracy": 0.9970284683346157, + "eval_f1": 0.9752111895859299, + "eval_loss": 0.015361527912318707, + "eval_precision": 0.9739972337482711, + "eval_recall": 0.9764281752634498, + "eval_runtime": 45.7043, + "eval_samples_per_second": 9.386, + "eval_steps_per_second": 1.575, + "step": 2610 + }, + { + "epoch": 8.76, + "eval_accuracy": 0.9970947971664323, + "eval_f1": 0.9776057506220625, + "eval_loss": 0.015215002000331879, + "eval_precision": 0.9746416758544653, + "eval_recall": 0.9805879090404881, + "eval_runtime": 45.5622, + "eval_samples_per_second": 9.416, + "eval_steps_per_second": 1.58, + "step": 2620 + }, + { + "epoch": 8.8, + "eval_accuracy": 0.9970815314000689, + "eval_f1": 0.9777470628887354, + "eval_loss": 0.014545532874763012, + "eval_precision": 0.974648663543676, + "eval_recall": 0.980865224625624, + "eval_runtime": 45.4027, + "eval_samples_per_second": 9.449, + "eval_steps_per_second": 1.586, + "step": 2630 + }, + { + "epoch": 8.83, + "eval_accuracy": 0.9971611259982489, + "eval_f1": 0.9813355454168395, + "eval_loss": 0.014298198744654655, + "eval_precision": 0.978494623655914, + "eval_recall": 0.9841930116472546, + "eval_runtime": 45.6015, + "eval_samples_per_second": 9.408, + "eval_steps_per_second": 1.579, + "step": 2640 + }, + { + "epoch": 8.86, + "eval_accuracy": 0.9971213286991589, + "eval_f1": 0.9804952275556785, + "eval_loss": 0.014669723808765411, + "eval_precision": 0.978194866133039, + "eval_recall": 0.9828064337215752, + "eval_runtime": 45.6583, + "eval_samples_per_second": 9.396, + "eval_steps_per_second": 1.577, + "step": 2650 + }, + { + "epoch": 8.9, + "eval_accuracy": 0.9970549998673424, + "eval_f1": 0.9749550297495504, + "eval_loss": 0.014710523188114166, + "eval_precision": 0.9729356531344933, + "eval_recall": 0.9769828064337216, + "eval_runtime": 45.2863, + "eval_samples_per_second": 9.473, + "eval_steps_per_second": 1.59, + "step": 2660 + }, + { + "epoch": 8.93, + "eval_accuracy": 0.9972141890637022, + "eval_f1": 0.9820094104622197, + "eval_loss": 0.014592879451811314, + "eval_precision": 0.980110497237569, + "eval_recall": 0.9839156960621187, + "eval_runtime": 45.3618, + "eval_samples_per_second": 9.457, + "eval_steps_per_second": 1.587, + "step": 2670 + }, + { + "epoch": 8.96, + "eval_accuracy": 0.9971876575309756, + "eval_f1": 0.9817477876106194, + "eval_loss": 0.014163294807076454, + "eval_precision": 0.9790402647545504, + "eval_recall": 0.9844703272323905, + "eval_runtime": 45.3659, + "eval_samples_per_second": 9.456, + "eval_steps_per_second": 1.587, + "step": 2680 + }, + { + "epoch": 9.0, + "eval_accuracy": 0.9971345944655223, + "eval_f1": 0.9792932081722805, + "eval_loss": 0.01418287307024002, + "eval_precision": 0.9749862561847169, + "eval_recall": 0.9836383804769828, + "eval_runtime": 45.6259, + "eval_samples_per_second": 9.403, + "eval_steps_per_second": 1.578, + "step": 2690 + }, + { + "epoch": 9.03, + "eval_accuracy": 0.9971478602318856, + "eval_f1": 0.9809286898839138, + "eval_loss": 0.014496715739369392, + "eval_precision": 0.9776859504132231, + "eval_recall": 0.9841930116472546, + "eval_runtime": 45.371, + "eval_samples_per_second": 9.455, + "eval_steps_per_second": 1.587, + "step": 2700 + }, + { + "epoch": 9.06, + "eval_accuracy": 0.9970947971664323, + "eval_f1": 0.9802295036637633, + "eval_loss": 0.014654590748250484, + "eval_precision": 0.9773917838433968, + "eval_recall": 0.983083749306711, + "eval_runtime": 45.386, + "eval_samples_per_second": 9.452, + "eval_steps_per_second": 1.586, + "step": 2710 + }, + { + "epoch": 9.1, + "eval_accuracy": 0.9970682656337057, + "eval_f1": 0.9795410561238596, + "eval_loss": 0.014612732455134392, + "eval_precision": 0.9765711135611908, + "eval_recall": 0.9825291181364393, + "eval_runtime": 45.3853, + "eval_samples_per_second": 9.452, + "eval_steps_per_second": 1.586, + "step": 2720 + }, + { + "epoch": 9.13, + "eval_accuracy": 0.9970947971664323, + "eval_f1": 0.9796989366109652, + "eval_loss": 0.014642550610005856, + "eval_precision": 0.9757909215955983, + "eval_recall": 0.9836383804769828, + "eval_runtime": 45.583, + "eval_samples_per_second": 9.411, + "eval_steps_per_second": 1.58, + "step": 2730 + }, + { + "epoch": 9.16, + "eval_accuracy": 0.9970815314000689, + "eval_f1": 0.9792817679558011, + "eval_loss": 0.014756478369235992, + "eval_precision": 0.9755090809025867, + "eval_recall": 0.983083749306711, + "eval_runtime": 45.3958, + "eval_samples_per_second": 9.45, + "eval_steps_per_second": 1.586, + "step": 2740 + }, + { + "epoch": 9.2, + "eval_accuracy": 0.9971611259982489, + "eval_f1": 0.9781526548672567, + "eval_loss": 0.014514540322124958, + "eval_precision": 0.9754550468836183, + "eval_recall": 0.980865224625624, + "eval_runtime": 45.3479, + "eval_samples_per_second": 9.46, + "eval_steps_per_second": 1.588, + "step": 2750 + }, + { + "epoch": 9.23, + "eval_accuracy": 0.9971876575309756, + "eval_f1": 0.9780234968901176, + "eval_loss": 0.014607676304876804, + "eval_precision": 0.974924221548636, + "eval_recall": 0.9811425402107599, + "eval_runtime": 45.3679, + "eval_samples_per_second": 9.456, + "eval_steps_per_second": 1.587, + "step": 2760 + }, + { + "epoch": 9.26, + "eval_accuracy": 0.9971213286991589, + "eval_f1": 0.9799585348997927, + "eval_loss": 0.014230134896934032, + "eval_precision": 0.9768531275833563, + "eval_recall": 0.983083749306711, + "eval_runtime": 45.6606, + "eval_samples_per_second": 9.395, + "eval_steps_per_second": 1.577, + "step": 2770 + }, + { + "epoch": 9.3, + "eval_accuracy": 0.9970815314000689, + "eval_f1": 0.9814814814814814, + "eval_loss": 0.014004090800881386, + "eval_precision": 0.978236914600551, + "eval_recall": 0.9847476428175264, + "eval_runtime": 45.4655, + "eval_samples_per_second": 9.436, + "eval_steps_per_second": 1.584, + "step": 2780 + }, + { + "epoch": 9.33, + "eval_accuracy": 0.9971080629327956, + "eval_f1": 0.980359612724758, + "eval_loss": 0.013850794173777103, + "eval_precision": 0.977924944812362, + "eval_recall": 0.9828064337215752, + "eval_runtime": 45.4768, + "eval_samples_per_second": 9.433, + "eval_steps_per_second": 1.583, + "step": 2790 + }, + { + "epoch": 9.36, + "eval_accuracy": 0.9970549998673424, + "eval_f1": 0.9795070617557464, + "eval_loss": 0.014602423645555973, + "eval_precision": 0.9781526548672567, + "eval_recall": 0.980865224625624, + "eval_runtime": 46.0797, + "eval_samples_per_second": 9.31, + "eval_steps_per_second": 1.563, + "step": 2800 + }, + { + "epoch": 9.4, + "eval_accuracy": 0.9971345944655223, + "eval_f1": 0.9811738648947951, + "eval_loss": 0.01487213745713234, + "eval_precision": 0.9795467108899945, + "eval_recall": 0.9828064337215752, + "eval_runtime": 45.4621, + "eval_samples_per_second": 9.436, + "eval_steps_per_second": 1.584, + "step": 2810 + }, + { + "epoch": 9.43, + "eval_accuracy": 0.9971345944655223, + "eval_f1": 0.9789648491558263, + "eval_loss": 0.01481552142649889, + "eval_precision": 0.9770718232044199, + "eval_recall": 0.980865224625624, + "eval_runtime": 45.388, + "eval_samples_per_second": 9.452, + "eval_steps_per_second": 1.586, + "step": 2820 + }, + { + "epoch": 9.46, + "eval_accuracy": 0.9973601124936987, + "eval_f1": 0.9822763777346994, + "eval_loss": 0.01404637098312378, + "eval_precision": 0.9809181415929203, + "eval_recall": 0.9836383804769828, + "eval_runtime": 45.4647, + "eval_samples_per_second": 9.436, + "eval_steps_per_second": 1.584, + "step": 2830 + }, + { + "epoch": 9.5, + "eval_accuracy": 0.9969488737364357, + "eval_f1": 0.9712539925010416, + "eval_loss": 0.014825278893113136, + "eval_precision": 0.972739916550765, + "eval_recall": 0.9697726012201886, + "eval_runtime": 45.3762, + "eval_samples_per_second": 9.454, + "eval_steps_per_second": 1.587, + "step": 2840 + }, + { + "epoch": 9.53, + "eval_accuracy": 0.9964978376800828, + "eval_f1": 0.9593767390094602, + "eval_loss": 0.015539586544036865, + "eval_precision": 0.9625907314349526, + "eval_recall": 0.9561841375485303, + "eval_runtime": 45.6548, + "eval_samples_per_second": 9.397, + "eval_steps_per_second": 1.577, + "step": 2850 + }, + { + "epoch": 9.57, + "eval_accuracy": 0.9964978376800828, + "eval_f1": 0.9605006954102921, + "eval_loss": 0.016403868794441223, + "eval_precision": 0.9634486607142857, + "eval_recall": 0.9575707154742097, + "eval_runtime": 45.6372, + "eval_samples_per_second": 9.4, + "eval_steps_per_second": 1.578, + "step": 2860 + }, + { + "epoch": 9.6, + "eval_accuracy": 0.9970947971664323, + "eval_f1": 0.9791350006908941, + "eval_loss": 0.015382306650280952, + "eval_precision": 0.9757642522721014, + "eval_recall": 0.9825291181364393, + "eval_runtime": 45.4505, + "eval_samples_per_second": 9.439, + "eval_steps_per_second": 1.584, + "step": 2870 + }, + { + "epoch": 9.63, + "eval_accuracy": 0.9970947971664323, + "eval_f1": 0.9781647318960752, + "eval_loss": 0.015797466039657593, + "eval_precision": 0.974931129476584, + "eval_recall": 0.9814198557958957, + "eval_runtime": 45.42, + "eval_samples_per_second": 9.445, + "eval_steps_per_second": 1.585, + "step": 2880 + }, + { + "epoch": 9.67, + "eval_accuracy": 0.9969223422037091, + "eval_f1": 0.9791637919138954, + "eval_loss": 0.01617647521197796, + "eval_precision": 0.9744575666025818, + "eval_recall": 0.9839156960621187, + "eval_runtime": 45.4393, + "eval_samples_per_second": 9.441, + "eval_steps_per_second": 1.585, + "step": 2890 + }, + { + "epoch": 9.7, + "eval_accuracy": 0.9969223422037091, + "eval_f1": 0.978629532607197, + "eval_loss": 0.0164639949798584, + "eval_precision": 0.9731285988483686, + "eval_recall": 0.9841930116472546, + "eval_runtime": 45.4559, + "eval_samples_per_second": 9.438, + "eval_steps_per_second": 1.584, + "step": 2900 + }, + { + "epoch": 9.73, + "eval_accuracy": 0.9969886710355257, + "eval_f1": 0.9791810285399146, + "eval_loss": 0.016681063920259476, + "eval_precision": 0.9736769947902385, + "eval_recall": 0.9847476428175264, + "eval_runtime": 45.5001, + "eval_samples_per_second": 9.429, + "eval_steps_per_second": 1.582, + "step": 2910 + }, + { + "epoch": 9.77, + "eval_accuracy": 0.9969621395027991, + "eval_f1": 0.9808090570205716, + "eval_loss": 0.016886161640286446, + "eval_precision": 0.9766290899092659, + "eval_recall": 0.9850249584026622, + "eval_runtime": 45.9762, + "eval_samples_per_second": 9.331, + "eval_steps_per_second": 1.566, + "step": 2920 + }, + { + "epoch": 9.8, + "eval_accuracy": 0.9969621395027991, + "eval_f1": 0.9787351560342448, + "eval_loss": 0.017214221879839897, + "eval_precision": 0.9746974697469747, + "eval_recall": 0.9828064337215752, + "eval_runtime": 45.4576, + "eval_samples_per_second": 9.437, + "eval_steps_per_second": 1.584, + "step": 2930 + }, + { + "epoch": 9.83, + "eval_accuracy": 0.9971213286991589, + "eval_f1": 0.9816069699903195, + "eval_loss": 0.016497749835252762, + "eval_precision": 0.9790344827586207, + "eval_recall": 0.9841930116472546, + "eval_runtime": 45.3849, + "eval_samples_per_second": 9.452, + "eval_steps_per_second": 1.586, + "step": 2940 + }, + { + "epoch": 9.87, + "eval_accuracy": 0.9971876575309756, + "eval_f1": 0.9821650767316467, + "eval_loss": 0.01616404764354229, + "eval_precision": 0.9793217535153019, + "eval_recall": 0.9850249584026622, + "eval_runtime": 45.4779, + "eval_samples_per_second": 9.433, + "eval_steps_per_second": 1.583, + "step": 2950 + }, + { + "epoch": 9.9, + "eval_accuracy": 0.9970682656337057, + "eval_f1": 0.9790829754813686, + "eval_loss": 0.016479264944791794, + "eval_precision": 0.9781345142540825, + "eval_recall": 0.9800332778702163, + "eval_runtime": 45.8218, + "eval_samples_per_second": 9.362, + "eval_steps_per_second": 1.571, + "step": 2960 + }, + { + "epoch": 9.93, + "eval_accuracy": 0.9970947971664323, + "eval_f1": 0.980060924951537, + "eval_loss": 0.016018539667129517, + "eval_precision": 0.9787057522123894, + "eval_recall": 0.9814198557958957, + "eval_runtime": 45.4831, + "eval_samples_per_second": 9.432, + "eval_steps_per_second": 1.583, + "step": 2970 + }, + { + "epoch": 9.97, + "eval_accuracy": 0.9970284683346157, + "eval_f1": 0.9785348289710566, + "eval_loss": 0.016076229512691498, + "eval_precision": 0.9773167358229599, + "eval_recall": 0.9797559622850804, + "eval_runtime": 45.4108, + "eval_samples_per_second": 9.447, + "eval_steps_per_second": 1.586, + "step": 2980 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.9970019368018891, + "eval_f1": 0.9797895902547065, + "eval_loss": 0.01618347130715847, + "eval_precision": 0.9781647318960752, + "eval_recall": 0.9814198557958957, + "eval_runtime": 45.4332, + "eval_samples_per_second": 9.442, + "eval_steps_per_second": 1.585, + "step": 2990 + }, + { + "epoch": 10.03, + "learning_rate": 7e-06, + "loss": 0.0046, + "step": 3000 + }, + { + "epoch": 10.03, + "eval_accuracy": 0.9970019368018891, + "eval_f1": 0.9791234619106871, + "eval_loss": 0.016251519322395325, + "eval_precision": 0.9762889440308795, + "eval_recall": 0.9819744869661675, + "eval_runtime": 45.7254, + "eval_samples_per_second": 9.382, + "eval_steps_per_second": 1.575, + "step": 3000 + }, + { + "epoch": 10.07, + "eval_accuracy": 0.997041734100979, + "eval_f1": 0.9806576402321082, + "eval_loss": 0.0159316323697567, + "eval_precision": 0.977147577092511, + "eval_recall": 0.9841930116472546, + "eval_runtime": 45.9937, + "eval_samples_per_second": 9.327, + "eval_steps_per_second": 1.565, + "step": 3010 + }, + { + "epoch": 10.1, + "eval_accuracy": 0.9970549998673424, + "eval_f1": 0.9798007747648035, + "eval_loss": 0.015915466472506523, + "eval_precision": 0.9776366648260629, + "eval_recall": 0.9819744869661675, + "eval_runtime": 45.3335, + "eval_samples_per_second": 9.463, + "eval_steps_per_second": 1.588, + "step": 3020 + }, + { + "epoch": 10.13, + "eval_accuracy": 0.9971478602318856, + "eval_f1": 0.9813200498132004, + "eval_loss": 0.015171729028224945, + "eval_precision": 0.9792874896437448, + "eval_recall": 0.9833610648918469, + "eval_runtime": 45.3108, + "eval_samples_per_second": 9.468, + "eval_steps_per_second": 1.589, + "step": 3030 + }, + { + "epoch": 10.17, + "eval_accuracy": 0.9966570268764426, + "eval_f1": 0.9667917187717104, + "eval_loss": 0.015076521784067154, + "eval_precision": 0.9688109161793372, + "eval_recall": 0.9647809206877427, + "eval_runtime": 45.3556, + "eval_samples_per_second": 9.459, + "eval_steps_per_second": 1.587, + "step": 3040 + }, + { + "epoch": 10.2, + "eval_accuracy": 0.9968427476055292, + "eval_f1": 0.9746922970543493, + "eval_loss": 0.014860566705465317, + "eval_precision": 0.9721379310344828, + "eval_recall": 0.9772601220188575, + "eval_runtime": 45.4321, + "eval_samples_per_second": 9.443, + "eval_steps_per_second": 1.585, + "step": 3050 + }, + { + "epoch": 10.23, + "eval_accuracy": 0.9970682656337057, + "eval_f1": 0.980110497237569, + "eval_loss": 0.014547467231750488, + "eval_precision": 0.9763346175013758, + "eval_recall": 0.9839156960621187, + "eval_runtime": 45.3097, + "eval_samples_per_second": 9.468, + "eval_steps_per_second": 1.589, + "step": 3060 + }, + { + "epoch": 10.27, + "eval_accuracy": 0.997041734100979, + "eval_f1": 0.9792760431058304, + "eval_loss": 0.014211696572601795, + "eval_precision": 0.9757709251101322, + "eval_recall": 0.9828064337215752, + "eval_runtime": 45.7056, + "eval_samples_per_second": 9.386, + "eval_steps_per_second": 1.575, + "step": 3070 + }, + { + "epoch": 10.3, + "eval_accuracy": 0.9970284683346157, + "eval_f1": 0.9788469514724182, + "eval_loss": 0.01424412615597248, + "eval_precision": 0.9760132340777502, + "eval_recall": 0.9816971713810316, + "eval_runtime": 45.6738, + "eval_samples_per_second": 9.393, + "eval_steps_per_second": 1.576, + "step": 3080 + }, + { + "epoch": 10.33, + "eval_accuracy": 0.9969356079700724, + "eval_f1": 0.9745293466223699, + "eval_loss": 0.014736946672201157, + "eval_precision": 0.9729132117191819, + "eval_recall": 0.9761508596783139, + "eval_runtime": 45.4811, + "eval_samples_per_second": 9.432, + "eval_steps_per_second": 1.583, + "step": 3090 + }, + { + "epoch": 10.37, + "eval_accuracy": 0.9970682656337057, + "eval_f1": 0.9801050013815971, + "eval_loss": 0.014717744663357735, + "eval_precision": 0.9765969162995595, + "eval_recall": 0.9836383804769828, + "eval_runtime": 45.415, + "eval_samples_per_second": 9.446, + "eval_steps_per_second": 1.585, + "step": 3100 + }, + { + "epoch": 10.4, + "eval_accuracy": 0.9971080629327956, + "eval_f1": 0.9810642709053213, + "eval_loss": 0.015054759569466114, + "eval_precision": 0.9779553596031965, + "eval_recall": 0.9841930116472546, + "eval_runtime": 45.7255, + "eval_samples_per_second": 9.382, + "eval_steps_per_second": 1.575, + "step": 3110 + }, + { + "epoch": 10.43, + "eval_accuracy": 0.9971080629327956, + "eval_f1": 0.9807931463313527, + "eval_loss": 0.01575205847620964, + "eval_precision": 0.9774166896171853, + "eval_recall": 0.9841930116472546, + "eval_runtime": 45.3913, + "eval_samples_per_second": 9.451, + "eval_steps_per_second": 1.586, + "step": 3120 + }, + { + "epoch": 10.47, + "eval_accuracy": 0.9968162160728026, + "eval_f1": 0.97202216066482, + "eval_loss": 0.015902305021882057, + "eval_precision": 0.9709463198671832, + "eval_recall": 0.9731003882418192, + "eval_runtime": 45.3669, + "eval_samples_per_second": 9.456, + "eval_steps_per_second": 1.587, + "step": 3130 + }, + { + "epoch": 10.5, + "eval_accuracy": 0.9966835584091693, + "eval_f1": 0.9686633388796451, + "eval_loss": 0.016264360398054123, + "eval_precision": 0.9686633388796451, + "eval_recall": 0.9686633388796451, + "eval_runtime": 45.3009, + "eval_samples_per_second": 9.47, + "eval_steps_per_second": 1.589, + "step": 3140 + }, + { + "epoch": 10.54, + "eval_accuracy": 0.9969621395027991, + "eval_f1": 0.9782698961937716, + "eval_loss": 0.015213034115731716, + "eval_precision": 0.9765128488532744, + "eval_recall": 0.9800332778702163, + "eval_runtime": 45.7467, + "eval_samples_per_second": 9.378, + "eval_steps_per_second": 1.574, + "step": 3150 + }, + { + "epoch": 10.57, + "eval_accuracy": 0.9969886710355257, + "eval_f1": 0.978693967902601, + "eval_loss": 0.014814727008342743, + "eval_precision": 0.9765323025952513, + "eval_recall": 0.980865224625624, + "eval_runtime": 45.3386, + "eval_samples_per_second": 9.462, + "eval_steps_per_second": 1.588, + "step": 3160 + }, + { + "epoch": 10.6, + "eval_accuracy": 0.9970549998673424, + "eval_f1": 0.9793828697938286, + "eval_loss": 0.015096531249582767, + "eval_precision": 0.9773543220104943, + "eval_recall": 0.9814198557958957, + "eval_runtime": 45.6502, + "eval_samples_per_second": 9.398, + "eval_steps_per_second": 1.577, + "step": 3170 + }, + { + "epoch": 10.64, + "eval_accuracy": 0.997041734100979, + "eval_f1": 0.9764998617638928, + "eval_loss": 0.015553129836916924, + "eval_precision": 0.9735391400220507, + "eval_recall": 0.9794786466999446, + "eval_runtime": 45.4708, + "eval_samples_per_second": 9.435, + "eval_steps_per_second": 1.583, + "step": 3180 + }, + { + "epoch": 10.67, + "eval_accuracy": 0.9970152025682524, + "eval_f1": 0.9774643992810729, + "eval_loss": 0.0158841609954834, + "eval_precision": 0.9746346843121036, + "eval_recall": 0.9803105934553522, + "eval_runtime": 45.4753, + "eval_samples_per_second": 9.434, + "eval_steps_per_second": 1.583, + "step": 3190 + }, + { + "epoch": 10.7, + "eval_accuracy": 0.9970682656337057, + "eval_f1": 0.9800884955752212, + "eval_loss": 0.016482515260577202, + "eval_precision": 0.9773855488141202, + "eval_recall": 0.9828064337215752, + "eval_runtime": 45.5435, + "eval_samples_per_second": 9.42, + "eval_steps_per_second": 1.581, + "step": 3200 + }, + { + "epoch": 10.74, + "eval_accuracy": 0.9970815314000689, + "eval_f1": 0.9795410561238596, + "eval_loss": 0.016413580626249313, + "eval_precision": 0.9765711135611908, + "eval_recall": 0.9825291181364393, + "eval_runtime": 45.616, + "eval_samples_per_second": 9.405, + "eval_steps_per_second": 1.578, + "step": 3210 + }, + { + "epoch": 10.77, + "eval_accuracy": 0.9969621395027991, + "eval_f1": 0.9791176877333702, + "eval_loss": 0.01622335985302925, + "eval_precision": 0.976551724137931, + "eval_recall": 0.9816971713810316, + "eval_runtime": 45.7676, + "eval_samples_per_second": 9.373, + "eval_steps_per_second": 1.573, + "step": 3220 + }, + { + "epoch": 10.8, + "eval_accuracy": 0.9969621395027991, + "eval_f1": 0.9803541781959049, + "eval_loss": 0.015803754329681396, + "eval_precision": 0.9781888459414688, + "eval_recall": 0.9825291181364393, + "eval_runtime": 45.7275, + "eval_samples_per_second": 9.382, + "eval_steps_per_second": 1.575, + "step": 3230 + }, + { + "epoch": 10.84, + "eval_accuracy": 0.9968958106709824, + "eval_f1": 0.9793828697938286, + "eval_loss": 0.015858009457588196, + "eval_precision": 0.9773543220104943, + "eval_recall": 0.9814198557958957, + "eval_runtime": 45.806, + "eval_samples_per_second": 9.366, + "eval_steps_per_second": 1.572, + "step": 3240 + }, + { + "epoch": 10.87, + "eval_accuracy": 0.9966968241755326, + "eval_f1": 0.9738263398421271, + "eval_loss": 0.016643954440951347, + "eval_precision": 0.9726141078838174, + "eval_recall": 0.9750415973377704, + "eval_runtime": 45.5359, + "eval_samples_per_second": 9.421, + "eval_steps_per_second": 1.581, + "step": 3250 + }, + { + "epoch": 10.9, + "eval_accuracy": 0.9969356079700724, + "eval_f1": 0.977728593166413, + "eval_loss": 0.016697365790605545, + "eval_precision": 0.9754347226055755, + "eval_recall": 0.9800332778702163, + "eval_runtime": 45.7975, + "eval_samples_per_second": 9.367, + "eval_steps_per_second": 1.572, + "step": 3260 + }, + { + "epoch": 10.94, + "eval_accuracy": 0.9969223422037091, + "eval_f1": 0.977297895902547, + "eval_loss": 0.016878845170140266, + "eval_precision": 0.9756771697070205, + "eval_recall": 0.9789240155296728, + "eval_runtime": 45.4916, + "eval_samples_per_second": 9.43, + "eval_steps_per_second": 1.583, + "step": 3270 + }, + { + "epoch": 10.97, + "eval_accuracy": 0.9969223422037091, + "eval_f1": 0.9780052566053397, + "eval_loss": 0.016803227365016937, + "eval_precision": 0.9757107369583218, + "eval_recall": 0.9803105934553522, + "eval_runtime": 45.574, + "eval_samples_per_second": 9.413, + "eval_steps_per_second": 1.58, + "step": 3280 + }, + { + "epoch": 11.0, + "eval_accuracy": 0.9969754052691624, + "eval_f1": 0.9811790755604761, + "eval_loss": 0.016553932800889015, + "eval_precision": 0.9792817679558011, + "eval_recall": 0.983083749306711, + "eval_runtime": 45.6533, + "eval_samples_per_second": 9.397, + "eval_steps_per_second": 1.577, + "step": 3290 + }, + { + "epoch": 11.04, + "eval_accuracy": 0.9971080629327956, + "eval_f1": 0.9845132743362832, + "eval_loss": 0.015565542504191399, + "eval_precision": 0.9817981246552675, + "eval_recall": 0.9872434830837493, + "eval_runtime": 45.6303, + "eval_samples_per_second": 9.402, + "eval_steps_per_second": 1.578, + "step": 3300 + }, + { + "epoch": 11.07, + "eval_accuracy": 0.9971345944655223, + "eval_f1": 0.9846579129232895, + "eval_loss": 0.014836783520877361, + "eval_precision": 0.981537613667677, + "eval_recall": 0.9877981142540211, + "eval_runtime": 45.5788, + "eval_samples_per_second": 9.412, + "eval_steps_per_second": 1.58, + "step": 3310 + }, + { + "epoch": 11.1, + "eval_accuracy": 0.9970549998673424, + "eval_f1": 0.9835521769177609, + "eval_loss": 0.015223776921629906, + "eval_precision": 0.9804353816478368, + "eval_recall": 0.9866888519134775, + "eval_runtime": 45.8006, + "eval_samples_per_second": 9.367, + "eval_steps_per_second": 1.572, + "step": 3320 + }, + { + "epoch": 11.14, + "eval_accuracy": 0.9969754052691624, + "eval_f1": 0.9827133176600746, + "eval_loss": 0.015493294224143028, + "eval_precision": 0.9801379310344828, + "eval_recall": 0.9853022739877981, + "eval_runtime": 46.0713, + "eval_samples_per_second": 9.312, + "eval_steps_per_second": 1.563, + "step": 3330 + }, + { + "epoch": 11.17, + "eval_accuracy": 0.9970019368018891, + "eval_f1": 0.9827180976081846, + "eval_loss": 0.014980579726397991, + "eval_precision": 0.9798731734215606, + "eval_recall": 0.985579589572934, + "eval_runtime": 45.5835, + "eval_samples_per_second": 9.411, + "eval_steps_per_second": 1.58, + "step": 3340 + }, + { + "epoch": 11.2, + "eval_accuracy": 0.9970682656337057, + "eval_f1": 0.9840918522617236, + "eval_loss": 0.014826910570263863, + "eval_precision": 0.9817830527187413, + "eval_recall": 0.9864115363283417, + "eval_runtime": 45.5689, + "eval_samples_per_second": 9.414, + "eval_steps_per_second": 1.58, + "step": 3350 + }, + { + "epoch": 11.24, + "eval_accuracy": 0.9970682656337057, + "eval_f1": 0.983402489626556, + "eval_loss": 0.015205192379653454, + "eval_precision": 0.9809602649006622, + "eval_recall": 0.9858569051580699, + "eval_runtime": 45.5733, + "eval_samples_per_second": 9.413, + "eval_steps_per_second": 1.58, + "step": 3360 + }, + { + "epoch": 11.27, + "eval_accuracy": 0.9970815314000689, + "eval_f1": 0.9835430784123911, + "eval_loss": 0.015277662314474583, + "eval_precision": 0.9809655172413793, + "eval_recall": 0.9861342207432058, + "eval_runtime": 45.8383, + "eval_samples_per_second": 9.359, + "eval_steps_per_second": 1.571, + "step": 3370 + }, + { + "epoch": 11.3, + "eval_accuracy": 0.9969090764373458, + "eval_f1": 0.9803921568627452, + "eval_loss": 0.015845883637666702, + "eval_precision": 0.9763476347634763, + "eval_recall": 0.9844703272323905, + "eval_runtime": 45.6768, + "eval_samples_per_second": 9.392, + "eval_steps_per_second": 1.576, + "step": 3380 + }, + { + "epoch": 11.34, + "eval_accuracy": 0.9968958106709824, + "eval_f1": 0.9808037563872393, + "eval_loss": 0.016353582963347435, + "eval_precision": 0.9768913342503439, + "eval_recall": 0.9847476428175264, + "eval_runtime": 45.6996, + "eval_samples_per_second": 9.387, + "eval_steps_per_second": 1.576, + "step": 3390 + }, + { + "epoch": 11.37, + "eval_accuracy": 0.9966835584091693, + "eval_f1": 0.9728832318760378, + "eval_loss": 0.01731358841061592, + "eval_precision": 0.9707344008834898, + "eval_recall": 0.9750415973377704, + "eval_runtime": 45.9308, + "eval_samples_per_second": 9.34, + "eval_steps_per_second": 1.568, + "step": 3400 + }, + { + "epoch": 11.4, + "eval_accuracy": 0.9969090764373458, + "eval_f1": 0.9787116394802323, + "eval_loss": 0.015837378799915314, + "eval_precision": 0.9757442116868799, + "eval_recall": 0.9816971713810316, + "eval_runtime": 45.8585, + "eval_samples_per_second": 9.355, + "eval_steps_per_second": 1.57, + "step": 3410 + }, + { + "epoch": 11.44, + "eval_accuracy": 0.9968029503064392, + "eval_f1": 0.9741534208707672, + "eval_loss": 0.0164649561047554, + "eval_precision": 0.9710664094791954, + "eval_recall": 0.9772601220188575, + "eval_runtime": 45.5679, + "eval_samples_per_second": 9.415, + "eval_steps_per_second": 1.58, + "step": 3420 + }, + { + "epoch": 11.47, + "eval_accuracy": 0.9964182430819029, + "eval_f1": 0.9646373595895161, + "eval_loss": 0.017043212428689003, + "eval_precision": 0.9647711511789182, + "eval_recall": 0.9645036051026068, + "eval_runtime": 45.6723, + "eval_samples_per_second": 9.393, + "eval_steps_per_second": 1.576, + "step": 3430 + }, + { + "epoch": 11.51, + "eval_accuracy": 0.9964182430819029, + "eval_f1": 0.9653643668606262, + "eval_loss": 0.017299339175224304, + "eval_precision": 0.964562569213732, + "eval_recall": 0.9661674986134221, + "eval_runtime": 46.0069, + "eval_samples_per_second": 9.325, + "eval_steps_per_second": 1.565, + "step": 3440 + }, + { + "epoch": 11.54, + "eval_accuracy": 0.9964713061473561, + "eval_f1": 0.968054211035818, + "eval_loss": 0.01724633388221264, + "eval_precision": 0.9655172413793104, + "eval_recall": 0.9706045479755963, + "eval_runtime": 45.6115, + "eval_samples_per_second": 9.406, + "eval_steps_per_second": 1.579, + "step": 3450 + }, + { + "epoch": 11.57, + "eval_accuracy": 0.9964845719137194, + "eval_f1": 0.9670724958494743, + "eval_loss": 0.017298314720392227, + "eval_precision": 0.9649364991717283, + "eval_recall": 0.9692179700499168, + "eval_runtime": 45.5306, + "eval_samples_per_second": 9.422, + "eval_steps_per_second": 1.581, + "step": 3460 + }, + { + "epoch": 11.61, + "eval_accuracy": 0.9967366214746226, + "eval_f1": 0.9718875502008032, + "eval_loss": 0.015642836689949036, + "eval_precision": 0.9706777316735823, + "eval_recall": 0.9731003882418192, + "eval_runtime": 46.9214, + "eval_samples_per_second": 9.143, + "eval_steps_per_second": 1.534, + "step": 3470 + }, + { + "epoch": 11.64, + "eval_accuracy": 0.9969356079700724, + "eval_f1": 0.9768986028496335, + "eval_loss": 0.015028764493763447, + "eval_precision": 0.9746066795473365, + "eval_recall": 0.9792013311148087, + "eval_runtime": 45.682, + "eval_samples_per_second": 9.391, + "eval_steps_per_second": 1.576, + "step": 3480 + }, + { + "epoch": 11.67, + "eval_accuracy": 0.9969621395027991, + "eval_f1": 0.9775747508305648, + "eval_loss": 0.015305536799132824, + "eval_precision": 0.9759535655058044, + "eval_recall": 0.9792013311148087, + "eval_runtime": 45.8278, + "eval_samples_per_second": 9.361, + "eval_steps_per_second": 1.571, + "step": 3490 + }, + { + "epoch": 11.71, + "learning_rate": 6.5000000000000004e-06, + "loss": 0.0042, + "step": 3500 + }, + { + "epoch": 11.71, + "eval_accuracy": 0.9969754052691624, + "eval_f1": 0.9772853185595569, + "eval_loss": 0.015260215848684311, + "eval_precision": 0.9762036524626453, + "eval_recall": 0.978369384359401, + "eval_runtime": 45.5813, + "eval_samples_per_second": 9.412, + "eval_steps_per_second": 1.58, + "step": 3500 + }, + { + "epoch": 11.74, + "eval_accuracy": 0.9971213286991589, + "eval_f1": 0.9785407725321887, + "eval_loss": 0.015263444744050503, + "eval_precision": 0.9770528061929776, + "eval_recall": 0.9800332778702163, + "eval_runtime": 46.1625, + "eval_samples_per_second": 9.293, + "eval_steps_per_second": 1.56, + "step": 3510 + }, + { + "epoch": 11.77, + "eval_accuracy": 0.9970947971664323, + "eval_f1": 0.9793771626297577, + "eval_loss": 0.01575734093785286, + "eval_precision": 0.9776181265542968, + "eval_recall": 0.9811425402107599, + "eval_runtime": 45.6916, + "eval_samples_per_second": 9.389, + "eval_steps_per_second": 1.576, + "step": 3520 + }, + { + "epoch": 11.81, + "eval_accuracy": 0.9970682656337057, + "eval_f1": 0.9803541781959049, + "eval_loss": 0.016223162412643433, + "eval_precision": 0.9781888459414688, + "eval_recall": 0.9825291181364393, + "eval_runtime": 45.6681, + "eval_samples_per_second": 9.394, + "eval_steps_per_second": 1.577, + "step": 3530 + }, + { + "epoch": 11.84, + "eval_accuracy": 0.9970152025682524, + "eval_f1": 0.9794056668970283, + "eval_loss": 0.016862712800502777, + "eval_precision": 0.9763020115734362, + "eval_recall": 0.9825291181364393, + "eval_runtime": 45.8595, + "eval_samples_per_second": 9.355, + "eval_steps_per_second": 1.57, + "step": 3540 + }, + { + "epoch": 11.87, + "eval_accuracy": 0.9969754052691624, + "eval_f1": 0.9781526548672567, + "eval_loss": 0.016722770407795906, + "eval_precision": 0.9754550468836183, + "eval_recall": 0.980865224625624, + "eval_runtime": 45.6514, + "eval_samples_per_second": 9.397, + "eval_steps_per_second": 1.577, + "step": 3550 + }, + { + "epoch": 11.91, + "eval_accuracy": 0.997041734100979, + "eval_f1": 0.9792473713337022, + "eval_loss": 0.01629817485809326, + "eval_precision": 0.9770844837106571, + "eval_recall": 0.9814198557958957, + "eval_runtime": 45.6901, + "eval_samples_per_second": 9.389, + "eval_steps_per_second": 1.576, + "step": 3560 + }, + { + "epoch": 11.94, + "eval_accuracy": 0.9970152025682524, + "eval_f1": 0.9795240730492528, + "eval_loss": 0.01616574078798294, + "eval_precision": 0.97736057426836, + "eval_recall": 0.9816971713810316, + "eval_runtime": 46.0985, + "eval_samples_per_second": 9.306, + "eval_steps_per_second": 1.562, + "step": 3570 + }, + { + "epoch": 11.97, + "eval_accuracy": 0.9969488737364357, + "eval_f1": 0.9778761061946902, + "eval_loss": 0.01619311049580574, + "eval_precision": 0.9751792608935466, + "eval_recall": 0.9805879090404881, + "eval_runtime": 45.9524, + "eval_samples_per_second": 9.336, + "eval_steps_per_second": 1.567, + "step": 3580 + }, + { + "epoch": 12.01, + "eval_accuracy": 0.9969886710355257, + "eval_f1": 0.9785822854774078, + "eval_loss": 0.01636885292828083, + "eval_precision": 0.97521343982374, + "eval_recall": 0.9819744869661675, + "eval_runtime": 45.6302, + "eval_samples_per_second": 9.402, + "eval_steps_per_second": 1.578, + "step": 3590 + }, + { + "epoch": 12.04, + "eval_accuracy": 0.9969886710355257, + "eval_f1": 0.9784470848300635, + "eval_loss": 0.01634366624057293, + "eval_precision": 0.9749449339207048, + "eval_recall": 0.9819744869661675, + "eval_runtime": 45.6457, + "eval_samples_per_second": 9.398, + "eval_steps_per_second": 1.577, + "step": 3600 + }, + { + "epoch": 12.07, + "eval_accuracy": 0.9969621395027991, + "eval_f1": 0.9776119402985075, + "eval_loss": 0.016408545896410942, + "eval_precision": 0.9743801652892562, + "eval_recall": 0.980865224625624, + "eval_runtime": 46.0467, + "eval_samples_per_second": 9.317, + "eval_steps_per_second": 1.564, + "step": 3610 + }, + { + "epoch": 12.11, + "eval_accuracy": 0.9971345944655223, + "eval_f1": 0.9778822228366049, + "eval_loss": 0.01554455328732729, + "eval_precision": 0.9749173098125689, + "eval_recall": 0.980865224625624, + "eval_runtime": 45.8672, + "eval_samples_per_second": 9.353, + "eval_steps_per_second": 1.57, + "step": 3620 + }, + { + "epoch": 12.14, + "eval_accuracy": 0.9971743917646122, + "eval_f1": 0.9781284606866002, + "eval_loss": 0.015367398969829082, + "eval_precision": 0.9765063571033721, + "eval_recall": 0.9797559622850804, + "eval_runtime": 45.8306, + "eval_samples_per_second": 9.361, + "eval_steps_per_second": 1.571, + "step": 3630 + }, + { + "epoch": 12.17, + "eval_accuracy": 0.9970815314000689, + "eval_f1": 0.9784172661870504, + "eval_loss": 0.016077237203717232, + "eval_precision": 0.9762562120375483, + "eval_recall": 0.9805879090404881, + "eval_runtime": 45.7148, + "eval_samples_per_second": 9.384, + "eval_steps_per_second": 1.575, + "step": 3640 + }, + { + "epoch": 12.21, + "eval_accuracy": 0.9968825449046191, + "eval_f1": 0.9757986447241046, + "eval_loss": 0.016824740916490555, + "eval_precision": 0.9732413793103448, + "eval_recall": 0.978369384359401, + "eval_runtime": 46.1214, + "eval_samples_per_second": 9.302, + "eval_steps_per_second": 1.561, + "step": 3650 + }, + { + "epoch": 12.24, + "eval_accuracy": 0.9969223422037091, + "eval_f1": 0.978146611341632, + "eval_loss": 0.016822459176182747, + "eval_precision": 0.9757174392935982, + "eval_recall": 0.9805879090404881, + "eval_runtime": 45.7927, + "eval_samples_per_second": 9.368, + "eval_steps_per_second": 1.572, + "step": 3660 + }, + { + "epoch": 12.27, + "eval_accuracy": 0.9968825449046191, + "eval_f1": 0.9792588495575221, + "eval_loss": 0.01815040595829487, + "eval_precision": 0.9765581908439052, + "eval_recall": 0.9819744869661675, + "eval_runtime": 45.8912, + "eval_samples_per_second": 9.348, + "eval_steps_per_second": 1.569, + "step": 3670 + }, + { + "epoch": 12.31, + "eval_accuracy": 0.9967764187737126, + "eval_f1": 0.9788586430841509, + "eval_loss": 0.01881832629442215, + "eval_precision": 0.9754888460479206, + "eval_recall": 0.9822518025513034, + "eval_runtime": 46.0893, + "eval_samples_per_second": 9.308, + "eval_steps_per_second": 1.562, + "step": 3680 + }, + { + "epoch": 12.34, + "eval_accuracy": 0.9968294818391659, + "eval_f1": 0.976892209768922, + "eval_loss": 0.017492154613137245, + "eval_precision": 0.9748688207677437, + "eval_recall": 0.9789240155296728, + "eval_runtime": 45.8024, + "eval_samples_per_second": 9.366, + "eval_steps_per_second": 1.572, + "step": 3690 + }, + { + "epoch": 12.37, + "eval_accuracy": 0.9966172295773527, + "eval_f1": 0.9710806697108068, + "eval_loss": 0.018154501914978027, + "eval_precision": 0.9690693178679922, + "eval_recall": 0.9731003882418192, + "eval_runtime": 46.0834, + "eval_samples_per_second": 9.309, + "eval_steps_per_second": 1.562, + "step": 3700 + }, + { + "epoch": 12.41, + "eval_accuracy": 0.9962457881191796, + "eval_f1": 0.9614745011086475, + "eval_loss": 0.01924285851418972, + "eval_precision": 0.9609418282548476, + "eval_recall": 0.9620077648363838, + "eval_runtime": 45.696, + "eval_samples_per_second": 9.388, + "eval_steps_per_second": 1.576, + "step": 3710 + }, + { + "epoch": 12.44, + "eval_accuracy": 0.9964580403809928, + "eval_f1": 0.9651259341267644, + "eval_loss": 0.017841314896941185, + "eval_precision": 0.9632596685082873, + "eval_recall": 0.9669994453688298, + "eval_runtime": 45.9939, + "eval_samples_per_second": 9.327, + "eval_steps_per_second": 1.565, + "step": 3720 + }, + { + "epoch": 12.47, + "eval_accuracy": 0.9966304953437161, + "eval_f1": 0.970962389380531, + "eval_loss": 0.016754455864429474, + "eval_precision": 0.968284611141754, + "eval_recall": 0.973655019412091, + "eval_runtime": 45.8171, + "eval_samples_per_second": 9.363, + "eval_steps_per_second": 1.571, + "step": 3730 + }, + { + "epoch": 12.51, + "eval_accuracy": 0.9968294818391659, + "eval_f1": 0.9774269491760144, + "eval_loss": 0.015923812985420227, + "eval_precision": 0.9762102351313969, + "eval_recall": 0.9786466999445369, + "eval_runtime": 45.7801, + "eval_samples_per_second": 9.371, + "eval_steps_per_second": 1.573, + "step": 3740 + }, + { + "epoch": 12.54, + "eval_accuracy": 0.9968825449046191, + "eval_f1": 0.9772916089725838, + "eval_loss": 0.015936698764562607, + "eval_precision": 0.9759402654867256, + "eval_recall": 0.9786466999445369, + "eval_runtime": 45.9975, + "eval_samples_per_second": 9.327, + "eval_steps_per_second": 1.565, + "step": 3750 + }, + { + "epoch": 12.58, + "eval_accuracy": 0.9968029503064392, + "eval_f1": 0.9736623232603271, + "eval_loss": 0.016157541424036026, + "eval_precision": 0.9733924611973392, + "eval_recall": 0.9739323349972269, + "eval_runtime": 45.7117, + "eval_samples_per_second": 9.385, + "eval_steps_per_second": 1.575, + "step": 3760 + }, + { + "epoch": 12.61, + "eval_accuracy": 0.9967366214746226, + "eval_f1": 0.9694117647058823, + "eval_loss": 0.01580197736620903, + "eval_precision": 0.9676706272450953, + "eval_recall": 0.971159179145868, + "eval_runtime": 45.5828, + "eval_samples_per_second": 9.411, + "eval_steps_per_second": 1.58, + "step": 3770 + }, + { + "epoch": 12.64, + "eval_accuracy": 0.9969090764373458, + "eval_f1": 0.971460238293156, + "eval_loss": 0.015100532211363316, + "eval_precision": 0.9706533776301218, + "eval_recall": 0.9722684414864116, + "eval_runtime": 45.7142, + "eval_samples_per_second": 9.384, + "eval_steps_per_second": 1.575, + "step": 3780 + }, + { + "epoch": 12.68, + "eval_accuracy": 0.9969488737364357, + "eval_f1": 0.9725609756097561, + "eval_loss": 0.015240387991070747, + "eval_precision": 0.97202216066482, + "eval_recall": 0.9731003882418192, + "eval_runtime": 46.103, + "eval_samples_per_second": 9.305, + "eval_steps_per_second": 1.562, + "step": 3790 + }, + { + "epoch": 12.71, + "eval_accuracy": 0.9970284683346157, + "eval_f1": 0.9779991697799916, + "eval_loss": 0.015490233898162842, + "eval_precision": 0.975973487986744, + "eval_recall": 0.9800332778702163, + "eval_runtime": 45.7054, + "eval_samples_per_second": 9.386, + "eval_steps_per_second": 1.575, + "step": 3800 + }, + { + "epoch": 12.74, + "eval_accuracy": 0.9971213286991589, + "eval_f1": 0.980060924951537, + "eval_loss": 0.014931376092135906, + "eval_precision": 0.9787057522123894, + "eval_recall": 0.9814198557958957, + "eval_runtime": 45.8361, + "eval_samples_per_second": 9.359, + "eval_steps_per_second": 1.571, + "step": 3810 + }, + { + "epoch": 12.78, + "eval_accuracy": 0.9970947971664323, + "eval_f1": 0.9769764216366158, + "eval_loss": 0.013774119317531586, + "eval_precision": 0.9772475027746947, + "eval_recall": 0.9767054908485857, + "eval_runtime": 46.4293, + "eval_samples_per_second": 9.24, + "eval_steps_per_second": 1.551, + "step": 3820 + }, + { + "epoch": 12.81, + "eval_accuracy": 0.9971611259982489, + "eval_f1": 0.9775436650956474, + "eval_loss": 0.01355548482388258, + "eval_precision": 0.9772727272727273, + "eval_recall": 0.9778147531891292, + "eval_runtime": 45.7415, + "eval_samples_per_second": 9.379, + "eval_steps_per_second": 1.574, + "step": 3830 + }, + { + "epoch": 12.84, + "eval_accuracy": 0.9973335809609721, + "eval_f1": 0.9824124082537044, + "eval_loss": 0.013703616335988045, + "eval_precision": 0.9811894882434301, + "eval_recall": 0.9836383804769828, + "eval_runtime": 45.73, + "eval_samples_per_second": 9.381, + "eval_steps_per_second": 1.574, + "step": 3840 + }, + { + "epoch": 12.88, + "eval_accuracy": 0.9973601124936987, + "eval_f1": 0.9812102790826194, + "eval_loss": 0.014531377702951431, + "eval_precision": 0.9776982378854625, + "eval_recall": 0.9847476428175264, + "eval_runtime": 45.7852, + "eval_samples_per_second": 9.37, + "eval_steps_per_second": 1.573, + "step": 3850 + }, + { + "epoch": 12.91, + "eval_accuracy": 0.9968427476055292, + "eval_f1": 0.977526540741762, + "eval_loss": 0.016427284106612206, + "eval_precision": 0.9720318069646284, + "eval_recall": 0.983083749306711, + "eval_runtime": 45.8438, + "eval_samples_per_second": 9.358, + "eval_steps_per_second": 1.571, + "step": 3860 + }, + { + "epoch": 12.94, + "eval_accuracy": 0.9960202700910031, + "eval_f1": 0.9653931339977851, + "eval_loss": 0.021676059812307358, + "eval_precision": 0.9637921503593145, + "eval_recall": 0.9669994453688298, + "eval_runtime": 45.7888, + "eval_samples_per_second": 9.369, + "eval_steps_per_second": 1.572, + "step": 3870 + }, + { + "epoch": 12.98, + "eval_accuracy": 0.9967498872409859, + "eval_f1": 0.9777900400055181, + "eval_loss": 0.018003830686211586, + "eval_precision": 0.9728245951139171, + "eval_recall": 0.9828064337215752, + "eval_runtime": 45.7325, + "eval_samples_per_second": 9.381, + "eval_steps_per_second": 1.574, + "step": 3880 + }, + { + "epoch": 13.01, + "eval_accuracy": 0.9968692791382558, + "eval_f1": 0.977667493796526, + "eval_loss": 0.01754719577729702, + "eval_precision": 0.9720394736842105, + "eval_recall": 0.9833610648918469, + "eval_runtime": 46.1857, + "eval_samples_per_second": 9.289, + "eval_steps_per_second": 1.559, + "step": 3890 + }, + { + "epoch": 13.04, + "eval_accuracy": 0.9967631530073492, + "eval_f1": 0.9739274382673472, + "eval_loss": 0.01792616955935955, + "eval_precision": 0.96898160856437, + "eval_recall": 0.9789240155296728, + "eval_runtime": 45.7326, + "eval_samples_per_second": 9.381, + "eval_steps_per_second": 1.574, + "step": 3900 + }, + { + "epoch": 13.08, + "eval_accuracy": 0.9967631530073492, + "eval_f1": 0.9729431253451133, + "eval_loss": 0.017918290570378304, + "eval_precision": 0.9686641011544805, + "eval_recall": 0.9772601220188575, + "eval_runtime": 46.0978, + "eval_samples_per_second": 9.306, + "eval_steps_per_second": 1.562, + "step": 3910 + }, + { + "epoch": 13.11, + "eval_accuracy": 0.9968692791382558, + "eval_f1": 0.9765323025952513, + "eval_loss": 0.017354754731059074, + "eval_precision": 0.9722374931280924, + "eval_recall": 0.980865224625624, + "eval_runtime": 46.21, + "eval_samples_per_second": 9.284, + "eval_steps_per_second": 1.558, + "step": 3920 + }, + { + "epoch": 13.14, + "eval_accuracy": 0.9969356079700724, + "eval_f1": 0.9775017253278124, + "eval_loss": 0.015932898968458176, + "eval_precision": 0.9730695245946689, + "eval_recall": 0.9819744869661675, + "eval_runtime": 45.7375, + "eval_samples_per_second": 9.38, + "eval_steps_per_second": 1.574, + "step": 3930 + }, + { + "epoch": 13.18, + "eval_accuracy": 0.9969090764373458, + "eval_f1": 0.9779005524861878, + "eval_loss": 0.015960365533828735, + "eval_precision": 0.9741331865712713, + "eval_recall": 0.9816971713810316, + "eval_runtime": 45.8882, + "eval_samples_per_second": 9.349, + "eval_steps_per_second": 1.569, + "step": 3940 + }, + { + "epoch": 13.21, + "eval_accuracy": 0.9967498872409859, + "eval_f1": 0.9737569060773481, + "eval_loss": 0.017051612958312035, + "eval_precision": 0.9700055035773253, + "eval_recall": 0.9775374376039934, + "eval_runtime": 45.8012, + "eval_samples_per_second": 9.367, + "eval_steps_per_second": 1.572, + "step": 3950 + }, + { + "epoch": 13.24, + "eval_accuracy": 0.9966702926428059, + "eval_f1": 0.9723527785457561, + "eval_loss": 0.01766437292098999, + "eval_precision": 0.9694046306504961, + "eval_recall": 0.9753189129229063, + "eval_runtime": 45.8759, + "eval_samples_per_second": 9.351, + "eval_steps_per_second": 1.569, + "step": 3960 + }, + { + "epoch": 13.28, + "eval_accuracy": 0.9966570268764426, + "eval_f1": 0.9727411097274111, + "eval_loss": 0.01822058856487274, + "eval_precision": 0.9707263186964927, + "eval_recall": 0.9747642817526345, + "eval_runtime": 45.8529, + "eval_samples_per_second": 9.356, + "eval_steps_per_second": 1.57, + "step": 3970 + }, + { + "epoch": 13.31, + "eval_accuracy": 0.9966304953437161, + "eval_f1": 0.9724872113922302, + "eval_loss": 0.01849460043013096, + "eval_precision": 0.9696719051557762, + "eval_recall": 0.9753189129229063, + "eval_runtime": 45.7611, + "eval_samples_per_second": 9.375, + "eval_steps_per_second": 1.573, + "step": 3980 + }, + { + "epoch": 13.34, + "eval_accuracy": 0.9968427476055292, + "eval_f1": 0.9780417069465543, + "eval_loss": 0.017539281398057938, + "eval_precision": 0.9741403026134801, + "eval_recall": 0.9819744869661675, + "eval_runtime": 46.0339, + "eval_samples_per_second": 9.319, + "eval_steps_per_second": 1.564, + "step": 3990 + }, + { + "epoch": 13.38, + "learning_rate": 6e-06, + "loss": 0.0037, + "step": 4000 + }, + { + "epoch": 13.38, + "eval_accuracy": 0.9968958106709824, + "eval_f1": 0.9792874896437448, + "eval_loss": 0.017371075227856636, + "eval_precision": 0.9752475247524752, + "eval_recall": 0.9833610648918469, + "eval_runtime": 45.7746, + "eval_samples_per_second": 9.372, + "eval_steps_per_second": 1.573, + "step": 4000 + } + ], + "max_steps": 10000, + "num_train_epochs": 34, + "total_flos": 6370581676032000.0, + "trial_name": null, + "trial_params": null +}