diff --git "a/last-checkpoint/trainer_state.json" "b/last-checkpoint/trainer_state.json" --- "a/last-checkpoint/trainer_state.json" +++ "b/last-checkpoint/trainer_state.json" @@ -1,9 +1,9 @@ { - "best_metric": 0.5081, - "best_model_checkpoint": "/kaggle/tmp/amr-tst-indo/AMRBART-id/fine-tune/../outputs/mbart-en-id-smaller-fted/checkpoint-19347", - "epoch": 4.999870784339062, + "best_metric": 11.3364, + "best_model_checkpoint": "/kaggle/tmp/amr-tst-indo/AMRBART-id/fine-tune/../outputs/mbart-en-id-smaller-fted/checkpoint-38692", + "epoch": 9.999870784339063, "eval_steps": 500, - "global_step": 19347, + "global_step": 38692, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, @@ -5865,6 +5865,5858 @@ "eval_samples_per_second": 1.019, "eval_steps_per_second": 0.51, "step": 19347 + }, + { + "epoch": 5.0038764698281435, + "learning_rate": 1.6894852845844678e-07, + "loss": 2.6522, + "step": 19360 + }, + { + "epoch": 5.009045096265667, + "learning_rate": 1.6891611564890445e-07, + "loss": 2.6008, + "step": 19380 + }, + { + "epoch": 5.014213722703191, + "learning_rate": 1.688837028393621e-07, + "loss": 2.6294, + "step": 19400 + }, + { + "epoch": 5.019382349140716, + "learning_rate": 1.6885129002981977e-07, + "loss": 2.6519, + "step": 19420 + }, + { + "epoch": 5.02455097557824, + "learning_rate": 1.6881887722027743e-07, + "loss": 2.6537, + "step": 19440 + }, + { + "epoch": 5.029719602015764, + "learning_rate": 1.6878646441073512e-07, + "loss": 2.6256, + "step": 19460 + }, + { + "epoch": 5.034888228453289, + "learning_rate": 1.6875405160119278e-07, + "loss": 2.6721, + "step": 19480 + }, + { + "epoch": 5.040056854890813, + "learning_rate": 1.6872163879165044e-07, + "loss": 2.6894, + "step": 19500 + }, + { + "epoch": 5.045225481328337, + "learning_rate": 1.6868922598210813e-07, + "loss": 2.6702, + "step": 19520 + }, + { + "epoch": 5.050394107765861, + "learning_rate": 1.686568131725658e-07, + "loss": 2.7041, + "step": 19540 + }, + { + "epoch": 5.055562734203385, + "learning_rate": 1.6862440036302346e-07, + "loss": 2.7243, + "step": 19560 + }, + { + "epoch": 5.06073136064091, + "learning_rate": 1.6859198755348114e-07, + "loss": 2.7082, + "step": 19580 + }, + { + "epoch": 5.065899987078434, + "learning_rate": 1.685595747439388e-07, + "loss": 2.6755, + "step": 19600 + }, + { + "epoch": 5.071068613515958, + "learning_rate": 1.6852716193439647e-07, + "loss": 2.6075, + "step": 19620 + }, + { + "epoch": 5.076237239953483, + "learning_rate": 1.6849474912485413e-07, + "loss": 2.6402, + "step": 19640 + }, + { + "epoch": 5.081405866391006, + "learning_rate": 1.684623363153118e-07, + "loss": 2.6928, + "step": 19660 + }, + { + "epoch": 5.086574492828531, + "learning_rate": 1.6842992350576948e-07, + "loss": 2.6689, + "step": 19680 + }, + { + "epoch": 5.091743119266055, + "learning_rate": 1.6839751069622714e-07, + "loss": 2.6848, + "step": 19700 + }, + { + "epoch": 5.096911745703579, + "learning_rate": 1.683650978866848e-07, + "loss": 2.6315, + "step": 19720 + }, + { + "epoch": 5.102080372141104, + "learning_rate": 1.683326850771425e-07, + "loss": 2.6936, + "step": 19740 + }, + { + "epoch": 5.107248998578628, + "learning_rate": 1.6830027226760015e-07, + "loss": 2.6354, + "step": 19760 + }, + { + "epoch": 5.112417625016152, + "learning_rate": 1.6826785945805782e-07, + "loss": 2.6376, + "step": 19780 + }, + { + "epoch": 5.117586251453676, + "learning_rate": 1.6823544664851548e-07, + "loss": 2.7595, + "step": 19800 + }, + { + "epoch": 5.1227548778912, + "learning_rate": 1.6820303383897317e-07, + "loss": 2.5688, + "step": 19820 + }, + { + "epoch": 5.127923504328725, + "learning_rate": 1.6817062102943083e-07, + "loss": 2.6504, + "step": 19840 + }, + { + "epoch": 5.133092130766249, + "learning_rate": 1.681382082198885e-07, + "loss": 2.701, + "step": 19860 + }, + { + "epoch": 5.138260757203773, + "learning_rate": 1.6810579541034615e-07, + "loss": 2.6691, + "step": 19880 + }, + { + "epoch": 5.143429383641298, + "learning_rate": 1.6807338260080384e-07, + "loss": 2.6396, + "step": 19900 + }, + { + "epoch": 5.148598010078821, + "learning_rate": 1.680409697912615e-07, + "loss": 2.6752, + "step": 19920 + }, + { + "epoch": 5.1537666365163455, + "learning_rate": 1.6800855698171916e-07, + "loss": 2.567, + "step": 19940 + }, + { + "epoch": 5.15893526295387, + "learning_rate": 1.6797614417217685e-07, + "loss": 2.6691, + "step": 19960 + }, + { + "epoch": 5.164103889391394, + "learning_rate": 1.6794373136263451e-07, + "loss": 2.6172, + "step": 19980 + }, + { + "epoch": 5.169272515828919, + "learning_rate": 1.6791131855309218e-07, + "loss": 2.6856, + "step": 20000 + }, + { + "epoch": 5.174441142266443, + "learning_rate": 1.6787890574354984e-07, + "loss": 2.6874, + "step": 20020 + }, + { + "epoch": 5.179609768703967, + "learning_rate": 1.678464929340075e-07, + "loss": 2.6737, + "step": 20040 + }, + { + "epoch": 5.184778395141491, + "learning_rate": 1.678140801244652e-07, + "loss": 2.6503, + "step": 20060 + }, + { + "epoch": 5.189947021579015, + "learning_rate": 1.6778166731492285e-07, + "loss": 2.6155, + "step": 20080 + }, + { + "epoch": 5.1951156480165395, + "learning_rate": 1.677492545053805e-07, + "loss": 2.7035, + "step": 20100 + }, + { + "epoch": 5.200284274454064, + "learning_rate": 1.677168416958382e-07, + "loss": 2.6192, + "step": 20120 + }, + { + "epoch": 5.205452900891588, + "learning_rate": 1.6768442888629586e-07, + "loss": 2.5974, + "step": 20140 + }, + { + "epoch": 5.2106215273291125, + "learning_rate": 1.6765201607675352e-07, + "loss": 2.694, + "step": 20160 + }, + { + "epoch": 5.215790153766637, + "learning_rate": 1.676196032672112e-07, + "loss": 2.6391, + "step": 20180 + }, + { + "epoch": 5.22095878020416, + "learning_rate": 1.6758719045766887e-07, + "loss": 2.6428, + "step": 20200 + }, + { + "epoch": 5.226127406641685, + "learning_rate": 1.6755477764812654e-07, + "loss": 2.6796, + "step": 20220 + }, + { + "epoch": 5.231296033079209, + "learning_rate": 1.675223648385842e-07, + "loss": 2.6819, + "step": 20240 + }, + { + "epoch": 5.2364646595167335, + "learning_rate": 1.6748995202904186e-07, + "loss": 2.6714, + "step": 20260 + }, + { + "epoch": 5.241633285954258, + "learning_rate": 1.6745753921949955e-07, + "loss": 2.6392, + "step": 20280 + }, + { + "epoch": 5.246801912391782, + "learning_rate": 1.674251264099572e-07, + "loss": 2.7057, + "step": 20300 + }, + { + "epoch": 5.2519705388293065, + "learning_rate": 1.6739271360041487e-07, + "loss": 2.6676, + "step": 20320 + }, + { + "epoch": 5.25713916526683, + "learning_rate": 1.6736030079087256e-07, + "loss": 2.7037, + "step": 20340 + }, + { + "epoch": 5.262307791704354, + "learning_rate": 1.6732788798133022e-07, + "loss": 2.7595, + "step": 20360 + }, + { + "epoch": 5.267476418141879, + "learning_rate": 1.6729547517178788e-07, + "loss": 2.6665, + "step": 20380 + }, + { + "epoch": 5.272645044579403, + "learning_rate": 1.6726306236224554e-07, + "loss": 2.6281, + "step": 20400 + }, + { + "epoch": 5.277813671016927, + "learning_rate": 1.6723064955270323e-07, + "loss": 2.6047, + "step": 20420 + }, + { + "epoch": 5.282982297454452, + "learning_rate": 1.671982367431609e-07, + "loss": 2.6466, + "step": 20440 + }, + { + "epoch": 5.288150923891976, + "learning_rate": 1.6716582393361856e-07, + "loss": 2.6026, + "step": 20460 + }, + { + "epoch": 5.2933195503295, + "learning_rate": 1.6713341112407622e-07, + "loss": 2.6714, + "step": 20480 + }, + { + "epoch": 5.298488176767024, + "learning_rate": 1.671009983145339e-07, + "loss": 2.6351, + "step": 20500 + }, + { + "epoch": 5.303656803204548, + "learning_rate": 1.6706858550499157e-07, + "loss": 2.6293, + "step": 20520 + }, + { + "epoch": 5.308825429642073, + "learning_rate": 1.6703617269544923e-07, + "loss": 2.6368, + "step": 20540 + }, + { + "epoch": 5.313994056079597, + "learning_rate": 1.6700375988590692e-07, + "loss": 2.6963, + "step": 20560 + }, + { + "epoch": 5.319162682517121, + "learning_rate": 1.6697134707636458e-07, + "loss": 2.6401, + "step": 20580 + }, + { + "epoch": 5.324331308954645, + "learning_rate": 1.6693893426682224e-07, + "loss": 2.669, + "step": 20600 + }, + { + "epoch": 5.329499935392169, + "learning_rate": 1.669065214572799e-07, + "loss": 2.6384, + "step": 20620 + }, + { + "epoch": 5.334668561829694, + "learning_rate": 1.6687410864773757e-07, + "loss": 2.7073, + "step": 20640 + }, + { + "epoch": 5.339837188267218, + "learning_rate": 1.6684169583819526e-07, + "loss": 2.6507, + "step": 20660 + }, + { + "epoch": 5.345005814704742, + "learning_rate": 1.6680928302865292e-07, + "loss": 2.6378, + "step": 20680 + }, + { + "epoch": 5.350174441142267, + "learning_rate": 1.6677687021911058e-07, + "loss": 2.6756, + "step": 20700 + }, + { + "epoch": 5.355343067579791, + "learning_rate": 1.6674445740956827e-07, + "loss": 2.6667, + "step": 20720 + }, + { + "epoch": 5.3605116940173145, + "learning_rate": 1.6671204460002593e-07, + "loss": 2.6714, + "step": 20740 + }, + { + "epoch": 5.365680320454839, + "learning_rate": 1.666796317904836e-07, + "loss": 2.6117, + "step": 20760 + }, + { + "epoch": 5.370848946892363, + "learning_rate": 1.6664721898094128e-07, + "loss": 2.6513, + "step": 20780 + }, + { + "epoch": 5.376017573329888, + "learning_rate": 1.6661480617139894e-07, + "loss": 2.6997, + "step": 20800 + }, + { + "epoch": 5.381186199767412, + "learning_rate": 1.665823933618566e-07, + "loss": 2.6395, + "step": 20820 + }, + { + "epoch": 5.386354826204936, + "learning_rate": 1.6654998055231426e-07, + "loss": 2.6615, + "step": 20840 + }, + { + "epoch": 5.39152345264246, + "learning_rate": 1.6651756774277193e-07, + "loss": 2.6915, + "step": 20860 + }, + { + "epoch": 5.396692079079984, + "learning_rate": 1.6648515493322961e-07, + "loss": 2.6248, + "step": 20880 + }, + { + "epoch": 5.4018607055175085, + "learning_rate": 1.6645274212368728e-07, + "loss": 2.7061, + "step": 20900 + }, + { + "epoch": 5.407029331955033, + "learning_rate": 1.6642032931414494e-07, + "loss": 2.642, + "step": 20920 + }, + { + "epoch": 5.412197958392557, + "learning_rate": 1.6638791650460263e-07, + "loss": 2.6377, + "step": 20940 + }, + { + "epoch": 5.417366584830082, + "learning_rate": 1.663555036950603e-07, + "loss": 2.6452, + "step": 20960 + }, + { + "epoch": 5.422535211267606, + "learning_rate": 1.6632309088551795e-07, + "loss": 2.7206, + "step": 20980 + }, + { + "epoch": 5.427703837705129, + "learning_rate": 1.662906780759756e-07, + "loss": 2.6524, + "step": 21000 + }, + { + "epoch": 5.432872464142654, + "learning_rate": 1.662582652664333e-07, + "loss": 2.6398, + "step": 21020 + }, + { + "epoch": 5.438041090580178, + "learning_rate": 1.6622585245689096e-07, + "loss": 2.6486, + "step": 21040 + }, + { + "epoch": 5.4432097170177025, + "learning_rate": 1.6619343964734862e-07, + "loss": 2.6466, + "step": 21060 + }, + { + "epoch": 5.448378343455227, + "learning_rate": 1.6616102683780629e-07, + "loss": 2.6818, + "step": 21080 + }, + { + "epoch": 5.453546969892751, + "learning_rate": 1.6612861402826397e-07, + "loss": 2.6826, + "step": 21100 + }, + { + "epoch": 5.458715596330276, + "learning_rate": 1.6609620121872164e-07, + "loss": 2.665, + "step": 21120 + }, + { + "epoch": 5.463884222767799, + "learning_rate": 1.660637884091793e-07, + "loss": 2.6018, + "step": 21140 + }, + { + "epoch": 5.469052849205323, + "learning_rate": 1.66031375599637e-07, + "loss": 2.6867, + "step": 21160 + }, + { + "epoch": 5.474221475642848, + "learning_rate": 1.6599896279009465e-07, + "loss": 2.6972, + "step": 21180 + }, + { + "epoch": 5.479390102080372, + "learning_rate": 1.659665499805523e-07, + "loss": 2.6775, + "step": 21200 + }, + { + "epoch": 5.4845587285178965, + "learning_rate": 1.6593413717100997e-07, + "loss": 2.7239, + "step": 21220 + }, + { + "epoch": 5.489727354955421, + "learning_rate": 1.6590172436146763e-07, + "loss": 2.6349, + "step": 21240 + }, + { + "epoch": 5.494895981392945, + "learning_rate": 1.6586931155192532e-07, + "loss": 2.6607, + "step": 21260 + }, + { + "epoch": 5.500064607830469, + "learning_rate": 1.6583689874238298e-07, + "loss": 2.6806, + "step": 21280 + }, + { + "epoch": 5.505233234267993, + "learning_rate": 1.6580448593284065e-07, + "loss": 2.7212, + "step": 21300 + }, + { + "epoch": 5.510401860705517, + "learning_rate": 1.6577207312329833e-07, + "loss": 2.6997, + "step": 21320 + }, + { + "epoch": 5.515570487143042, + "learning_rate": 1.65739660313756e-07, + "loss": 2.6277, + "step": 21340 + }, + { + "epoch": 5.520739113580566, + "learning_rate": 1.6570724750421366e-07, + "loss": 2.6953, + "step": 21360 + }, + { + "epoch": 5.5259077400180905, + "learning_rate": 1.6567483469467135e-07, + "loss": 2.7586, + "step": 21380 + }, + { + "epoch": 5.531076366455615, + "learning_rate": 1.65642421885129e-07, + "loss": 2.5887, + "step": 21400 + }, + { + "epoch": 5.536244992893138, + "learning_rate": 1.6561000907558667e-07, + "loss": 2.6364, + "step": 21420 + }, + { + "epoch": 5.541413619330663, + "learning_rate": 1.6557759626604433e-07, + "loss": 2.5951, + "step": 21440 + }, + { + "epoch": 5.546582245768187, + "learning_rate": 1.65545183456502e-07, + "loss": 2.6595, + "step": 21460 + }, + { + "epoch": 5.551750872205711, + "learning_rate": 1.6551277064695968e-07, + "loss": 2.602, + "step": 21480 + }, + { + "epoch": 5.556919498643236, + "learning_rate": 1.6548035783741734e-07, + "loss": 2.5996, + "step": 21500 + }, + { + "epoch": 5.56208812508076, + "learning_rate": 1.65447945027875e-07, + "loss": 2.6529, + "step": 21520 + }, + { + "epoch": 5.5672567515182845, + "learning_rate": 1.654155322183327e-07, + "loss": 2.6917, + "step": 21540 + }, + { + "epoch": 5.572425377955808, + "learning_rate": 1.6538311940879036e-07, + "loss": 2.6509, + "step": 21560 + }, + { + "epoch": 5.577594004393332, + "learning_rate": 1.6535070659924802e-07, + "loss": 2.6524, + "step": 21580 + }, + { + "epoch": 5.582762630830857, + "learning_rate": 1.6531829378970568e-07, + "loss": 2.6894, + "step": 21600 + }, + { + "epoch": 5.587931257268381, + "learning_rate": 1.6528588098016334e-07, + "loss": 2.6451, + "step": 21620 + }, + { + "epoch": 5.593099883705905, + "learning_rate": 1.6525346817062103e-07, + "loss": 2.6636, + "step": 21640 + }, + { + "epoch": 5.59826851014343, + "learning_rate": 1.652210553610787e-07, + "loss": 2.6696, + "step": 21660 + }, + { + "epoch": 5.603437136580954, + "learning_rate": 1.6518864255153635e-07, + "loss": 2.6577, + "step": 21680 + }, + { + "epoch": 5.608605763018478, + "learning_rate": 1.6515622974199404e-07, + "loss": 2.5968, + "step": 21700 + }, + { + "epoch": 5.613774389456002, + "learning_rate": 1.651238169324517e-07, + "loss": 2.6492, + "step": 21720 + }, + { + "epoch": 5.618943015893526, + "learning_rate": 1.6509140412290937e-07, + "loss": 2.6357, + "step": 21740 + }, + { + "epoch": 5.624111642331051, + "learning_rate": 1.6505899131336705e-07, + "loss": 2.6401, + "step": 21760 + }, + { + "epoch": 5.629280268768575, + "learning_rate": 1.650265785038247e-07, + "loss": 2.5984, + "step": 21780 + }, + { + "epoch": 5.6344488952060985, + "learning_rate": 1.6499416569428238e-07, + "loss": 2.6678, + "step": 21800 + }, + { + "epoch": 5.639617521643623, + "learning_rate": 1.6496175288474004e-07, + "loss": 2.6161, + "step": 21820 + }, + { + "epoch": 5.644786148081147, + "learning_rate": 1.649293400751977e-07, + "loss": 2.6262, + "step": 21840 + }, + { + "epoch": 5.649954774518672, + "learning_rate": 1.648969272656554e-07, + "loss": 2.6514, + "step": 21860 + }, + { + "epoch": 5.655123400956196, + "learning_rate": 1.6486451445611305e-07, + "loss": 2.6629, + "step": 21880 + }, + { + "epoch": 5.66029202739372, + "learning_rate": 1.6483210164657071e-07, + "loss": 2.6764, + "step": 21900 + }, + { + "epoch": 5.665460653831245, + "learning_rate": 1.647996888370284e-07, + "loss": 2.6414, + "step": 21920 + }, + { + "epoch": 5.670629280268768, + "learning_rate": 1.6476727602748604e-07, + "loss": 2.5379, + "step": 21940 + }, + { + "epoch": 5.6757979067062925, + "learning_rate": 1.6473486321794373e-07, + "loss": 2.6744, + "step": 21960 + }, + { + "epoch": 5.680966533143817, + "learning_rate": 1.647024504084014e-07, + "loss": 2.7254, + "step": 21980 + }, + { + "epoch": 5.686135159581341, + "learning_rate": 1.6467003759885905e-07, + "loss": 2.6408, + "step": 22000 + }, + { + "epoch": 5.6913037860188656, + "learning_rate": 1.6463762478931674e-07, + "loss": 2.6751, + "step": 22020 + }, + { + "epoch": 5.69647241245639, + "learning_rate": 1.646052119797744e-07, + "loss": 2.6391, + "step": 22040 + }, + { + "epoch": 5.701641038893914, + "learning_rate": 1.6457279917023206e-07, + "loss": 2.625, + "step": 22060 + }, + { + "epoch": 5.706809665331438, + "learning_rate": 1.6454038636068975e-07, + "loss": 2.607, + "step": 22080 + }, + { + "epoch": 5.711978291768962, + "learning_rate": 1.6450797355114739e-07, + "loss": 2.6629, + "step": 22100 + }, + { + "epoch": 5.7171469182064865, + "learning_rate": 1.6447556074160507e-07, + "loss": 2.6358, + "step": 22120 + }, + { + "epoch": 5.722315544644011, + "learning_rate": 1.6444314793206274e-07, + "loss": 2.6962, + "step": 22140 + }, + { + "epoch": 5.727484171081535, + "learning_rate": 1.644107351225204e-07, + "loss": 2.6403, + "step": 22160 + }, + { + "epoch": 5.7326527975190595, + "learning_rate": 1.6437832231297809e-07, + "loss": 2.641, + "step": 22180 + }, + { + "epoch": 5.737821423956584, + "learning_rate": 1.6434590950343575e-07, + "loss": 2.6213, + "step": 22200 + }, + { + "epoch": 5.742990050394107, + "learning_rate": 1.643134966938934e-07, + "loss": 2.6508, + "step": 22220 + }, + { + "epoch": 5.748158676831632, + "learning_rate": 1.642810838843511e-07, + "loss": 2.6528, + "step": 22240 + }, + { + "epoch": 5.753327303269156, + "learning_rate": 1.6424867107480873e-07, + "loss": 2.5921, + "step": 22260 + }, + { + "epoch": 5.7584959297066804, + "learning_rate": 1.6421625826526642e-07, + "loss": 2.6379, + "step": 22280 + }, + { + "epoch": 5.763664556144205, + "learning_rate": 1.641838454557241e-07, + "loss": 2.6838, + "step": 22300 + }, + { + "epoch": 5.768833182581729, + "learning_rate": 1.6415143264618175e-07, + "loss": 2.6511, + "step": 22320 + }, + { + "epoch": 5.7740018090192535, + "learning_rate": 1.6411901983663943e-07, + "loss": 2.6547, + "step": 22340 + }, + { + "epoch": 5.779170435456777, + "learning_rate": 1.640866070270971e-07, + "loss": 2.5962, + "step": 22360 + }, + { + "epoch": 5.784339061894301, + "learning_rate": 1.6405419421755476e-07, + "loss": 2.6074, + "step": 22380 + }, + { + "epoch": 5.789507688331826, + "learning_rate": 1.6402178140801245e-07, + "loss": 2.5927, + "step": 22400 + }, + { + "epoch": 5.79467631476935, + "learning_rate": 1.6398936859847008e-07, + "loss": 2.6697, + "step": 22420 + }, + { + "epoch": 5.799844941206874, + "learning_rate": 1.6395695578892777e-07, + "loss": 2.6267, + "step": 22440 + }, + { + "epoch": 5.805013567644399, + "learning_rate": 1.6392454297938546e-07, + "loss": 2.6833, + "step": 22460 + }, + { + "epoch": 5.810182194081923, + "learning_rate": 1.638921301698431e-07, + "loss": 2.6647, + "step": 22480 + }, + { + "epoch": 5.815350820519447, + "learning_rate": 1.6385971736030078e-07, + "loss": 2.6619, + "step": 22500 + }, + { + "epoch": 5.820519446956971, + "learning_rate": 1.6382730455075847e-07, + "loss": 2.6341, + "step": 22520 + }, + { + "epoch": 5.825688073394495, + "learning_rate": 1.637948917412161e-07, + "loss": 2.604, + "step": 22540 + }, + { + "epoch": 5.83085669983202, + "learning_rate": 1.637624789316738e-07, + "loss": 2.6344, + "step": 22560 + }, + { + "epoch": 5.836025326269544, + "learning_rate": 1.6373006612213146e-07, + "loss": 2.6453, + "step": 22580 + }, + { + "epoch": 5.841193952707068, + "learning_rate": 1.6369765331258912e-07, + "loss": 2.6433, + "step": 22600 + }, + { + "epoch": 5.846362579144593, + "learning_rate": 1.636652405030468e-07, + "loss": 2.5673, + "step": 22620 + }, + { + "epoch": 5.851531205582116, + "learning_rate": 1.6363282769350444e-07, + "loss": 2.6455, + "step": 22640 + }, + { + "epoch": 5.856699832019641, + "learning_rate": 1.6360041488396213e-07, + "loss": 2.6617, + "step": 22660 + }, + { + "epoch": 5.861868458457165, + "learning_rate": 1.6356800207441982e-07, + "loss": 2.6395, + "step": 22680 + }, + { + "epoch": 5.867037084894689, + "learning_rate": 1.6353558926487745e-07, + "loss": 2.6358, + "step": 22700 + }, + { + "epoch": 5.872205711332214, + "learning_rate": 1.6350317645533514e-07, + "loss": 2.618, + "step": 22720 + }, + { + "epoch": 5.877374337769738, + "learning_rate": 1.634707636457928e-07, + "loss": 2.6219, + "step": 22740 + }, + { + "epoch": 5.882542964207262, + "learning_rate": 1.6343835083625047e-07, + "loss": 2.6028, + "step": 22760 + }, + { + "epoch": 5.887711590644786, + "learning_rate": 1.6340593802670815e-07, + "loss": 2.599, + "step": 22780 + }, + { + "epoch": 5.89288021708231, + "learning_rate": 1.633735252171658e-07, + "loss": 2.6062, + "step": 22800 + }, + { + "epoch": 5.898048843519835, + "learning_rate": 1.6334111240762348e-07, + "loss": 2.5854, + "step": 22820 + }, + { + "epoch": 5.903217469957359, + "learning_rate": 1.6330869959808117e-07, + "loss": 2.5416, + "step": 22840 + }, + { + "epoch": 5.908386096394883, + "learning_rate": 1.632762867885388e-07, + "loss": 2.6731, + "step": 22860 + }, + { + "epoch": 5.913554722832407, + "learning_rate": 1.632438739789965e-07, + "loss": 2.6271, + "step": 22880 + }, + { + "epoch": 5.918723349269931, + "learning_rate": 1.6321146116945418e-07, + "loss": 2.5896, + "step": 22900 + }, + { + "epoch": 5.9238919757074555, + "learning_rate": 1.6317904835991181e-07, + "loss": 2.6794, + "step": 22920 + }, + { + "epoch": 5.92906060214498, + "learning_rate": 1.631466355503695e-07, + "loss": 2.6051, + "step": 22940 + }, + { + "epoch": 5.934229228582504, + "learning_rate": 1.6311422274082716e-07, + "loss": 2.6901, + "step": 22960 + }, + { + "epoch": 5.939397855020029, + "learning_rate": 1.6308180993128483e-07, + "loss": 2.6288, + "step": 22980 + }, + { + "epoch": 5.944566481457553, + "learning_rate": 1.6304939712174251e-07, + "loss": 2.6806, + "step": 23000 + }, + { + "epoch": 5.949735107895076, + "learning_rate": 1.6301698431220015e-07, + "loss": 2.6003, + "step": 23020 + }, + { + "epoch": 5.954903734332601, + "learning_rate": 1.6298457150265784e-07, + "loss": 2.6595, + "step": 23040 + }, + { + "epoch": 5.960072360770125, + "learning_rate": 1.6295215869311553e-07, + "loss": 2.6563, + "step": 23060 + }, + { + "epoch": 5.9652409872076495, + "learning_rate": 1.6291974588357316e-07, + "loss": 2.6499, + "step": 23080 + }, + { + "epoch": 5.970409613645174, + "learning_rate": 1.6288733307403085e-07, + "loss": 2.6836, + "step": 23100 + }, + { + "epoch": 5.975578240082698, + "learning_rate": 1.6285492026448854e-07, + "loss": 2.6193, + "step": 23120 + }, + { + "epoch": 5.980746866520223, + "learning_rate": 1.6282250745494617e-07, + "loss": 2.6293, + "step": 23140 + }, + { + "epoch": 5.985915492957746, + "learning_rate": 1.6279009464540386e-07, + "loss": 2.7275, + "step": 23160 + }, + { + "epoch": 5.99108411939527, + "learning_rate": 1.6275768183586152e-07, + "loss": 2.6021, + "step": 23180 + }, + { + "epoch": 5.996252745832795, + "learning_rate": 1.6272526902631919e-07, + "loss": 2.6331, + "step": 23200 + }, + { + "epoch": 5.999870784339062, + "eval_bleu": 1.6991, + "eval_gen_len": 66.9245, + "eval_loss": 2.596095085144043, + "eval_runtime": 1347.5897, + "eval_samples_per_second": 1.278, + "eval_steps_per_second": 0.639, + "step": 23214 + }, + { + "epoch": 6.001421372270319, + "learning_rate": 1.6269285621677687e-07, + "loss": 2.6423, + "step": 23220 + }, + { + "epoch": 6.0065899987078435, + "learning_rate": 1.626604434072345e-07, + "loss": 2.667, + "step": 23240 + }, + { + "epoch": 6.011758625145368, + "learning_rate": 1.626280305976922e-07, + "loss": 2.6741, + "step": 23260 + }, + { + "epoch": 6.016927251582892, + "learning_rate": 1.6259561778814989e-07, + "loss": 2.6269, + "step": 23280 + }, + { + "epoch": 6.022095878020416, + "learning_rate": 1.6256320497860752e-07, + "loss": 2.6817, + "step": 23300 + }, + { + "epoch": 6.02726450445794, + "learning_rate": 1.625307921690652e-07, + "loss": 2.6804, + "step": 23320 + }, + { + "epoch": 6.032433130895464, + "learning_rate": 1.6249837935952287e-07, + "loss": 2.6283, + "step": 23340 + }, + { + "epoch": 6.037601757332989, + "learning_rate": 1.6246596654998053e-07, + "loss": 2.6344, + "step": 23360 + }, + { + "epoch": 6.042770383770513, + "learning_rate": 1.6243355374043822e-07, + "loss": 2.6453, + "step": 23380 + }, + { + "epoch": 6.0479390102080375, + "learning_rate": 1.6240114093089586e-07, + "loss": 2.5926, + "step": 23400 + }, + { + "epoch": 6.053107636645562, + "learning_rate": 1.6236872812135355e-07, + "loss": 2.6709, + "step": 23420 + }, + { + "epoch": 6.058276263083085, + "learning_rate": 1.6233631531181123e-07, + "loss": 2.6228, + "step": 23440 + }, + { + "epoch": 6.06344488952061, + "learning_rate": 1.6230390250226887e-07, + "loss": 2.6006, + "step": 23460 + }, + { + "epoch": 6.068613515958134, + "learning_rate": 1.6227148969272656e-07, + "loss": 2.6179, + "step": 23480 + }, + { + "epoch": 6.073782142395658, + "learning_rate": 1.6223907688318425e-07, + "loss": 2.63, + "step": 23500 + }, + { + "epoch": 6.078950768833183, + "learning_rate": 1.6220666407364188e-07, + "loss": 2.653, + "step": 23520 + }, + { + "epoch": 6.084119395270707, + "learning_rate": 1.6217425126409957e-07, + "loss": 2.6129, + "step": 23540 + }, + { + "epoch": 6.0892880217082315, + "learning_rate": 1.6214183845455723e-07, + "loss": 2.6008, + "step": 23560 + }, + { + "epoch": 6.094456648145755, + "learning_rate": 1.621094256450149e-07, + "loss": 2.6594, + "step": 23580 + }, + { + "epoch": 6.099625274583279, + "learning_rate": 1.6207701283547258e-07, + "loss": 2.617, + "step": 23600 + }, + { + "epoch": 6.104793901020804, + "learning_rate": 1.6204460002593022e-07, + "loss": 2.6392, + "step": 23620 + }, + { + "epoch": 6.109962527458328, + "learning_rate": 1.620121872163879e-07, + "loss": 2.6315, + "step": 23640 + }, + { + "epoch": 6.115131153895852, + "learning_rate": 1.619797744068456e-07, + "loss": 2.6131, + "step": 23660 + }, + { + "epoch": 6.120299780333377, + "learning_rate": 1.6194736159730323e-07, + "loss": 2.6512, + "step": 23680 + }, + { + "epoch": 6.1254684067709, + "learning_rate": 1.6191494878776092e-07, + "loss": 2.6196, + "step": 23700 + }, + { + "epoch": 6.130637033208425, + "learning_rate": 1.618825359782186e-07, + "loss": 2.6234, + "step": 23720 + }, + { + "epoch": 6.135805659645949, + "learning_rate": 1.6185012316867624e-07, + "loss": 2.5817, + "step": 23740 + }, + { + "epoch": 6.140974286083473, + "learning_rate": 1.6181771035913393e-07, + "loss": 2.5873, + "step": 23760 + }, + { + "epoch": 6.146142912520998, + "learning_rate": 1.617852975495916e-07, + "loss": 2.5957, + "step": 23780 + }, + { + "epoch": 6.151311538958522, + "learning_rate": 1.6175288474004925e-07, + "loss": 2.613, + "step": 23800 + }, + { + "epoch": 6.156480165396046, + "learning_rate": 1.6172047193050694e-07, + "loss": 2.5577, + "step": 23820 + }, + { + "epoch": 6.16164879183357, + "learning_rate": 1.6168805912096458e-07, + "loss": 2.6101, + "step": 23840 + }, + { + "epoch": 6.166817418271094, + "learning_rate": 1.6165564631142227e-07, + "loss": 2.5553, + "step": 23860 + }, + { + "epoch": 6.171986044708619, + "learning_rate": 1.6162323350187995e-07, + "loss": 2.6326, + "step": 23880 + }, + { + "epoch": 6.177154671146143, + "learning_rate": 1.615908206923376e-07, + "loss": 2.5922, + "step": 23900 + }, + { + "epoch": 6.182323297583667, + "learning_rate": 1.6155840788279528e-07, + "loss": 2.5913, + "step": 23920 + }, + { + "epoch": 6.187491924021192, + "learning_rate": 1.6152599507325294e-07, + "loss": 2.6378, + "step": 23940 + }, + { + "epoch": 6.192660550458716, + "learning_rate": 1.614935822637106e-07, + "loss": 2.5969, + "step": 23960 + }, + { + "epoch": 6.1978291768962395, + "learning_rate": 1.614611694541683e-07, + "loss": 2.5971, + "step": 23980 + }, + { + "epoch": 6.202997803333764, + "learning_rate": 1.6142875664462593e-07, + "loss": 2.616, + "step": 24000 + }, + { + "epoch": 6.208166429771288, + "learning_rate": 1.6139634383508361e-07, + "loss": 2.6352, + "step": 24020 + }, + { + "epoch": 6.2133350562088125, + "learning_rate": 1.613639310255413e-07, + "loss": 2.6371, + "step": 24040 + }, + { + "epoch": 6.218503682646337, + "learning_rate": 1.6133151821599894e-07, + "loss": 2.5946, + "step": 24060 + }, + { + "epoch": 6.223672309083861, + "learning_rate": 1.6129910540645663e-07, + "loss": 2.6379, + "step": 24080 + }, + { + "epoch": 6.228840935521385, + "learning_rate": 1.6126669259691431e-07, + "loss": 2.6046, + "step": 24100 + }, + { + "epoch": 6.234009561958909, + "learning_rate": 1.6123427978737195e-07, + "loss": 2.653, + "step": 24120 + }, + { + "epoch": 6.2391781883964335, + "learning_rate": 1.6120186697782964e-07, + "loss": 2.6409, + "step": 24140 + }, + { + "epoch": 6.244346814833958, + "learning_rate": 1.611694541682873e-07, + "loss": 2.6077, + "step": 24160 + }, + { + "epoch": 6.249515441271482, + "learning_rate": 1.6113704135874496e-07, + "loss": 2.5993, + "step": 24180 + }, + { + "epoch": 6.2546840677090065, + "learning_rate": 1.6110462854920265e-07, + "loss": 2.6326, + "step": 24200 + }, + { + "epoch": 6.259852694146531, + "learning_rate": 1.6107221573966029e-07, + "loss": 2.612, + "step": 24220 + }, + { + "epoch": 6.265021320584054, + "learning_rate": 1.6103980293011797e-07, + "loss": 2.662, + "step": 24240 + }, + { + "epoch": 6.270189947021579, + "learning_rate": 1.6100739012057566e-07, + "loss": 2.6377, + "step": 24260 + }, + { + "epoch": 6.275358573459103, + "learning_rate": 1.609749773110333e-07, + "loss": 2.6113, + "step": 24280 + }, + { + "epoch": 6.280527199896627, + "learning_rate": 1.6094256450149099e-07, + "loss": 2.6094, + "step": 24300 + }, + { + "epoch": 6.285695826334152, + "learning_rate": 1.6091015169194867e-07, + "loss": 2.6185, + "step": 24320 + }, + { + "epoch": 6.290864452771676, + "learning_rate": 1.608777388824063e-07, + "loss": 2.6523, + "step": 24340 + }, + { + "epoch": 6.2960330792092005, + "learning_rate": 1.60845326072864e-07, + "loss": 2.6458, + "step": 24360 + }, + { + "epoch": 6.301201705646724, + "learning_rate": 1.6081291326332166e-07, + "loss": 2.5826, + "step": 24380 + }, + { + "epoch": 6.306370332084248, + "learning_rate": 1.6078050045377932e-07, + "loss": 2.6395, + "step": 24400 + }, + { + "epoch": 6.311538958521773, + "learning_rate": 1.60748087644237e-07, + "loss": 2.6305, + "step": 24420 + }, + { + "epoch": 6.316707584959297, + "learning_rate": 1.6071567483469465e-07, + "loss": 2.6493, + "step": 24440 + }, + { + "epoch": 6.321876211396821, + "learning_rate": 1.6068326202515233e-07, + "loss": 2.6198, + "step": 24460 + }, + { + "epoch": 6.327044837834346, + "learning_rate": 1.6065084921561002e-07, + "loss": 2.5705, + "step": 24480 + }, + { + "epoch": 6.33221346427187, + "learning_rate": 1.6061843640606766e-07, + "loss": 2.5797, + "step": 24500 + }, + { + "epoch": 6.337382090709394, + "learning_rate": 1.6058602359652535e-07, + "loss": 2.6152, + "step": 24520 + }, + { + "epoch": 6.342550717146918, + "learning_rate": 1.60553610786983e-07, + "loss": 2.6075, + "step": 24540 + }, + { + "epoch": 6.347719343584442, + "learning_rate": 1.6052119797744067e-07, + "loss": 2.6446, + "step": 24560 + }, + { + "epoch": 6.352887970021967, + "learning_rate": 1.6048878516789836e-07, + "loss": 2.6204, + "step": 24580 + }, + { + "epoch": 6.358056596459491, + "learning_rate": 1.60456372358356e-07, + "loss": 2.6079, + "step": 24600 + }, + { + "epoch": 6.363225222897015, + "learning_rate": 1.6042395954881368e-07, + "loss": 2.59, + "step": 24620 + }, + { + "epoch": 6.36839384933454, + "learning_rate": 1.6039154673927137e-07, + "loss": 2.6417, + "step": 24640 + }, + { + "epoch": 6.373562475772063, + "learning_rate": 1.60359133929729e-07, + "loss": 2.6426, + "step": 24660 + }, + { + "epoch": 6.378731102209588, + "learning_rate": 1.603267211201867e-07, + "loss": 2.6004, + "step": 24680 + }, + { + "epoch": 6.383899728647112, + "learning_rate": 1.6029430831064438e-07, + "loss": 2.6422, + "step": 24700 + }, + { + "epoch": 6.389068355084636, + "learning_rate": 1.6026189550110202e-07, + "loss": 2.595, + "step": 24720 + }, + { + "epoch": 6.394236981522161, + "learning_rate": 1.602294826915597e-07, + "loss": 2.6091, + "step": 24740 + }, + { + "epoch": 6.399405607959685, + "learning_rate": 1.6019706988201737e-07, + "loss": 2.5978, + "step": 24760 + }, + { + "epoch": 6.404574234397209, + "learning_rate": 1.6016465707247503e-07, + "loss": 2.5525, + "step": 24780 + }, + { + "epoch": 6.409742860834733, + "learning_rate": 1.6013224426293272e-07, + "loss": 2.6011, + "step": 24800 + }, + { + "epoch": 6.414911487272257, + "learning_rate": 1.6009983145339035e-07, + "loss": 2.5727, + "step": 24820 + }, + { + "epoch": 6.420080113709782, + "learning_rate": 1.6006741864384804e-07, + "loss": 2.6214, + "step": 24840 + }, + { + "epoch": 6.425248740147306, + "learning_rate": 1.6003500583430573e-07, + "loss": 2.6492, + "step": 24860 + }, + { + "epoch": 6.43041736658483, + "learning_rate": 1.6000259302476336e-07, + "loss": 2.628, + "step": 24880 + }, + { + "epoch": 6.435585993022355, + "learning_rate": 1.5997018021522105e-07, + "loss": 2.6282, + "step": 24900 + }, + { + "epoch": 6.440754619459878, + "learning_rate": 1.5993776740567874e-07, + "loss": 2.6483, + "step": 24920 + }, + { + "epoch": 6.4459232458974025, + "learning_rate": 1.5990535459613638e-07, + "loss": 2.6026, + "step": 24940 + }, + { + "epoch": 6.451091872334927, + "learning_rate": 1.5987294178659407e-07, + "loss": 2.6324, + "step": 24960 + }, + { + "epoch": 6.456260498772451, + "learning_rate": 1.5984052897705173e-07, + "loss": 2.5601, + "step": 24980 + }, + { + "epoch": 6.461429125209976, + "learning_rate": 1.598081161675094e-07, + "loss": 2.6437, + "step": 25000 + }, + { + "epoch": 6.4665977516475, + "learning_rate": 1.5977570335796708e-07, + "loss": 2.6326, + "step": 25020 + }, + { + "epoch": 6.471766378085024, + "learning_rate": 1.597432905484247e-07, + "loss": 2.6664, + "step": 25040 + }, + { + "epoch": 6.476935004522548, + "learning_rate": 1.597108777388824e-07, + "loss": 2.6057, + "step": 25060 + }, + { + "epoch": 6.482103630960072, + "learning_rate": 1.596784649293401e-07, + "loss": 2.6007, + "step": 25080 + }, + { + "epoch": 6.4872722573975965, + "learning_rate": 1.5964605211979772e-07, + "loss": 2.5935, + "step": 25100 + }, + { + "epoch": 6.492440883835121, + "learning_rate": 1.5961363931025541e-07, + "loss": 2.5344, + "step": 25120 + }, + { + "epoch": 6.497609510272645, + "learning_rate": 1.5958122650071308e-07, + "loss": 2.6249, + "step": 25140 + }, + { + "epoch": 6.50277813671017, + "learning_rate": 1.5954881369117074e-07, + "loss": 2.5882, + "step": 25160 + }, + { + "epoch": 6.507946763147693, + "learning_rate": 1.5951640088162843e-07, + "loss": 2.6219, + "step": 25180 + }, + { + "epoch": 6.513115389585217, + "learning_rate": 1.5948398807208606e-07, + "loss": 2.5838, + "step": 25200 + }, + { + "epoch": 6.518284016022742, + "learning_rate": 1.5945157526254375e-07, + "loss": 2.5943, + "step": 25220 + }, + { + "epoch": 6.523452642460266, + "learning_rate": 1.5941916245300144e-07, + "loss": 2.6468, + "step": 25240 + }, + { + "epoch": 6.5286212688977905, + "learning_rate": 1.5938674964345907e-07, + "loss": 2.6726, + "step": 25260 + }, + { + "epoch": 6.533789895335315, + "learning_rate": 1.5935433683391676e-07, + "loss": 2.5732, + "step": 25280 + }, + { + "epoch": 6.538958521772839, + "learning_rate": 1.5932192402437442e-07, + "loss": 2.5739, + "step": 25300 + }, + { + "epoch": 6.544127148210363, + "learning_rate": 1.5928951121483208e-07, + "loss": 2.5914, + "step": 25320 + }, + { + "epoch": 6.549295774647887, + "learning_rate": 1.5925709840528977e-07, + "loss": 2.6142, + "step": 25340 + }, + { + "epoch": 6.554464401085411, + "learning_rate": 1.5922468559574744e-07, + "loss": 2.6145, + "step": 25360 + }, + { + "epoch": 6.559633027522936, + "learning_rate": 1.591922727862051e-07, + "loss": 2.6449, + "step": 25380 + }, + { + "epoch": 6.56480165396046, + "learning_rate": 1.5915985997666279e-07, + "loss": 2.586, + "step": 25400 + }, + { + "epoch": 6.5699702803979845, + "learning_rate": 1.5912744716712042e-07, + "loss": 2.6136, + "step": 25420 + }, + { + "epoch": 6.575138906835509, + "learning_rate": 1.590950343575781e-07, + "loss": 2.6556, + "step": 25440 + }, + { + "epoch": 6.580307533273032, + "learning_rate": 1.5906262154803577e-07, + "loss": 2.6557, + "step": 25460 + }, + { + "epoch": 6.585476159710557, + "learning_rate": 1.5903020873849343e-07, + "loss": 2.5518, + "step": 25480 + }, + { + "epoch": 6.590644786148081, + "learning_rate": 1.5899779592895112e-07, + "loss": 2.6054, + "step": 25500 + }, + { + "epoch": 6.595813412585605, + "learning_rate": 1.5896538311940878e-07, + "loss": 2.5766, + "step": 25520 + }, + { + "epoch": 6.60098203902313, + "learning_rate": 1.5893297030986644e-07, + "loss": 2.5573, + "step": 25540 + }, + { + "epoch": 6.606150665460654, + "learning_rate": 1.5890055750032413e-07, + "loss": 2.6429, + "step": 25560 + }, + { + "epoch": 6.6113192918981785, + "learning_rate": 1.588681446907818e-07, + "loss": 2.6795, + "step": 25580 + }, + { + "epoch": 6.616487918335702, + "learning_rate": 1.5883573188123946e-07, + "loss": 2.6573, + "step": 25600 + }, + { + "epoch": 6.621656544773226, + "learning_rate": 1.5880331907169712e-07, + "loss": 2.5762, + "step": 25620 + }, + { + "epoch": 6.626825171210751, + "learning_rate": 1.5877090626215478e-07, + "loss": 2.6336, + "step": 25640 + }, + { + "epoch": 6.631993797648275, + "learning_rate": 1.5873849345261247e-07, + "loss": 2.5999, + "step": 25660 + }, + { + "epoch": 6.637162424085799, + "learning_rate": 1.5870608064307013e-07, + "loss": 2.6625, + "step": 25680 + }, + { + "epoch": 6.642331050523324, + "learning_rate": 1.586736678335278e-07, + "loss": 2.5277, + "step": 25700 + }, + { + "epoch": 6.647499676960848, + "learning_rate": 1.5864125502398548e-07, + "loss": 2.5688, + "step": 25720 + }, + { + "epoch": 6.652668303398372, + "learning_rate": 1.5860884221444314e-07, + "loss": 2.5813, + "step": 25740 + }, + { + "epoch": 6.657836929835896, + "learning_rate": 1.585764294049008e-07, + "loss": 2.622, + "step": 25760 + }, + { + "epoch": 6.66300555627342, + "learning_rate": 1.5854401659535847e-07, + "loss": 2.5956, + "step": 25780 + }, + { + "epoch": 6.668174182710945, + "learning_rate": 1.5851160378581613e-07, + "loss": 2.5834, + "step": 25800 + }, + { + "epoch": 6.673342809148469, + "learning_rate": 1.5847919097627382e-07, + "loss": 2.5737, + "step": 25820 + }, + { + "epoch": 6.678511435585993, + "learning_rate": 1.5844677816673148e-07, + "loss": 2.5815, + "step": 25840 + }, + { + "epoch": 6.683680062023518, + "learning_rate": 1.5841436535718914e-07, + "loss": 2.5868, + "step": 25860 + }, + { + "epoch": 6.688848688461041, + "learning_rate": 1.5838195254764683e-07, + "loss": 2.6376, + "step": 25880 + }, + { + "epoch": 6.6940173148985656, + "learning_rate": 1.583495397381045e-07, + "loss": 2.5857, + "step": 25900 + }, + { + "epoch": 6.69918594133609, + "learning_rate": 1.5831712692856215e-07, + "loss": 2.6078, + "step": 25920 + }, + { + "epoch": 6.704354567773614, + "learning_rate": 1.5828471411901981e-07, + "loss": 2.6262, + "step": 25940 + }, + { + "epoch": 6.709523194211139, + "learning_rate": 1.582523013094775e-07, + "loss": 2.5676, + "step": 25960 + }, + { + "epoch": 6.714691820648663, + "learning_rate": 1.5821988849993516e-07, + "loss": 2.6036, + "step": 25980 + }, + { + "epoch": 6.7198604470861865, + "learning_rate": 1.5818747569039283e-07, + "loss": 2.5575, + "step": 26000 + }, + { + "epoch": 6.725029073523711, + "learning_rate": 1.581550628808505e-07, + "loss": 2.5708, + "step": 26020 + }, + { + "epoch": 6.730197699961235, + "learning_rate": 1.5812265007130818e-07, + "loss": 2.6489, + "step": 26040 + }, + { + "epoch": 6.7353663263987595, + "learning_rate": 1.5809023726176584e-07, + "loss": 2.6338, + "step": 26060 + }, + { + "epoch": 6.740534952836284, + "learning_rate": 1.580578244522235e-07, + "loss": 2.6506, + "step": 26080 + }, + { + "epoch": 6.745703579273808, + "learning_rate": 1.5802541164268116e-07, + "loss": 2.541, + "step": 26100 + }, + { + "epoch": 6.750872205711332, + "learning_rate": 1.5799299883313885e-07, + "loss": 2.5367, + "step": 26120 + }, + { + "epoch": 6.756040832148856, + "learning_rate": 1.579605860235965e-07, + "loss": 2.5891, + "step": 26140 + }, + { + "epoch": 6.7612094585863804, + "learning_rate": 1.5792817321405417e-07, + "loss": 2.6226, + "step": 26160 + }, + { + "epoch": 6.766378085023905, + "learning_rate": 1.5789576040451186e-07, + "loss": 2.6313, + "step": 26180 + }, + { + "epoch": 6.771546711461429, + "learning_rate": 1.5786334759496952e-07, + "loss": 2.5891, + "step": 26200 + }, + { + "epoch": 6.7767153378989535, + "learning_rate": 1.5783093478542719e-07, + "loss": 2.5806, + "step": 26220 + }, + { + "epoch": 6.781883964336478, + "learning_rate": 1.5779852197588485e-07, + "loss": 2.5518, + "step": 26240 + }, + { + "epoch": 6.787052590774001, + "learning_rate": 1.5776610916634254e-07, + "loss": 2.6104, + "step": 26260 + }, + { + "epoch": 6.792221217211526, + "learning_rate": 1.577336963568002e-07, + "loss": 2.6052, + "step": 26280 + }, + { + "epoch": 6.79738984364905, + "learning_rate": 1.5770128354725786e-07, + "loss": 2.5845, + "step": 26300 + }, + { + "epoch": 6.802558470086574, + "learning_rate": 1.5766887073771552e-07, + "loss": 2.6096, + "step": 26320 + }, + { + "epoch": 6.807727096524099, + "learning_rate": 1.576364579281732e-07, + "loss": 2.5916, + "step": 26340 + }, + { + "epoch": 6.812895722961623, + "learning_rate": 1.5760404511863087e-07, + "loss": 2.584, + "step": 26360 + }, + { + "epoch": 6.8180643493991475, + "learning_rate": 1.5757163230908853e-07, + "loss": 2.6194, + "step": 26380 + }, + { + "epoch": 6.823232975836671, + "learning_rate": 1.575392194995462e-07, + "loss": 2.6581, + "step": 26400 + }, + { + "epoch": 6.828401602274195, + "learning_rate": 1.5750680669000388e-07, + "loss": 2.6636, + "step": 26420 + }, + { + "epoch": 6.83357022871172, + "learning_rate": 1.5747439388046155e-07, + "loss": 2.6135, + "step": 26440 + }, + { + "epoch": 6.838738855149244, + "learning_rate": 1.574419810709192e-07, + "loss": 2.609, + "step": 26460 + }, + { + "epoch": 6.843907481586768, + "learning_rate": 1.574095682613769e-07, + "loss": 2.6168, + "step": 26480 + }, + { + "epoch": 6.849076108024293, + "learning_rate": 1.5737715545183456e-07, + "loss": 2.5408, + "step": 26500 + }, + { + "epoch": 6.854244734461817, + "learning_rate": 1.5734474264229222e-07, + "loss": 2.6252, + "step": 26520 + }, + { + "epoch": 6.859413360899341, + "learning_rate": 1.5731232983274988e-07, + "loss": 2.5935, + "step": 26540 + }, + { + "epoch": 6.864581987336865, + "learning_rate": 1.5727991702320757e-07, + "loss": 2.5767, + "step": 26560 + }, + { + "epoch": 6.869750613774389, + "learning_rate": 1.5724750421366523e-07, + "loss": 2.6351, + "step": 26580 + }, + { + "epoch": 6.874919240211914, + "learning_rate": 1.572150914041229e-07, + "loss": 2.5793, + "step": 26600 + }, + { + "epoch": 6.880087866649438, + "learning_rate": 1.5718267859458056e-07, + "loss": 2.6026, + "step": 26620 + }, + { + "epoch": 6.885256493086962, + "learning_rate": 1.5715026578503824e-07, + "loss": 2.5796, + "step": 26640 + }, + { + "epoch": 6.890425119524487, + "learning_rate": 1.571178529754959e-07, + "loss": 2.6488, + "step": 26660 + }, + { + "epoch": 6.89559374596201, + "learning_rate": 1.5708544016595357e-07, + "loss": 2.6461, + "step": 26680 + }, + { + "epoch": 6.900762372399535, + "learning_rate": 1.5705302735641123e-07, + "loss": 2.5738, + "step": 26700 + }, + { + "epoch": 6.905930998837059, + "learning_rate": 1.5702061454686892e-07, + "loss": 2.5407, + "step": 26720 + }, + { + "epoch": 6.911099625274583, + "learning_rate": 1.5698820173732658e-07, + "loss": 2.6304, + "step": 26740 + }, + { + "epoch": 6.916268251712108, + "learning_rate": 1.5695578892778424e-07, + "loss": 2.576, + "step": 26760 + }, + { + "epoch": 6.921436878149632, + "learning_rate": 1.5692337611824193e-07, + "loss": 2.5784, + "step": 26780 + }, + { + "epoch": 6.926605504587156, + "learning_rate": 1.568909633086996e-07, + "loss": 2.6124, + "step": 26800 + }, + { + "epoch": 6.93177413102468, + "learning_rate": 1.5685855049915725e-07, + "loss": 2.5992, + "step": 26820 + }, + { + "epoch": 6.936942757462204, + "learning_rate": 1.5682613768961492e-07, + "loss": 2.5961, + "step": 26840 + }, + { + "epoch": 6.942111383899729, + "learning_rate": 1.567937248800726e-07, + "loss": 2.5989, + "step": 26860 + }, + { + "epoch": 6.947280010337253, + "learning_rate": 1.5676131207053027e-07, + "loss": 2.6514, + "step": 26880 + }, + { + "epoch": 6.952448636774777, + "learning_rate": 1.5672889926098793e-07, + "loss": 2.5921, + "step": 26900 + }, + { + "epoch": 6.957617263212302, + "learning_rate": 1.566964864514456e-07, + "loss": 2.5907, + "step": 26920 + }, + { + "epoch": 6.962785889649826, + "learning_rate": 1.5666407364190328e-07, + "loss": 2.5183, + "step": 26940 + }, + { + "epoch": 6.9679545160873495, + "learning_rate": 1.5663166083236094e-07, + "loss": 2.6151, + "step": 26960 + }, + { + "epoch": 6.973123142524874, + "learning_rate": 1.565992480228186e-07, + "loss": 2.5454, + "step": 26980 + }, + { + "epoch": 6.978291768962398, + "learning_rate": 1.5656683521327626e-07, + "loss": 2.5349, + "step": 27000 + }, + { + "epoch": 6.983460395399923, + "learning_rate": 1.5653442240373395e-07, + "loss": 2.6422, + "step": 27020 + }, + { + "epoch": 6.988629021837447, + "learning_rate": 1.5650200959419161e-07, + "loss": 2.6273, + "step": 27040 + }, + { + "epoch": 6.993797648274971, + "learning_rate": 1.5646959678464928e-07, + "loss": 2.5851, + "step": 27060 + }, + { + "epoch": 6.998966274712495, + "learning_rate": 1.5643718397510696e-07, + "loss": 2.5716, + "step": 27080 + }, + { + "epoch": 7.0, + "eval_bleu": 5.2201, + "eval_gen_len": 46.1405, + "eval_loss": 2.561117172241211, + "eval_runtime": 958.6568, + "eval_samples_per_second": 1.796, + "eval_steps_per_second": 0.898, + "step": 27084 + }, + { + "epoch": 7.004134901150019, + "learning_rate": 1.5640477116556463e-07, + "loss": 2.5946, + "step": 27100 + }, + { + "epoch": 7.0093035275875435, + "learning_rate": 1.563723583560223e-07, + "loss": 2.5941, + "step": 27120 + }, + { + "epoch": 7.014472154025068, + "learning_rate": 1.5633994554647995e-07, + "loss": 2.5895, + "step": 27140 + }, + { + "epoch": 7.019640780462592, + "learning_rate": 1.5630753273693764e-07, + "loss": 2.5711, + "step": 27160 + }, + { + "epoch": 7.024809406900117, + "learning_rate": 1.562751199273953e-07, + "loss": 2.6074, + "step": 27180 + }, + { + "epoch": 7.029978033337641, + "learning_rate": 1.5624270711785296e-07, + "loss": 2.5763, + "step": 27200 + }, + { + "epoch": 7.035146659775164, + "learning_rate": 1.5621029430831062e-07, + "loss": 2.5656, + "step": 27220 + }, + { + "epoch": 7.040315286212689, + "learning_rate": 1.561778814987683e-07, + "loss": 2.576, + "step": 27240 + }, + { + "epoch": 7.045483912650213, + "learning_rate": 1.5614546868922597e-07, + "loss": 2.6323, + "step": 27260 + }, + { + "epoch": 7.0506525390877375, + "learning_rate": 1.5611305587968364e-07, + "loss": 2.6121, + "step": 27280 + }, + { + "epoch": 7.055821165525262, + "learning_rate": 1.560806430701413e-07, + "loss": 2.5962, + "step": 27300 + }, + { + "epoch": 7.060989791962786, + "learning_rate": 1.5604823026059899e-07, + "loss": 2.614, + "step": 27320 + }, + { + "epoch": 7.06615841840031, + "learning_rate": 1.5601581745105665e-07, + "loss": 2.5828, + "step": 27340 + }, + { + "epoch": 7.071327044837834, + "learning_rate": 1.559834046415143e-07, + "loss": 2.6366, + "step": 27360 + }, + { + "epoch": 7.076495671275358, + "learning_rate": 1.55950991831972e-07, + "loss": 2.5704, + "step": 27380 + }, + { + "epoch": 7.081664297712883, + "learning_rate": 1.5591857902242966e-07, + "loss": 2.6038, + "step": 27400 + }, + { + "epoch": 7.086832924150407, + "learning_rate": 1.5588616621288732e-07, + "loss": 2.5829, + "step": 27420 + }, + { + "epoch": 7.0920015505879315, + "learning_rate": 1.5585375340334498e-07, + "loss": 2.5535, + "step": 27440 + }, + { + "epoch": 7.097170177025456, + "learning_rate": 1.5582134059380267e-07, + "loss": 2.6527, + "step": 27460 + }, + { + "epoch": 7.102338803462979, + "learning_rate": 1.5578892778426033e-07, + "loss": 2.5602, + "step": 27480 + }, + { + "epoch": 7.107507429900504, + "learning_rate": 1.55756514974718e-07, + "loss": 2.5519, + "step": 27500 + }, + { + "epoch": 7.112676056338028, + "learning_rate": 1.5572410216517566e-07, + "loss": 2.6072, + "step": 27520 + }, + { + "epoch": 7.117844682775552, + "learning_rate": 1.5569168935563335e-07, + "loss": 2.5692, + "step": 27540 + }, + { + "epoch": 7.123013309213077, + "learning_rate": 1.55659276546091e-07, + "loss": 2.5865, + "step": 27560 + }, + { + "epoch": 7.128181935650601, + "learning_rate": 1.5562686373654867e-07, + "loss": 2.5513, + "step": 27580 + }, + { + "epoch": 7.1333505620881255, + "learning_rate": 1.5559445092700633e-07, + "loss": 2.5622, + "step": 27600 + }, + { + "epoch": 7.138519188525649, + "learning_rate": 1.5556203811746402e-07, + "loss": 2.5787, + "step": 27620 + }, + { + "epoch": 7.143687814963173, + "learning_rate": 1.5552962530792168e-07, + "loss": 2.5697, + "step": 27640 + }, + { + "epoch": 7.148856441400698, + "learning_rate": 1.5549721249837934e-07, + "loss": 2.5477, + "step": 27660 + }, + { + "epoch": 7.154025067838222, + "learning_rate": 1.5546479968883703e-07, + "loss": 2.5206, + "step": 27680 + }, + { + "epoch": 7.159193694275746, + "learning_rate": 1.554323868792947e-07, + "loss": 2.6325, + "step": 27700 + }, + { + "epoch": 7.164362320713271, + "learning_rate": 1.5539997406975236e-07, + "loss": 2.5897, + "step": 27720 + }, + { + "epoch": 7.169530947150795, + "learning_rate": 1.5536756126021002e-07, + "loss": 2.5669, + "step": 27740 + }, + { + "epoch": 7.174699573588319, + "learning_rate": 1.553351484506677e-07, + "loss": 2.5391, + "step": 27760 + }, + { + "epoch": 7.179868200025843, + "learning_rate": 1.5530273564112537e-07, + "loss": 2.5715, + "step": 27780 + }, + { + "epoch": 7.185036826463367, + "learning_rate": 1.5527032283158303e-07, + "loss": 2.6015, + "step": 27800 + }, + { + "epoch": 7.190205452900892, + "learning_rate": 1.552379100220407e-07, + "loss": 2.6043, + "step": 27820 + }, + { + "epoch": 7.195374079338416, + "learning_rate": 1.5520549721249838e-07, + "loss": 2.5723, + "step": 27840 + }, + { + "epoch": 7.20054270577594, + "learning_rate": 1.5517308440295604e-07, + "loss": 2.5877, + "step": 27860 + }, + { + "epoch": 7.205711332213465, + "learning_rate": 1.551406715934137e-07, + "loss": 2.5823, + "step": 27880 + }, + { + "epoch": 7.210879958650988, + "learning_rate": 1.5510825878387137e-07, + "loss": 2.5435, + "step": 27900 + }, + { + "epoch": 7.2160485850885125, + "learning_rate": 1.5507584597432905e-07, + "loss": 2.5416, + "step": 27920 + }, + { + "epoch": 7.221217211526037, + "learning_rate": 1.5504343316478672e-07, + "loss": 2.6001, + "step": 27940 + }, + { + "epoch": 7.226385837963561, + "learning_rate": 1.5501102035524438e-07, + "loss": 2.6136, + "step": 27960 + }, + { + "epoch": 7.231554464401086, + "learning_rate": 1.5497860754570204e-07, + "loss": 2.6079, + "step": 27980 + }, + { + "epoch": 7.23672309083861, + "learning_rate": 1.5494619473615973e-07, + "loss": 2.6051, + "step": 28000 + }, + { + "epoch": 7.2418917172761335, + "learning_rate": 1.549137819266174e-07, + "loss": 2.6275, + "step": 28020 + }, + { + "epoch": 7.247060343713658, + "learning_rate": 1.5488136911707505e-07, + "loss": 2.6114, + "step": 28040 + }, + { + "epoch": 7.252228970151182, + "learning_rate": 1.5484895630753274e-07, + "loss": 2.6532, + "step": 28060 + }, + { + "epoch": 7.2573975965887065, + "learning_rate": 1.548165434979904e-07, + "loss": 2.5954, + "step": 28080 + }, + { + "epoch": 7.262566223026231, + "learning_rate": 1.5478413068844806e-07, + "loss": 2.555, + "step": 28100 + }, + { + "epoch": 7.267734849463755, + "learning_rate": 1.5475171787890573e-07, + "loss": 2.601, + "step": 28120 + }, + { + "epoch": 7.27290347590128, + "learning_rate": 1.547193050693634e-07, + "loss": 2.5666, + "step": 28140 + }, + { + "epoch": 7.278072102338803, + "learning_rate": 1.5468689225982108e-07, + "loss": 2.563, + "step": 28160 + }, + { + "epoch": 7.283240728776327, + "learning_rate": 1.5465447945027874e-07, + "loss": 2.5919, + "step": 28180 + }, + { + "epoch": 7.288409355213852, + "learning_rate": 1.546220666407364e-07, + "loss": 2.5042, + "step": 28200 + }, + { + "epoch": 7.293577981651376, + "learning_rate": 1.545896538311941e-07, + "loss": 2.614, + "step": 28220 + }, + { + "epoch": 7.2987466080889005, + "learning_rate": 1.5455724102165175e-07, + "loss": 2.5763, + "step": 28240 + }, + { + "epoch": 7.303915234526425, + "learning_rate": 1.545248282121094e-07, + "loss": 2.5852, + "step": 28260 + }, + { + "epoch": 7.309083860963949, + "learning_rate": 1.544924154025671e-07, + "loss": 2.638, + "step": 28280 + }, + { + "epoch": 7.314252487401473, + "learning_rate": 1.5446000259302476e-07, + "loss": 2.5642, + "step": 28300 + }, + { + "epoch": 7.319421113838997, + "learning_rate": 1.5442758978348242e-07, + "loss": 2.5905, + "step": 28320 + }, + { + "epoch": 7.324589740276521, + "learning_rate": 1.5439517697394009e-07, + "loss": 2.5641, + "step": 28340 + }, + { + "epoch": 7.329758366714046, + "learning_rate": 1.5436276416439775e-07, + "loss": 2.5369, + "step": 28360 + }, + { + "epoch": 7.33492699315157, + "learning_rate": 1.5433035135485544e-07, + "loss": 2.5828, + "step": 28380 + }, + { + "epoch": 7.3400956195890945, + "learning_rate": 1.542979385453131e-07, + "loss": 2.6183, + "step": 28400 + }, + { + "epoch": 7.345264246026618, + "learning_rate": 1.5426552573577076e-07, + "loss": 2.5041, + "step": 28420 + }, + { + "epoch": 7.350432872464142, + "learning_rate": 1.5423311292622845e-07, + "loss": 2.6381, + "step": 28440 + }, + { + "epoch": 7.355601498901667, + "learning_rate": 1.542007001166861e-07, + "loss": 2.607, + "step": 28460 + }, + { + "epoch": 7.360770125339191, + "learning_rate": 1.5416828730714377e-07, + "loss": 2.5654, + "step": 28480 + }, + { + "epoch": 7.365938751776715, + "learning_rate": 1.5413587449760143e-07, + "loss": 2.5333, + "step": 28500 + }, + { + "epoch": 7.37110737821424, + "learning_rate": 1.5410346168805912e-07, + "loss": 2.5771, + "step": 28520 + }, + { + "epoch": 7.376276004651764, + "learning_rate": 1.5407104887851678e-07, + "loss": 2.5225, + "step": 28540 + }, + { + "epoch": 7.381444631089288, + "learning_rate": 1.5403863606897445e-07, + "loss": 2.6159, + "step": 28560 + }, + { + "epoch": 7.386613257526812, + "learning_rate": 1.540062232594321e-07, + "loss": 2.6229, + "step": 28580 + }, + { + "epoch": 7.391781883964336, + "learning_rate": 1.539738104498898e-07, + "loss": 2.5326, + "step": 28600 + }, + { + "epoch": 7.396950510401861, + "learning_rate": 1.5394139764034746e-07, + "loss": 2.5116, + "step": 28620 + }, + { + "epoch": 7.402119136839385, + "learning_rate": 1.5390898483080512e-07, + "loss": 2.5276, + "step": 28640 + }, + { + "epoch": 7.407287763276909, + "learning_rate": 1.538765720212628e-07, + "loss": 2.5662, + "step": 28660 + }, + { + "epoch": 7.412456389714434, + "learning_rate": 1.5384415921172047e-07, + "loss": 2.5843, + "step": 28680 + }, + { + "epoch": 7.417625016151957, + "learning_rate": 1.5381174640217813e-07, + "loss": 2.6341, + "step": 28700 + }, + { + "epoch": 7.422793642589482, + "learning_rate": 1.537793335926358e-07, + "loss": 2.5741, + "step": 28720 + }, + { + "epoch": 7.427962269027006, + "learning_rate": 1.5374692078309346e-07, + "loss": 2.5658, + "step": 28740 + }, + { + "epoch": 7.43313089546453, + "learning_rate": 1.5371450797355114e-07, + "loss": 2.5909, + "step": 28760 + }, + { + "epoch": 7.438299521902055, + "learning_rate": 1.536820951640088e-07, + "loss": 2.5543, + "step": 28780 + }, + { + "epoch": 7.443468148339579, + "learning_rate": 1.5364968235446647e-07, + "loss": 2.5546, + "step": 28800 + }, + { + "epoch": 7.448636774777103, + "learning_rate": 1.5361726954492416e-07, + "loss": 2.5236, + "step": 28820 + }, + { + "epoch": 7.453805401214627, + "learning_rate": 1.5358485673538182e-07, + "loss": 2.5981, + "step": 28840 + }, + { + "epoch": 7.458974027652151, + "learning_rate": 1.5355244392583948e-07, + "loss": 2.5322, + "step": 28860 + }, + { + "epoch": 7.464142654089676, + "learning_rate": 1.5352003111629717e-07, + "loss": 2.6326, + "step": 28880 + }, + { + "epoch": 7.4693112805272, + "learning_rate": 1.5348761830675483e-07, + "loss": 2.5647, + "step": 28900 + }, + { + "epoch": 7.474479906964724, + "learning_rate": 1.534552054972125e-07, + "loss": 2.6036, + "step": 28920 + }, + { + "epoch": 7.479648533402249, + "learning_rate": 1.5342279268767015e-07, + "loss": 2.5672, + "step": 28940 + }, + { + "epoch": 7.484817159839773, + "learning_rate": 1.5339037987812782e-07, + "loss": 2.5102, + "step": 28960 + }, + { + "epoch": 7.4899857862772965, + "learning_rate": 1.533579670685855e-07, + "loss": 2.6278, + "step": 28980 + }, + { + "epoch": 7.495154412714821, + "learning_rate": 1.5332555425904317e-07, + "loss": 2.6092, + "step": 29000 + }, + { + "epoch": 7.500323039152345, + "learning_rate": 1.5329314144950083e-07, + "loss": 2.5685, + "step": 29020 + }, + { + "epoch": 7.50549166558987, + "learning_rate": 1.5326072863995852e-07, + "loss": 2.5203, + "step": 29040 + }, + { + "epoch": 7.510660292027394, + "learning_rate": 1.5322831583041618e-07, + "loss": 2.6086, + "step": 29060 + }, + { + "epoch": 7.515828918464918, + "learning_rate": 1.5319590302087384e-07, + "loss": 2.556, + "step": 29080 + }, + { + "epoch": 7.520997544902443, + "learning_rate": 1.531634902113315e-07, + "loss": 2.6068, + "step": 29100 + }, + { + "epoch": 7.526166171339966, + "learning_rate": 1.531310774017892e-07, + "loss": 2.5915, + "step": 29120 + }, + { + "epoch": 7.5313347977774905, + "learning_rate": 1.5309866459224685e-07, + "loss": 2.5853, + "step": 29140 + }, + { + "epoch": 7.536503424215015, + "learning_rate": 1.5306625178270451e-07, + "loss": 2.5933, + "step": 29160 + }, + { + "epoch": 7.541672050652539, + "learning_rate": 1.5303383897316218e-07, + "loss": 2.5732, + "step": 29180 + }, + { + "epoch": 7.546840677090064, + "learning_rate": 1.5300142616361986e-07, + "loss": 2.5925, + "step": 29200 + }, + { + "epoch": 7.552009303527588, + "learning_rate": 1.5296901335407753e-07, + "loss": 2.6029, + "step": 29220 + }, + { + "epoch": 7.557177929965111, + "learning_rate": 1.529366005445352e-07, + "loss": 2.6053, + "step": 29240 + }, + { + "epoch": 7.562346556402636, + "learning_rate": 1.5290418773499288e-07, + "loss": 2.5341, + "step": 29260 + }, + { + "epoch": 7.56751518284016, + "learning_rate": 1.5287177492545054e-07, + "loss": 2.4981, + "step": 29280 + }, + { + "epoch": 7.5726838092776845, + "learning_rate": 1.528393621159082e-07, + "loss": 2.5551, + "step": 29300 + }, + { + "epoch": 7.577852435715209, + "learning_rate": 1.5280694930636586e-07, + "loss": 2.5029, + "step": 29320 + }, + { + "epoch": 7.583021062152733, + "learning_rate": 1.5277453649682352e-07, + "loss": 2.6108, + "step": 29340 + }, + { + "epoch": 7.588189688590257, + "learning_rate": 1.527421236872812e-07, + "loss": 2.5652, + "step": 29360 + }, + { + "epoch": 7.593358315027781, + "learning_rate": 1.5270971087773887e-07, + "loss": 2.5423, + "step": 29380 + }, + { + "epoch": 7.598526941465305, + "learning_rate": 1.5267729806819654e-07, + "loss": 2.5123, + "step": 29400 + }, + { + "epoch": 7.60369556790283, + "learning_rate": 1.5264488525865422e-07, + "loss": 2.5787, + "step": 29420 + }, + { + "epoch": 7.608864194340354, + "learning_rate": 1.5261247244911189e-07, + "loss": 2.5894, + "step": 29440 + }, + { + "epoch": 7.6140328207778785, + "learning_rate": 1.5258005963956955e-07, + "loss": 2.5427, + "step": 29460 + }, + { + "epoch": 7.619201447215403, + "learning_rate": 1.5254764683002724e-07, + "loss": 2.6234, + "step": 29480 + }, + { + "epoch": 7.624370073652926, + "learning_rate": 1.525152340204849e-07, + "loss": 2.5201, + "step": 29500 + }, + { + "epoch": 7.629538700090451, + "learning_rate": 1.5248282121094256e-07, + "loss": 2.5933, + "step": 29520 + }, + { + "epoch": 7.634707326527975, + "learning_rate": 1.5245040840140022e-07, + "loss": 2.555, + "step": 29540 + }, + { + "epoch": 7.639875952965499, + "learning_rate": 1.5241799559185788e-07, + "loss": 2.5297, + "step": 29560 + }, + { + "epoch": 7.645044579403024, + "learning_rate": 1.5238558278231557e-07, + "loss": 2.574, + "step": 29580 + }, + { + "epoch": 7.650213205840548, + "learning_rate": 1.5235316997277323e-07, + "loss": 2.6062, + "step": 29600 + }, + { + "epoch": 7.6553818322780725, + "learning_rate": 1.523207571632309e-07, + "loss": 2.5437, + "step": 29620 + }, + { + "epoch": 7.660550458715596, + "learning_rate": 1.5228834435368858e-07, + "loss": 2.599, + "step": 29640 + }, + { + "epoch": 7.66571908515312, + "learning_rate": 1.5225593154414625e-07, + "loss": 2.5211, + "step": 29660 + }, + { + "epoch": 7.670887711590645, + "learning_rate": 1.522235187346039e-07, + "loss": 2.538, + "step": 29680 + }, + { + "epoch": 7.676056338028169, + "learning_rate": 1.5219110592506157e-07, + "loss": 2.5887, + "step": 29700 + }, + { + "epoch": 7.681224964465693, + "learning_rate": 1.5215869311551926e-07, + "loss": 2.5519, + "step": 29720 + }, + { + "epoch": 7.686393590903218, + "learning_rate": 1.5212628030597692e-07, + "loss": 2.5509, + "step": 29740 + }, + { + "epoch": 7.691562217340742, + "learning_rate": 1.5209386749643458e-07, + "loss": 2.5353, + "step": 29760 + }, + { + "epoch": 7.6967308437782656, + "learning_rate": 1.5206145468689224e-07, + "loss": 2.6387, + "step": 29780 + }, + { + "epoch": 7.70189947021579, + "learning_rate": 1.5202904187734993e-07, + "loss": 2.5547, + "step": 29800 + }, + { + "epoch": 7.707068096653314, + "learning_rate": 1.519966290678076e-07, + "loss": 2.65, + "step": 29820 + }, + { + "epoch": 7.712236723090839, + "learning_rate": 1.5196421625826526e-07, + "loss": 2.5733, + "step": 29840 + }, + { + "epoch": 7.717405349528363, + "learning_rate": 1.5193180344872294e-07, + "loss": 2.5608, + "step": 29860 + }, + { + "epoch": 7.722573975965887, + "learning_rate": 1.518993906391806e-07, + "loss": 2.5819, + "step": 29880 + }, + { + "epoch": 7.727742602403412, + "learning_rate": 1.5186697782963827e-07, + "loss": 2.6017, + "step": 29900 + }, + { + "epoch": 7.732911228840935, + "learning_rate": 1.5183456502009593e-07, + "loss": 2.5467, + "step": 29920 + }, + { + "epoch": 7.7380798552784595, + "learning_rate": 1.518021522105536e-07, + "loss": 2.558, + "step": 29940 + }, + { + "epoch": 7.743248481715984, + "learning_rate": 1.5176973940101128e-07, + "loss": 2.5716, + "step": 29960 + }, + { + "epoch": 7.748417108153508, + "learning_rate": 1.5173732659146894e-07, + "loss": 2.5765, + "step": 29980 + }, + { + "epoch": 7.753585734591033, + "learning_rate": 1.517049137819266e-07, + "loss": 2.6051, + "step": 30000 + }, + { + "epoch": 7.758754361028557, + "learning_rate": 1.516725009723843e-07, + "loss": 2.6049, + "step": 30020 + }, + { + "epoch": 7.763922987466081, + "learning_rate": 1.5164008816284195e-07, + "loss": 2.5621, + "step": 30040 + }, + { + "epoch": 7.769091613903605, + "learning_rate": 1.5160767535329962e-07, + "loss": 2.5449, + "step": 30060 + }, + { + "epoch": 7.774260240341129, + "learning_rate": 1.515752625437573e-07, + "loss": 2.5652, + "step": 30080 + }, + { + "epoch": 7.7794288667786535, + "learning_rate": 1.5154284973421497e-07, + "loss": 2.5216, + "step": 30100 + }, + { + "epoch": 7.784597493216178, + "learning_rate": 1.5151043692467263e-07, + "loss": 2.521, + "step": 30120 + }, + { + "epoch": 7.789766119653702, + "learning_rate": 1.514780241151303e-07, + "loss": 2.5561, + "step": 30140 + }, + { + "epoch": 7.794934746091227, + "learning_rate": 1.5144561130558795e-07, + "loss": 2.5601, + "step": 30160 + }, + { + "epoch": 7.800103372528751, + "learning_rate": 1.5141319849604564e-07, + "loss": 2.5599, + "step": 30180 + }, + { + "epoch": 7.805271998966274, + "learning_rate": 1.513807856865033e-07, + "loss": 2.6198, + "step": 30200 + }, + { + "epoch": 7.810440625403799, + "learning_rate": 1.5134837287696096e-07, + "loss": 2.6023, + "step": 30220 + }, + { + "epoch": 7.815609251841323, + "learning_rate": 1.5131596006741865e-07, + "loss": 2.5248, + "step": 30240 + }, + { + "epoch": 7.8207778782788475, + "learning_rate": 1.5128354725787631e-07, + "loss": 2.5971, + "step": 30260 + }, + { + "epoch": 7.825946504716372, + "learning_rate": 1.5125113444833398e-07, + "loss": 2.5428, + "step": 30280 + }, + { + "epoch": 7.831115131153896, + "learning_rate": 1.5121872163879164e-07, + "loss": 2.5757, + "step": 30300 + }, + { + "epoch": 7.83628375759142, + "learning_rate": 1.5118630882924933e-07, + "loss": 2.561, + "step": 30320 + }, + { + "epoch": 7.841452384028944, + "learning_rate": 1.51153896019707e-07, + "loss": 2.5586, + "step": 30340 + }, + { + "epoch": 7.846621010466468, + "learning_rate": 1.5112148321016465e-07, + "loss": 2.612, + "step": 30360 + }, + { + "epoch": 7.851789636903993, + "learning_rate": 1.510890704006223e-07, + "loss": 2.5565, + "step": 30380 + }, + { + "epoch": 7.856958263341517, + "learning_rate": 1.5105665759108e-07, + "loss": 2.5571, + "step": 30400 + }, + { + "epoch": 7.8621268897790415, + "learning_rate": 1.5102424478153766e-07, + "loss": 2.5895, + "step": 30420 + }, + { + "epoch": 7.867295516216565, + "learning_rate": 1.5099183197199532e-07, + "loss": 2.5987, + "step": 30440 + }, + { + "epoch": 7.872464142654089, + "learning_rate": 1.5095941916245298e-07, + "loss": 2.5516, + "step": 30460 + }, + { + "epoch": 7.877632769091614, + "learning_rate": 1.5092700635291067e-07, + "loss": 2.5649, + "step": 30480 + }, + { + "epoch": 7.882801395529138, + "learning_rate": 1.5089459354336834e-07, + "loss": 2.5498, + "step": 30500 + }, + { + "epoch": 7.887970021966662, + "learning_rate": 1.50862180733826e-07, + "loss": 2.6242, + "step": 30520 + }, + { + "epoch": 7.893138648404187, + "learning_rate": 1.5082976792428366e-07, + "loss": 2.5774, + "step": 30540 + }, + { + "epoch": 7.898307274841711, + "learning_rate": 1.5079735511474135e-07, + "loss": 2.5298, + "step": 30560 + }, + { + "epoch": 7.903475901279235, + "learning_rate": 1.50764942305199e-07, + "loss": 2.55, + "step": 30580 + }, + { + "epoch": 7.908644527716759, + "learning_rate": 1.5073252949565667e-07, + "loss": 2.5924, + "step": 30600 + }, + { + "epoch": 7.913813154154283, + "learning_rate": 1.5070011668611436e-07, + "loss": 2.515, + "step": 30620 + }, + { + "epoch": 7.918981780591808, + "learning_rate": 1.5066770387657202e-07, + "loss": 2.5605, + "step": 30640 + }, + { + "epoch": 7.924150407029332, + "learning_rate": 1.5063529106702968e-07, + "loss": 2.6101, + "step": 30660 + }, + { + "epoch": 7.929319033466856, + "learning_rate": 1.5060287825748734e-07, + "loss": 2.5612, + "step": 30680 + }, + { + "epoch": 7.934487659904381, + "learning_rate": 1.5057046544794503e-07, + "loss": 2.6157, + "step": 30700 + }, + { + "epoch": 7.939656286341904, + "learning_rate": 1.505380526384027e-07, + "loss": 2.5566, + "step": 30720 + }, + { + "epoch": 7.944824912779429, + "learning_rate": 1.5050563982886036e-07, + "loss": 2.5501, + "step": 30740 + }, + { + "epoch": 7.949993539216953, + "learning_rate": 1.5047322701931802e-07, + "loss": 2.5918, + "step": 30760 + }, + { + "epoch": 7.955162165654477, + "learning_rate": 1.504408142097757e-07, + "loss": 2.5722, + "step": 30780 + }, + { + "epoch": 7.960330792092002, + "learning_rate": 1.5040840140023337e-07, + "loss": 2.5233, + "step": 30800 + }, + { + "epoch": 7.965499418529526, + "learning_rate": 1.5037598859069103e-07, + "loss": 2.5999, + "step": 30820 + }, + { + "epoch": 7.97066804496705, + "learning_rate": 1.503435757811487e-07, + "loss": 2.5356, + "step": 30840 + }, + { + "epoch": 7.975836671404574, + "learning_rate": 1.5031116297160638e-07, + "loss": 2.5524, + "step": 30860 + }, + { + "epoch": 7.981005297842098, + "learning_rate": 1.5027875016206404e-07, + "loss": 2.6066, + "step": 30880 + }, + { + "epoch": 7.986173924279623, + "learning_rate": 1.502463373525217e-07, + "loss": 2.5563, + "step": 30900 + }, + { + "epoch": 7.991342550717147, + "learning_rate": 1.502139245429794e-07, + "loss": 2.5131, + "step": 30920 + }, + { + "epoch": 7.996511177154671, + "learning_rate": 1.5018151173343705e-07, + "loss": 2.5943, + "step": 30940 + }, + { + "epoch": 7.999870784339062, + "eval_bleu": 8.0263, + "eval_gen_len": 40.7538, + "eval_loss": 2.5299909114837646, + "eval_runtime": 863.7677, + "eval_samples_per_second": 1.994, + "eval_steps_per_second": 0.997, + "step": 30953 + }, + { + "epoch": 8.001679803592195, + "learning_rate": 1.5014909892389472e-07, + "loss": 2.5393, + "step": 30960 + }, + { + "epoch": 8.00684843002972, + "learning_rate": 1.5011668611435238e-07, + "loss": 2.4841, + "step": 30980 + }, + { + "epoch": 8.012017056467243, + "learning_rate": 1.5008427330481007e-07, + "loss": 2.5913, + "step": 31000 + }, + { + "epoch": 8.017185682904769, + "learning_rate": 1.5005186049526773e-07, + "loss": 2.5053, + "step": 31020 + }, + { + "epoch": 8.022354309342292, + "learning_rate": 1.500194476857254e-07, + "loss": 2.5612, + "step": 31040 + }, + { + "epoch": 8.027522935779816, + "learning_rate": 1.4998703487618305e-07, + "loss": 2.5478, + "step": 31060 + }, + { + "epoch": 8.032691562217341, + "learning_rate": 1.4995462206664074e-07, + "loss": 2.5783, + "step": 31080 + }, + { + "epoch": 8.037860188654864, + "learning_rate": 1.499222092570984e-07, + "loss": 2.5484, + "step": 31100 + }, + { + "epoch": 8.04302881509239, + "learning_rate": 1.4988979644755606e-07, + "loss": 2.6095, + "step": 31120 + }, + { + "epoch": 8.048197441529913, + "learning_rate": 1.4985738363801373e-07, + "loss": 2.5052, + "step": 31140 + }, + { + "epoch": 8.053366067967438, + "learning_rate": 1.4982497082847141e-07, + "loss": 2.4753, + "step": 31160 + }, + { + "epoch": 8.058534694404962, + "learning_rate": 1.4979255801892908e-07, + "loss": 2.5393, + "step": 31180 + }, + { + "epoch": 8.063703320842485, + "learning_rate": 1.4976014520938674e-07, + "loss": 2.5935, + "step": 31200 + }, + { + "epoch": 8.06887194728001, + "learning_rate": 1.497277323998444e-07, + "loss": 2.5911, + "step": 31220 + }, + { + "epoch": 8.074040573717534, + "learning_rate": 1.496953195903021e-07, + "loss": 2.572, + "step": 31240 + }, + { + "epoch": 8.07920920015506, + "learning_rate": 1.4966290678075975e-07, + "loss": 2.5707, + "step": 31260 + }, + { + "epoch": 8.084377826592583, + "learning_rate": 1.496304939712174e-07, + "loss": 2.5847, + "step": 31280 + }, + { + "epoch": 8.089546453030108, + "learning_rate": 1.4959808116167507e-07, + "loss": 2.5486, + "step": 31300 + }, + { + "epoch": 8.094715079467631, + "learning_rate": 1.4956566835213276e-07, + "loss": 2.5432, + "step": 31320 + }, + { + "epoch": 8.099883705905155, + "learning_rate": 1.4953325554259042e-07, + "loss": 2.5547, + "step": 31340 + }, + { + "epoch": 8.10505233234268, + "learning_rate": 1.4950084273304809e-07, + "loss": 2.5349, + "step": 31360 + }, + { + "epoch": 8.110220958780204, + "learning_rate": 1.4946842992350577e-07, + "loss": 2.5603, + "step": 31380 + }, + { + "epoch": 8.115389585217729, + "learning_rate": 1.4943601711396344e-07, + "loss": 2.5763, + "step": 31400 + }, + { + "epoch": 8.120558211655252, + "learning_rate": 1.494036043044211e-07, + "loss": 2.5194, + "step": 31420 + }, + { + "epoch": 8.125726838092778, + "learning_rate": 1.4937119149487876e-07, + "loss": 2.5414, + "step": 31440 + }, + { + "epoch": 8.130895464530301, + "learning_rate": 1.4933877868533645e-07, + "loss": 2.5459, + "step": 31460 + }, + { + "epoch": 8.136064090967825, + "learning_rate": 1.493063658757941e-07, + "loss": 2.5307, + "step": 31480 + }, + { + "epoch": 8.14123271740535, + "learning_rate": 1.4927395306625177e-07, + "loss": 2.4837, + "step": 31500 + }, + { + "epoch": 8.146401343842873, + "learning_rate": 1.4924154025670943e-07, + "loss": 2.5149, + "step": 31520 + }, + { + "epoch": 8.151569970280399, + "learning_rate": 1.4920912744716712e-07, + "loss": 2.4708, + "step": 31540 + }, + { + "epoch": 8.156738596717922, + "learning_rate": 1.4917671463762478e-07, + "loss": 2.5476, + "step": 31560 + }, + { + "epoch": 8.161907223155447, + "learning_rate": 1.4914430182808245e-07, + "loss": 2.5492, + "step": 31580 + }, + { + "epoch": 8.16707584959297, + "learning_rate": 1.491118890185401e-07, + "loss": 2.5697, + "step": 31600 + }, + { + "epoch": 8.172244476030494, + "learning_rate": 1.490794762089978e-07, + "loss": 2.5333, + "step": 31620 + }, + { + "epoch": 8.17741310246802, + "learning_rate": 1.4904706339945546e-07, + "loss": 2.5197, + "step": 31640 + }, + { + "epoch": 8.182581728905543, + "learning_rate": 1.4901465058991312e-07, + "loss": 2.4984, + "step": 31660 + }, + { + "epoch": 8.187750355343068, + "learning_rate": 1.489822377803708e-07, + "loss": 2.5087, + "step": 31680 + }, + { + "epoch": 8.192918981780592, + "learning_rate": 1.4894982497082847e-07, + "loss": 2.5842, + "step": 31700 + }, + { + "epoch": 8.198087608218117, + "learning_rate": 1.4891741216128613e-07, + "loss": 2.5249, + "step": 31720 + }, + { + "epoch": 8.20325623465564, + "learning_rate": 1.488849993517438e-07, + "loss": 2.5729, + "step": 31740 + }, + { + "epoch": 8.208424861093164, + "learning_rate": 1.4885258654220148e-07, + "loss": 2.5038, + "step": 31760 + }, + { + "epoch": 8.213593487530689, + "learning_rate": 1.4882017373265914e-07, + "loss": 2.512, + "step": 31780 + }, + { + "epoch": 8.218762113968213, + "learning_rate": 1.487877609231168e-07, + "loss": 2.6194, + "step": 31800 + }, + { + "epoch": 8.223930740405738, + "learning_rate": 1.4875534811357447e-07, + "loss": 2.5354, + "step": 31820 + }, + { + "epoch": 8.229099366843261, + "learning_rate": 1.4872293530403216e-07, + "loss": 2.577, + "step": 31840 + }, + { + "epoch": 8.234267993280785, + "learning_rate": 1.4869052249448982e-07, + "loss": 2.5439, + "step": 31860 + }, + { + "epoch": 8.23943661971831, + "learning_rate": 1.4865810968494748e-07, + "loss": 2.4729, + "step": 31880 + }, + { + "epoch": 8.244605246155833, + "learning_rate": 1.4862569687540514e-07, + "loss": 2.5979, + "step": 31900 + }, + { + "epoch": 8.249773872593359, + "learning_rate": 1.4859328406586283e-07, + "loss": 2.5676, + "step": 31920 + }, + { + "epoch": 8.254942499030882, + "learning_rate": 1.485608712563205e-07, + "loss": 2.5859, + "step": 31940 + }, + { + "epoch": 8.260111125468407, + "learning_rate": 1.4852845844677815e-07, + "loss": 2.558, + "step": 31960 + }, + { + "epoch": 8.265279751905931, + "learning_rate": 1.4849604563723584e-07, + "loss": 2.5678, + "step": 31980 + }, + { + "epoch": 8.270448378343456, + "learning_rate": 1.484636328276935e-07, + "loss": 2.5299, + "step": 32000 + }, + { + "epoch": 8.27561700478098, + "learning_rate": 1.4843122001815117e-07, + "loss": 2.5439, + "step": 32020 + }, + { + "epoch": 8.280785631218503, + "learning_rate": 1.4839880720860883e-07, + "loss": 2.4766, + "step": 32040 + }, + { + "epoch": 8.285954257656028, + "learning_rate": 1.4836639439906652e-07, + "loss": 2.5289, + "step": 32060 + }, + { + "epoch": 8.291122884093552, + "learning_rate": 1.4833398158952418e-07, + "loss": 2.5708, + "step": 32080 + }, + { + "epoch": 8.296291510531077, + "learning_rate": 1.4830156877998184e-07, + "loss": 2.4981, + "step": 32100 + }, + { + "epoch": 8.3014601369686, + "learning_rate": 1.482691559704395e-07, + "loss": 2.5352, + "step": 32120 + }, + { + "epoch": 8.306628763406124, + "learning_rate": 1.482367431608972e-07, + "loss": 2.558, + "step": 32140 + }, + { + "epoch": 8.31179738984365, + "learning_rate": 1.4820433035135485e-07, + "loss": 2.503, + "step": 32160 + }, + { + "epoch": 8.316966016281173, + "learning_rate": 1.4817191754181251e-07, + "loss": 2.5792, + "step": 32180 + }, + { + "epoch": 8.322134642718698, + "learning_rate": 1.4813950473227018e-07, + "loss": 2.5312, + "step": 32200 + }, + { + "epoch": 8.327303269156221, + "learning_rate": 1.4810709192272786e-07, + "loss": 2.5503, + "step": 32220 + }, + { + "epoch": 8.332471895593747, + "learning_rate": 1.4807467911318553e-07, + "loss": 2.5442, + "step": 32240 + }, + { + "epoch": 8.33764052203127, + "learning_rate": 1.480422663036432e-07, + "loss": 2.5578, + "step": 32260 + }, + { + "epoch": 8.342809148468794, + "learning_rate": 1.4800985349410088e-07, + "loss": 2.5253, + "step": 32280 + }, + { + "epoch": 8.347977774906319, + "learning_rate": 1.4797744068455854e-07, + "loss": 2.4777, + "step": 32300 + }, + { + "epoch": 8.353146401343842, + "learning_rate": 1.479450278750162e-07, + "loss": 2.525, + "step": 32320 + }, + { + "epoch": 8.358315027781368, + "learning_rate": 1.4791261506547386e-07, + "loss": 2.5804, + "step": 32340 + }, + { + "epoch": 8.363483654218891, + "learning_rate": 1.4788020225593155e-07, + "loss": 2.5387, + "step": 32360 + }, + { + "epoch": 8.368652280656416, + "learning_rate": 1.478477894463892e-07, + "loss": 2.5867, + "step": 32380 + }, + { + "epoch": 8.37382090709394, + "learning_rate": 1.4781537663684687e-07, + "loss": 2.5073, + "step": 32400 + }, + { + "epoch": 8.378989533531463, + "learning_rate": 1.4778296382730454e-07, + "loss": 2.5671, + "step": 32420 + }, + { + "epoch": 8.384158159968988, + "learning_rate": 1.4775055101776222e-07, + "loss": 2.5598, + "step": 32440 + }, + { + "epoch": 8.389326786406512, + "learning_rate": 1.4771813820821989e-07, + "loss": 2.552, + "step": 32460 + }, + { + "epoch": 8.394495412844037, + "learning_rate": 1.4768572539867755e-07, + "loss": 2.5682, + "step": 32480 + }, + { + "epoch": 8.39966403928156, + "learning_rate": 1.476533125891352e-07, + "loss": 2.5659, + "step": 32500 + }, + { + "epoch": 8.404832665719086, + "learning_rate": 1.476208997795929e-07, + "loss": 2.5365, + "step": 32520 + }, + { + "epoch": 8.41000129215661, + "learning_rate": 1.4758848697005056e-07, + "loss": 2.5348, + "step": 32540 + }, + { + "epoch": 8.415169918594133, + "learning_rate": 1.4755607416050822e-07, + "loss": 2.5808, + "step": 32560 + }, + { + "epoch": 8.420338545031658, + "learning_rate": 1.475236613509659e-07, + "loss": 2.5285, + "step": 32580 + }, + { + "epoch": 8.425507171469182, + "learning_rate": 1.4749124854142357e-07, + "loss": 2.5493, + "step": 32600 + }, + { + "epoch": 8.430675797906707, + "learning_rate": 1.4745883573188123e-07, + "loss": 2.5594, + "step": 32620 + }, + { + "epoch": 8.43584442434423, + "learning_rate": 1.474264229223389e-07, + "loss": 2.5149, + "step": 32640 + }, + { + "epoch": 8.441013050781756, + "learning_rate": 1.4739401011279656e-07, + "loss": 2.5238, + "step": 32660 + }, + { + "epoch": 8.446181677219279, + "learning_rate": 1.4736159730325425e-07, + "loss": 2.5444, + "step": 32680 + }, + { + "epoch": 8.451350303656803, + "learning_rate": 1.473291844937119e-07, + "loss": 2.5498, + "step": 32700 + }, + { + "epoch": 8.456518930094328, + "learning_rate": 1.4729677168416957e-07, + "loss": 2.5182, + "step": 32720 + }, + { + "epoch": 8.461687556531851, + "learning_rate": 1.4726435887462726e-07, + "loss": 2.5588, + "step": 32740 + }, + { + "epoch": 8.466856182969376, + "learning_rate": 1.4723194606508492e-07, + "loss": 2.5302, + "step": 32760 + }, + { + "epoch": 8.4720248094069, + "learning_rate": 1.4719953325554258e-07, + "loss": 2.5296, + "step": 32780 + }, + { + "epoch": 8.477193435844425, + "learning_rate": 1.4716712044600024e-07, + "loss": 2.5676, + "step": 32800 + }, + { + "epoch": 8.482362062281949, + "learning_rate": 1.471347076364579e-07, + "loss": 2.567, + "step": 32820 + }, + { + "epoch": 8.487530688719472, + "learning_rate": 1.471022948269156e-07, + "loss": 2.5531, + "step": 32840 + }, + { + "epoch": 8.492699315156997, + "learning_rate": 1.4706988201737326e-07, + "loss": 2.6131, + "step": 32860 + }, + { + "epoch": 8.49786794159452, + "learning_rate": 1.4703746920783092e-07, + "loss": 2.5818, + "step": 32880 + }, + { + "epoch": 8.503036568032046, + "learning_rate": 1.470050563982886e-07, + "loss": 2.542, + "step": 32900 + }, + { + "epoch": 8.50820519446957, + "learning_rate": 1.4697264358874627e-07, + "loss": 2.5656, + "step": 32920 + }, + { + "epoch": 8.513373820907095, + "learning_rate": 1.4694023077920393e-07, + "loss": 2.5548, + "step": 32940 + }, + { + "epoch": 8.518542447344618, + "learning_rate": 1.4690781796966162e-07, + "loss": 2.5295, + "step": 32960 + }, + { + "epoch": 8.523711073782142, + "learning_rate": 1.4687540516011925e-07, + "loss": 2.4961, + "step": 32980 + }, + { + "epoch": 8.528879700219667, + "learning_rate": 1.4684299235057694e-07, + "loss": 2.5361, + "step": 33000 + }, + { + "epoch": 8.53404832665719, + "learning_rate": 1.468105795410346e-07, + "loss": 2.5244, + "step": 33020 + }, + { + "epoch": 8.539216953094716, + "learning_rate": 1.4677816673149227e-07, + "loss": 2.4984, + "step": 33040 + }, + { + "epoch": 8.54438557953224, + "learning_rate": 1.4674575392194995e-07, + "loss": 2.5107, + "step": 33060 + }, + { + "epoch": 8.549554205969763, + "learning_rate": 1.4671334111240762e-07, + "loss": 2.6018, + "step": 33080 + }, + { + "epoch": 8.554722832407288, + "learning_rate": 1.4668092830286528e-07, + "loss": 2.5481, + "step": 33100 + }, + { + "epoch": 8.559891458844811, + "learning_rate": 1.4664851549332297e-07, + "loss": 2.5671, + "step": 33120 + }, + { + "epoch": 8.565060085282337, + "learning_rate": 1.466161026837806e-07, + "loss": 2.5587, + "step": 33140 + }, + { + "epoch": 8.57022871171986, + "learning_rate": 1.465836898742383e-07, + "loss": 2.4905, + "step": 33160 + }, + { + "epoch": 8.575397338157385, + "learning_rate": 1.4655127706469598e-07, + "loss": 2.5159, + "step": 33180 + }, + { + "epoch": 8.580565964594909, + "learning_rate": 1.4651886425515361e-07, + "loss": 2.5419, + "step": 33200 + }, + { + "epoch": 8.585734591032432, + "learning_rate": 1.464864514456113e-07, + "loss": 2.5811, + "step": 33220 + }, + { + "epoch": 8.590903217469958, + "learning_rate": 1.4645403863606896e-07, + "loss": 2.5175, + "step": 33240 + }, + { + "epoch": 8.596071843907481, + "learning_rate": 1.4642162582652663e-07, + "loss": 2.553, + "step": 33260 + }, + { + "epoch": 8.601240470345006, + "learning_rate": 1.4638921301698431e-07, + "loss": 2.5869, + "step": 33280 + }, + { + "epoch": 8.60640909678253, + "learning_rate": 1.4635680020744198e-07, + "loss": 2.5046, + "step": 33300 + }, + { + "epoch": 8.611577723220055, + "learning_rate": 1.4632438739789964e-07, + "loss": 2.5389, + "step": 33320 + }, + { + "epoch": 8.616746349657578, + "learning_rate": 1.4629197458835733e-07, + "loss": 2.5497, + "step": 33340 + }, + { + "epoch": 8.621914976095102, + "learning_rate": 1.4625956177881496e-07, + "loss": 2.5344, + "step": 33360 + }, + { + "epoch": 8.627083602532627, + "learning_rate": 1.4622714896927265e-07, + "loss": 2.6, + "step": 33380 + }, + { + "epoch": 8.63225222897015, + "learning_rate": 1.461947361597303e-07, + "loss": 2.5658, + "step": 33400 + }, + { + "epoch": 8.637420855407676, + "learning_rate": 1.4616232335018797e-07, + "loss": 2.539, + "step": 33420 + }, + { + "epoch": 8.6425894818452, + "learning_rate": 1.4612991054064566e-07, + "loss": 2.5898, + "step": 33440 + }, + { + "epoch": 8.647758108282725, + "learning_rate": 1.4609749773110332e-07, + "loss": 2.4808, + "step": 33460 + }, + { + "epoch": 8.652926734720248, + "learning_rate": 1.4606508492156099e-07, + "loss": 2.4885, + "step": 33480 + }, + { + "epoch": 8.658095361157772, + "learning_rate": 1.4603267211201867e-07, + "loss": 2.5437, + "step": 33500 + }, + { + "epoch": 8.663263987595297, + "learning_rate": 1.4600025930247634e-07, + "loss": 2.5715, + "step": 33520 + }, + { + "epoch": 8.66843261403282, + "learning_rate": 1.45967846492934e-07, + "loss": 2.5828, + "step": 33540 + }, + { + "epoch": 8.673601240470346, + "learning_rate": 1.4593543368339169e-07, + "loss": 2.4961, + "step": 33560 + }, + { + "epoch": 8.678769866907869, + "learning_rate": 1.4590302087384932e-07, + "loss": 2.5212, + "step": 33580 + }, + { + "epoch": 8.683938493345394, + "learning_rate": 1.45870608064307e-07, + "loss": 2.5321, + "step": 33600 + }, + { + "epoch": 8.689107119782918, + "learning_rate": 1.4583819525476467e-07, + "loss": 2.5146, + "step": 33620 + }, + { + "epoch": 8.694275746220441, + "learning_rate": 1.4580578244522233e-07, + "loss": 2.5531, + "step": 33640 + }, + { + "epoch": 8.699444372657966, + "learning_rate": 1.4577336963568002e-07, + "loss": 2.4989, + "step": 33660 + }, + { + "epoch": 8.70461299909549, + "learning_rate": 1.4574095682613768e-07, + "loss": 2.5502, + "step": 33680 + }, + { + "epoch": 8.709781625533015, + "learning_rate": 1.4570854401659535e-07, + "loss": 2.4926, + "step": 33700 + }, + { + "epoch": 8.714950251970539, + "learning_rate": 1.4567613120705303e-07, + "loss": 2.5462, + "step": 33720 + }, + { + "epoch": 8.720118878408064, + "learning_rate": 1.4564371839751067e-07, + "loss": 2.5373, + "step": 33740 + }, + { + "epoch": 8.725287504845587, + "learning_rate": 1.4561130558796836e-07, + "loss": 2.5745, + "step": 33760 + }, + { + "epoch": 8.73045613128311, + "learning_rate": 1.4557889277842605e-07, + "loss": 2.5468, + "step": 33780 + }, + { + "epoch": 8.735624757720636, + "learning_rate": 1.4554647996888368e-07, + "loss": 2.5654, + "step": 33800 + }, + { + "epoch": 8.74079338415816, + "learning_rate": 1.4551406715934137e-07, + "loss": 2.5433, + "step": 33820 + }, + { + "epoch": 8.745962010595685, + "learning_rate": 1.4548165434979903e-07, + "loss": 2.5865, + "step": 33840 + }, + { + "epoch": 8.751130637033208, + "learning_rate": 1.454492415402567e-07, + "loss": 2.5631, + "step": 33860 + }, + { + "epoch": 8.756299263470734, + "learning_rate": 1.4541682873071438e-07, + "loss": 2.5857, + "step": 33880 + }, + { + "epoch": 8.761467889908257, + "learning_rate": 1.4538441592117204e-07, + "loss": 2.5168, + "step": 33900 + }, + { + "epoch": 8.76663651634578, + "learning_rate": 1.453520031116297e-07, + "loss": 2.5316, + "step": 33920 + }, + { + "epoch": 8.771805142783306, + "learning_rate": 1.453195903020874e-07, + "loss": 2.5464, + "step": 33940 + }, + { + "epoch": 8.77697376922083, + "learning_rate": 1.4528717749254503e-07, + "loss": 2.5508, + "step": 33960 + }, + { + "epoch": 8.782142395658354, + "learning_rate": 1.4525476468300272e-07, + "loss": 2.5082, + "step": 33980 + }, + { + "epoch": 8.787311022095878, + "learning_rate": 1.4522235187346038e-07, + "loss": 2.502, + "step": 34000 + }, + { + "epoch": 8.792479648533401, + "learning_rate": 1.4518993906391804e-07, + "loss": 2.5586, + "step": 34020 + }, + { + "epoch": 8.797648274970927, + "learning_rate": 1.4515752625437573e-07, + "loss": 2.5555, + "step": 34040 + }, + { + "epoch": 8.80281690140845, + "learning_rate": 1.451251134448334e-07, + "loss": 2.5683, + "step": 34060 + }, + { + "epoch": 8.807985527845975, + "learning_rate": 1.4509270063529105e-07, + "loss": 2.5462, + "step": 34080 + }, + { + "epoch": 8.813154154283499, + "learning_rate": 1.4506028782574874e-07, + "loss": 2.4885, + "step": 34100 + }, + { + "epoch": 8.818322780721024, + "learning_rate": 1.450278750162064e-07, + "loss": 2.4856, + "step": 34120 + }, + { + "epoch": 8.823491407158548, + "learning_rate": 1.4499546220666407e-07, + "loss": 2.538, + "step": 34140 + }, + { + "epoch": 8.828660033596073, + "learning_rate": 1.4496304939712175e-07, + "loss": 2.5263, + "step": 34160 + }, + { + "epoch": 8.833828660033596, + "learning_rate": 1.449306365875794e-07, + "loss": 2.5397, + "step": 34180 + }, + { + "epoch": 8.83899728647112, + "learning_rate": 1.4489822377803708e-07, + "loss": 2.5295, + "step": 34200 + }, + { + "epoch": 8.844165912908645, + "learning_rate": 1.4486581096849474e-07, + "loss": 2.5853, + "step": 34220 + }, + { + "epoch": 8.849334539346168, + "learning_rate": 1.448333981589524e-07, + "loss": 2.5566, + "step": 34240 + }, + { + "epoch": 8.854503165783694, + "learning_rate": 1.448009853494101e-07, + "loss": 2.5319, + "step": 34260 + }, + { + "epoch": 8.859671792221217, + "learning_rate": 1.4476857253986775e-07, + "loss": 2.4863, + "step": 34280 + }, + { + "epoch": 8.86484041865874, + "learning_rate": 1.4473615973032541e-07, + "loss": 2.5584, + "step": 34300 + }, + { + "epoch": 8.870009045096266, + "learning_rate": 1.447037469207831e-07, + "loss": 2.5321, + "step": 34320 + }, + { + "epoch": 8.87517767153379, + "learning_rate": 1.4467133411124074e-07, + "loss": 2.5304, + "step": 34340 + }, + { + "epoch": 8.880346297971315, + "learning_rate": 1.4463892130169843e-07, + "loss": 2.5564, + "step": 34360 + }, + { + "epoch": 8.885514924408838, + "learning_rate": 1.4460650849215611e-07, + "loss": 2.5027, + "step": 34380 + }, + { + "epoch": 8.890683550846363, + "learning_rate": 1.4457409568261375e-07, + "loss": 2.4922, + "step": 34400 + }, + { + "epoch": 8.895852177283887, + "learning_rate": 1.4454168287307144e-07, + "loss": 2.5166, + "step": 34420 + }, + { + "epoch": 8.90102080372141, + "learning_rate": 1.445092700635291e-07, + "loss": 2.527, + "step": 34440 + }, + { + "epoch": 8.906189430158935, + "learning_rate": 1.4447685725398676e-07, + "loss": 2.5665, + "step": 34460 + }, + { + "epoch": 8.911358056596459, + "learning_rate": 1.4444444444444445e-07, + "loss": 2.5615, + "step": 34480 + }, + { + "epoch": 8.916526683033984, + "learning_rate": 1.444120316349021e-07, + "loss": 2.4899, + "step": 34500 + }, + { + "epoch": 8.921695309471508, + "learning_rate": 1.4437961882535977e-07, + "loss": 2.5012, + "step": 34520 + }, + { + "epoch": 8.926863935909033, + "learning_rate": 1.4434720601581746e-07, + "loss": 2.5526, + "step": 34540 + }, + { + "epoch": 8.932032562346556, + "learning_rate": 1.443147932062751e-07, + "loss": 2.5167, + "step": 34560 + }, + { + "epoch": 8.93720118878408, + "learning_rate": 1.4428238039673279e-07, + "loss": 2.5177, + "step": 34580 + }, + { + "epoch": 8.942369815221605, + "learning_rate": 1.4424996758719045e-07, + "loss": 2.5548, + "step": 34600 + }, + { + "epoch": 8.947538441659129, + "learning_rate": 1.442175547776481e-07, + "loss": 2.5742, + "step": 34620 + }, + { + "epoch": 8.952707068096654, + "learning_rate": 1.441851419681058e-07, + "loss": 2.5149, + "step": 34640 + }, + { + "epoch": 8.957875694534177, + "learning_rate": 1.4415272915856346e-07, + "loss": 2.5042, + "step": 34660 + }, + { + "epoch": 8.963044320971703, + "learning_rate": 1.4412031634902112e-07, + "loss": 2.5076, + "step": 34680 + }, + { + "epoch": 8.968212947409226, + "learning_rate": 1.440879035394788e-07, + "loss": 2.5282, + "step": 34700 + }, + { + "epoch": 8.97338157384675, + "learning_rate": 1.4405549072993647e-07, + "loss": 2.5373, + "step": 34720 + }, + { + "epoch": 8.978550200284275, + "learning_rate": 1.4402307792039413e-07, + "loss": 2.5556, + "step": 34740 + }, + { + "epoch": 8.983718826721798, + "learning_rate": 1.4399066511085182e-07, + "loss": 2.5413, + "step": 34760 + }, + { + "epoch": 8.988887453159323, + "learning_rate": 1.4395825230130946e-07, + "loss": 2.5563, + "step": 34780 + }, + { + "epoch": 8.994056079596847, + "learning_rate": 1.4392583949176715e-07, + "loss": 2.5267, + "step": 34800 + }, + { + "epoch": 8.999224706034372, + "learning_rate": 1.438934266822248e-07, + "loss": 2.5622, + "step": 34820 + }, + { + "epoch": 9.0, + "eval_bleu": 10.2353, + "eval_gen_len": 38.2607, + "eval_loss": 2.504971742630005, + "eval_runtime": 830.1656, + "eval_samples_per_second": 2.074, + "eval_steps_per_second": 1.037, + "step": 34823 + }, + { + "epoch": 9.004393332471896, + "learning_rate": 1.4386101387268247e-07, + "loss": 2.5627, + "step": 34840 + }, + { + "epoch": 9.00956195890942, + "learning_rate": 1.4382860106314016e-07, + "loss": 2.538, + "step": 34860 + }, + { + "epoch": 9.014730585346944, + "learning_rate": 1.4379618825359782e-07, + "loss": 2.5561, + "step": 34880 + }, + { + "epoch": 9.019899211784468, + "learning_rate": 1.4376377544405548e-07, + "loss": 2.5216, + "step": 34900 + }, + { + "epoch": 9.025067838221993, + "learning_rate": 1.4373136263451317e-07, + "loss": 2.4699, + "step": 34920 + }, + { + "epoch": 9.030236464659517, + "learning_rate": 1.436989498249708e-07, + "loss": 2.5566, + "step": 34940 + }, + { + "epoch": 9.035405091097042, + "learning_rate": 1.436665370154285e-07, + "loss": 2.4698, + "step": 34960 + }, + { + "epoch": 9.040573717534565, + "learning_rate": 1.4363412420588616e-07, + "loss": 2.4995, + "step": 34980 + }, + { + "epoch": 9.045742343972089, + "learning_rate": 1.4360171139634382e-07, + "loss": 2.522, + "step": 35000 + }, + { + "epoch": 9.050910970409614, + "learning_rate": 1.435692985868015e-07, + "loss": 2.5507, + "step": 35020 + }, + { + "epoch": 9.056079596847137, + "learning_rate": 1.4353688577725917e-07, + "loss": 2.4685, + "step": 35040 + }, + { + "epoch": 9.061248223284663, + "learning_rate": 1.4350447296771683e-07, + "loss": 2.4728, + "step": 35060 + }, + { + "epoch": 9.066416849722186, + "learning_rate": 1.4347206015817452e-07, + "loss": 2.6299, + "step": 35080 + }, + { + "epoch": 9.071585476159711, + "learning_rate": 1.4343964734863218e-07, + "loss": 2.5499, + "step": 35100 + }, + { + "epoch": 9.076754102597235, + "learning_rate": 1.4340723453908984e-07, + "loss": 2.5478, + "step": 35120 + }, + { + "epoch": 9.081922729034758, + "learning_rate": 1.433748217295475e-07, + "loss": 2.4566, + "step": 35140 + }, + { + "epoch": 9.087091355472284, + "learning_rate": 1.4334240892000516e-07, + "loss": 2.4931, + "step": 35160 + }, + { + "epoch": 9.092259981909807, + "learning_rate": 1.4330999611046285e-07, + "loss": 2.5343, + "step": 35180 + }, + { + "epoch": 9.097428608347332, + "learning_rate": 1.4327758330092052e-07, + "loss": 2.5651, + "step": 35200 + }, + { + "epoch": 9.102597234784856, + "learning_rate": 1.4324517049137818e-07, + "loss": 2.5226, + "step": 35220 + }, + { + "epoch": 9.10776586122238, + "learning_rate": 1.4321275768183587e-07, + "loss": 2.5504, + "step": 35240 + }, + { + "epoch": 9.112934487659905, + "learning_rate": 1.4318034487229353e-07, + "loss": 2.5127, + "step": 35260 + }, + { + "epoch": 9.118103114097428, + "learning_rate": 1.431479320627512e-07, + "loss": 2.4964, + "step": 35280 + }, + { + "epoch": 9.123271740534953, + "learning_rate": 1.4311551925320885e-07, + "loss": 2.5396, + "step": 35300 + }, + { + "epoch": 9.128440366972477, + "learning_rate": 1.4308310644366654e-07, + "loss": 2.5303, + "step": 35320 + }, + { + "epoch": 9.133608993410002, + "learning_rate": 1.430506936341242e-07, + "loss": 2.5574, + "step": 35340 + }, + { + "epoch": 9.138777619847525, + "learning_rate": 1.4301828082458186e-07, + "loss": 2.5649, + "step": 35360 + }, + { + "epoch": 9.143946246285049, + "learning_rate": 1.4298586801503952e-07, + "loss": 2.5113, + "step": 35380 + }, + { + "epoch": 9.149114872722574, + "learning_rate": 1.429534552054972e-07, + "loss": 2.4826, + "step": 35400 + }, + { + "epoch": 9.154283499160098, + "learning_rate": 1.4292104239595488e-07, + "loss": 2.5148, + "step": 35420 + }, + { + "epoch": 9.159452125597623, + "learning_rate": 1.4288862958641254e-07, + "loss": 2.5568, + "step": 35440 + }, + { + "epoch": 9.164620752035146, + "learning_rate": 1.428562167768702e-07, + "loss": 2.5475, + "step": 35460 + }, + { + "epoch": 9.169789378472672, + "learning_rate": 1.428238039673279e-07, + "loss": 2.5335, + "step": 35480 + }, + { + "epoch": 9.174958004910195, + "learning_rate": 1.4279139115778555e-07, + "loss": 2.494, + "step": 35500 + }, + { + "epoch": 9.180126631347719, + "learning_rate": 1.427589783482432e-07, + "loss": 2.5568, + "step": 35520 + }, + { + "epoch": 9.185295257785244, + "learning_rate": 1.4272656553870087e-07, + "loss": 2.5425, + "step": 35540 + }, + { + "epoch": 9.190463884222767, + "learning_rate": 1.4269415272915856e-07, + "loss": 2.5672, + "step": 35560 + }, + { + "epoch": 9.195632510660293, + "learning_rate": 1.4266173991961622e-07, + "loss": 2.5532, + "step": 35580 + }, + { + "epoch": 9.200801137097816, + "learning_rate": 1.4262932711007388e-07, + "loss": 2.4934, + "step": 35600 + }, + { + "epoch": 9.205969763535341, + "learning_rate": 1.4259691430053157e-07, + "loss": 2.5009, + "step": 35620 + }, + { + "epoch": 9.211138389972865, + "learning_rate": 1.4256450149098924e-07, + "loss": 2.4935, + "step": 35640 + }, + { + "epoch": 9.216307016410388, + "learning_rate": 1.425320886814469e-07, + "loss": 2.5217, + "step": 35660 + }, + { + "epoch": 9.221475642847913, + "learning_rate": 1.4249967587190456e-07, + "loss": 2.5246, + "step": 35680 + }, + { + "epoch": 9.226644269285437, + "learning_rate": 1.4246726306236225e-07, + "loss": 2.4851, + "step": 35700 + }, + { + "epoch": 9.231812895722962, + "learning_rate": 1.424348502528199e-07, + "loss": 2.585, + "step": 35720 + }, + { + "epoch": 9.236981522160486, + "learning_rate": 1.4240243744327757e-07, + "loss": 2.5602, + "step": 35740 + }, + { + "epoch": 9.24215014859801, + "learning_rate": 1.4237002463373523e-07, + "loss": 2.4916, + "step": 35760 + }, + { + "epoch": 9.247318775035534, + "learning_rate": 1.4233761182419292e-07, + "loss": 2.584, + "step": 35780 + }, + { + "epoch": 9.252487401473058, + "learning_rate": 1.4230519901465058e-07, + "loss": 2.5671, + "step": 35800 + }, + { + "epoch": 9.257656027910583, + "learning_rate": 1.4227278620510824e-07, + "loss": 2.5741, + "step": 35820 + }, + { + "epoch": 9.262824654348107, + "learning_rate": 1.422403733955659e-07, + "loss": 2.544, + "step": 35840 + }, + { + "epoch": 9.267993280785632, + "learning_rate": 1.422079605860236e-07, + "loss": 2.5616, + "step": 35860 + }, + { + "epoch": 9.273161907223155, + "learning_rate": 1.4217554777648126e-07, + "loss": 2.5236, + "step": 35880 + }, + { + "epoch": 9.27833053366068, + "learning_rate": 1.4214313496693892e-07, + "loss": 2.4909, + "step": 35900 + }, + { + "epoch": 9.283499160098204, + "learning_rate": 1.421107221573966e-07, + "loss": 2.514, + "step": 35920 + }, + { + "epoch": 9.288667786535727, + "learning_rate": 1.4207830934785427e-07, + "loss": 2.6055, + "step": 35940 + }, + { + "epoch": 9.293836412973253, + "learning_rate": 1.4204589653831193e-07, + "loss": 2.5403, + "step": 35960 + }, + { + "epoch": 9.299005039410776, + "learning_rate": 1.420134837287696e-07, + "loss": 2.4817, + "step": 35980 + }, + { + "epoch": 9.304173665848301, + "learning_rate": 1.4198107091922728e-07, + "loss": 2.4968, + "step": 36000 + }, + { + "epoch": 9.309342292285825, + "learning_rate": 1.4194865810968494e-07, + "loss": 2.5205, + "step": 36020 + }, + { + "epoch": 9.31451091872335, + "learning_rate": 1.419162453001426e-07, + "loss": 2.4925, + "step": 36040 + }, + { + "epoch": 9.319679545160874, + "learning_rate": 1.4188383249060027e-07, + "loss": 2.4745, + "step": 36060 + }, + { + "epoch": 9.324848171598397, + "learning_rate": 1.4185141968105795e-07, + "loss": 2.5256, + "step": 36080 + }, + { + "epoch": 9.330016798035922, + "learning_rate": 1.4181900687151562e-07, + "loss": 2.5286, + "step": 36100 + }, + { + "epoch": 9.335185424473446, + "learning_rate": 1.4178659406197328e-07, + "loss": 2.5655, + "step": 36120 + }, + { + "epoch": 9.340354050910971, + "learning_rate": 1.4175418125243094e-07, + "loss": 2.5093, + "step": 36140 + }, + { + "epoch": 9.345522677348495, + "learning_rate": 1.4172176844288863e-07, + "loss": 2.5257, + "step": 36160 + }, + { + "epoch": 9.350691303786018, + "learning_rate": 1.416893556333463e-07, + "loss": 2.5267, + "step": 36180 + }, + { + "epoch": 9.355859930223543, + "learning_rate": 1.4165694282380395e-07, + "loss": 2.4716, + "step": 36200 + }, + { + "epoch": 9.361028556661067, + "learning_rate": 1.4162453001426164e-07, + "loss": 2.5255, + "step": 36220 + }, + { + "epoch": 9.366197183098592, + "learning_rate": 1.415921172047193e-07, + "loss": 2.5087, + "step": 36240 + }, + { + "epoch": 9.371365809536115, + "learning_rate": 1.4155970439517696e-07, + "loss": 2.5494, + "step": 36260 + }, + { + "epoch": 9.37653443597364, + "learning_rate": 1.4152729158563463e-07, + "loss": 2.5466, + "step": 36280 + }, + { + "epoch": 9.381703062411164, + "learning_rate": 1.4149487877609231e-07, + "loss": 2.4686, + "step": 36300 + }, + { + "epoch": 9.386871688848688, + "learning_rate": 1.4146246596654998e-07, + "loss": 2.5361, + "step": 36320 + }, + { + "epoch": 9.392040315286213, + "learning_rate": 1.4143005315700764e-07, + "loss": 2.4786, + "step": 36340 + }, + { + "epoch": 9.397208941723736, + "learning_rate": 1.413976403474653e-07, + "loss": 2.5012, + "step": 36360 + }, + { + "epoch": 9.402377568161262, + "learning_rate": 1.41365227537923e-07, + "loss": 2.5728, + "step": 36380 + }, + { + "epoch": 9.407546194598785, + "learning_rate": 1.4133281472838065e-07, + "loss": 2.513, + "step": 36400 + }, + { + "epoch": 9.41271482103631, + "learning_rate": 1.413004019188383e-07, + "loss": 2.5833, + "step": 36420 + }, + { + "epoch": 9.417883447473834, + "learning_rate": 1.4126798910929597e-07, + "loss": 2.5436, + "step": 36440 + }, + { + "epoch": 9.423052073911357, + "learning_rate": 1.4123557629975366e-07, + "loss": 2.5017, + "step": 36460 + }, + { + "epoch": 9.428220700348882, + "learning_rate": 1.4120316349021132e-07, + "loss": 2.4708, + "step": 36480 + }, + { + "epoch": 9.433389326786406, + "learning_rate": 1.4117075068066899e-07, + "loss": 2.5594, + "step": 36500 + }, + { + "epoch": 9.438557953223931, + "learning_rate": 1.4113833787112667e-07, + "loss": 2.5095, + "step": 36520 + }, + { + "epoch": 9.443726579661455, + "learning_rate": 1.4110592506158434e-07, + "loss": 2.5153, + "step": 36540 + }, + { + "epoch": 9.44889520609898, + "learning_rate": 1.41073512252042e-07, + "loss": 2.4579, + "step": 36560 + }, + { + "epoch": 9.454063832536503, + "learning_rate": 1.4104109944249966e-07, + "loss": 2.5145, + "step": 36580 + }, + { + "epoch": 9.459232458974027, + "learning_rate": 1.4100868663295735e-07, + "loss": 2.5117, + "step": 36600 + }, + { + "epoch": 9.464401085411552, + "learning_rate": 1.40976273823415e-07, + "loss": 2.571, + "step": 36620 + }, + { + "epoch": 9.469569711849076, + "learning_rate": 1.4094386101387267e-07, + "loss": 2.5494, + "step": 36640 + }, + { + "epoch": 9.4747383382866, + "learning_rate": 1.4091144820433033e-07, + "loss": 2.4598, + "step": 36660 + }, + { + "epoch": 9.479906964724124, + "learning_rate": 1.4087903539478802e-07, + "loss": 2.5034, + "step": 36680 + }, + { + "epoch": 9.48507559116165, + "learning_rate": 1.4084662258524568e-07, + "loss": 2.5229, + "step": 36700 + }, + { + "epoch": 9.490244217599173, + "learning_rate": 1.4081420977570335e-07, + "loss": 2.5249, + "step": 36720 + }, + { + "epoch": 9.495412844036696, + "learning_rate": 1.40781796966161e-07, + "loss": 2.4883, + "step": 36740 + }, + { + "epoch": 9.500581470474222, + "learning_rate": 1.407493841566187e-07, + "loss": 2.5082, + "step": 36760 + }, + { + "epoch": 9.505750096911745, + "learning_rate": 1.4071697134707636e-07, + "loss": 2.5209, + "step": 36780 + }, + { + "epoch": 9.51091872334927, + "learning_rate": 1.4068455853753402e-07, + "loss": 2.5263, + "step": 36800 + }, + { + "epoch": 9.516087349786794, + "learning_rate": 1.406521457279917e-07, + "loss": 2.4904, + "step": 36820 + }, + { + "epoch": 9.52125597622432, + "learning_rate": 1.4061973291844937e-07, + "loss": 2.511, + "step": 36840 + }, + { + "epoch": 9.526424602661843, + "learning_rate": 1.4058732010890703e-07, + "loss": 2.4821, + "step": 36860 + }, + { + "epoch": 9.531593229099366, + "learning_rate": 1.405549072993647e-07, + "loss": 2.5053, + "step": 36880 + }, + { + "epoch": 9.536761855536891, + "learning_rate": 1.4052249448982238e-07, + "loss": 2.518, + "step": 36900 + }, + { + "epoch": 9.541930481974415, + "learning_rate": 1.4049008168028004e-07, + "loss": 2.4693, + "step": 36920 + }, + { + "epoch": 9.54709910841194, + "learning_rate": 1.404576688707377e-07, + "loss": 2.5722, + "step": 36940 + }, + { + "epoch": 9.552267734849464, + "learning_rate": 1.4042525606119537e-07, + "loss": 2.4733, + "step": 36960 + }, + { + "epoch": 9.557436361286989, + "learning_rate": 1.4039284325165306e-07, + "loss": 2.5035, + "step": 36980 + }, + { + "epoch": 9.562604987724512, + "learning_rate": 1.4036043044211072e-07, + "loss": 2.5271, + "step": 37000 + }, + { + "epoch": 9.567773614162036, + "learning_rate": 1.4032801763256838e-07, + "loss": 2.5335, + "step": 37020 + }, + { + "epoch": 9.572942240599561, + "learning_rate": 1.4029560482302604e-07, + "loss": 2.4517, + "step": 37040 + }, + { + "epoch": 9.578110867037084, + "learning_rate": 1.4026319201348373e-07, + "loss": 2.5086, + "step": 37060 + }, + { + "epoch": 9.58327949347461, + "learning_rate": 1.402307792039414e-07, + "loss": 2.5409, + "step": 37080 + }, + { + "epoch": 9.588448119912133, + "learning_rate": 1.4019836639439905e-07, + "loss": 2.512, + "step": 37100 + }, + { + "epoch": 9.593616746349657, + "learning_rate": 1.4016595358485674e-07, + "loss": 2.5074, + "step": 37120 + }, + { + "epoch": 9.598785372787182, + "learning_rate": 1.401335407753144e-07, + "loss": 2.5078, + "step": 37140 + }, + { + "epoch": 9.603953999224705, + "learning_rate": 1.4010112796577207e-07, + "loss": 2.5574, + "step": 37160 + }, + { + "epoch": 9.60912262566223, + "learning_rate": 1.4006871515622973e-07, + "loss": 2.5459, + "step": 37180 + }, + { + "epoch": 9.614291252099754, + "learning_rate": 1.4003630234668742e-07, + "loss": 2.4473, + "step": 37200 + }, + { + "epoch": 9.61945987853728, + "learning_rate": 1.4000388953714508e-07, + "loss": 2.5423, + "step": 37220 + }, + { + "epoch": 9.624628504974803, + "learning_rate": 1.3997147672760274e-07, + "loss": 2.4988, + "step": 37240 + }, + { + "epoch": 9.629797131412328, + "learning_rate": 1.399390639180604e-07, + "loss": 2.6236, + "step": 37260 + }, + { + "epoch": 9.634965757849852, + "learning_rate": 1.399066511085181e-07, + "loss": 2.5718, + "step": 37280 + }, + { + "epoch": 9.640134384287375, + "learning_rate": 1.3987423829897575e-07, + "loss": 2.5012, + "step": 37300 + }, + { + "epoch": 9.6453030107249, + "learning_rate": 1.3984182548943341e-07, + "loss": 2.5536, + "step": 37320 + }, + { + "epoch": 9.650471637162424, + "learning_rate": 1.3980941267989108e-07, + "loss": 2.5416, + "step": 37340 + }, + { + "epoch": 9.655640263599949, + "learning_rate": 1.3977699987034876e-07, + "loss": 2.4864, + "step": 37360 + }, + { + "epoch": 9.660808890037472, + "learning_rate": 1.3974458706080643e-07, + "loss": 2.5365, + "step": 37380 + }, + { + "epoch": 9.665977516474996, + "learning_rate": 1.397121742512641e-07, + "loss": 2.5111, + "step": 37400 + }, + { + "epoch": 9.671146142912521, + "learning_rate": 1.3967976144172178e-07, + "loss": 2.4737, + "step": 37420 + }, + { + "epoch": 9.676314769350045, + "learning_rate": 1.3964734863217944e-07, + "loss": 2.5373, + "step": 37440 + }, + { + "epoch": 9.68148339578757, + "learning_rate": 1.396149358226371e-07, + "loss": 2.4991, + "step": 37460 + }, + { + "epoch": 9.686652022225093, + "learning_rate": 1.3958252301309476e-07, + "loss": 2.5186, + "step": 37480 + }, + { + "epoch": 9.691820648662619, + "learning_rate": 1.3955011020355242e-07, + "loss": 2.4701, + "step": 37500 + }, + { + "epoch": 9.696989275100142, + "learning_rate": 1.395176973940101e-07, + "loss": 2.5169, + "step": 37520 + }, + { + "epoch": 9.702157901537666, + "learning_rate": 1.3948528458446777e-07, + "loss": 2.4691, + "step": 37540 + }, + { + "epoch": 9.70732652797519, + "learning_rate": 1.3945287177492544e-07, + "loss": 2.5278, + "step": 37560 + }, + { + "epoch": 9.712495154412714, + "learning_rate": 1.3942045896538312e-07, + "loss": 2.4778, + "step": 37580 + }, + { + "epoch": 9.71766378085024, + "learning_rate": 1.3938804615584079e-07, + "loss": 2.5367, + "step": 37600 + }, + { + "epoch": 9.722832407287763, + "learning_rate": 1.3935563334629845e-07, + "loss": 2.5249, + "step": 37620 + }, + { + "epoch": 9.728001033725288, + "learning_rate": 1.393232205367561e-07, + "loss": 2.503, + "step": 37640 + }, + { + "epoch": 9.733169660162812, + "learning_rate": 1.3929080772721377e-07, + "loss": 2.514, + "step": 37660 + }, + { + "epoch": 9.738338286600335, + "learning_rate": 1.3925839491767146e-07, + "loss": 2.5158, + "step": 37680 + }, + { + "epoch": 9.74350691303786, + "learning_rate": 1.3922598210812912e-07, + "loss": 2.527, + "step": 37700 + }, + { + "epoch": 9.748675539475384, + "learning_rate": 1.3919356929858678e-07, + "loss": 2.5435, + "step": 37720 + }, + { + "epoch": 9.75384416591291, + "learning_rate": 1.3916115648904447e-07, + "loss": 2.5464, + "step": 37740 + }, + { + "epoch": 9.759012792350433, + "learning_rate": 1.3912874367950213e-07, + "loss": 2.5952, + "step": 37760 + }, + { + "epoch": 9.764181418787958, + "learning_rate": 1.390963308699598e-07, + "loss": 2.5066, + "step": 37780 + }, + { + "epoch": 9.769350045225481, + "learning_rate": 1.3906391806041748e-07, + "loss": 2.4973, + "step": 37800 + }, + { + "epoch": 9.774518671663005, + "learning_rate": 1.3903150525087512e-07, + "loss": 2.5344, + "step": 37820 + }, + { + "epoch": 9.77968729810053, + "learning_rate": 1.389990924413328e-07, + "loss": 2.4959, + "step": 37840 + }, + { + "epoch": 9.784855924538054, + "learning_rate": 1.3896667963179047e-07, + "loss": 2.5006, + "step": 37860 + }, + { + "epoch": 9.790024550975579, + "learning_rate": 1.3893426682224813e-07, + "loss": 2.5334, + "step": 37880 + }, + { + "epoch": 9.795193177413102, + "learning_rate": 1.3890185401270582e-07, + "loss": 2.5255, + "step": 37900 + }, + { + "epoch": 9.800361803850627, + "learning_rate": 1.3886944120316348e-07, + "loss": 2.5221, + "step": 37920 + }, + { + "epoch": 9.805530430288151, + "learning_rate": 1.3883702839362114e-07, + "loss": 2.5832, + "step": 37940 + }, + { + "epoch": 9.810699056725674, + "learning_rate": 1.3880461558407883e-07, + "loss": 2.5268, + "step": 37960 + }, + { + "epoch": 9.8158676831632, + "learning_rate": 1.387722027745365e-07, + "loss": 2.6145, + "step": 37980 + }, + { + "epoch": 9.821036309600723, + "learning_rate": 1.3873978996499416e-07, + "loss": 2.5483, + "step": 38000 + }, + { + "epoch": 9.826204936038248, + "learning_rate": 1.3870737715545184e-07, + "loss": 2.4529, + "step": 38020 + }, + { + "epoch": 9.831373562475772, + "learning_rate": 1.3867496434590948e-07, + "loss": 2.5547, + "step": 38040 + }, + { + "epoch": 9.836542188913295, + "learning_rate": 1.3864255153636717e-07, + "loss": 2.4675, + "step": 38060 + }, + { + "epoch": 9.84171081535082, + "learning_rate": 1.3861013872682483e-07, + "loss": 2.4782, + "step": 38080 + }, + { + "epoch": 9.846879441788344, + "learning_rate": 1.385777259172825e-07, + "loss": 2.527, + "step": 38100 + }, + { + "epoch": 9.85204806822587, + "learning_rate": 1.3854531310774018e-07, + "loss": 2.5285, + "step": 38120 + }, + { + "epoch": 9.857216694663393, + "learning_rate": 1.3851290029819784e-07, + "loss": 2.5057, + "step": 38140 + }, + { + "epoch": 9.862385321100918, + "learning_rate": 1.384804874886555e-07, + "loss": 2.4832, + "step": 38160 + }, + { + "epoch": 9.867553947538442, + "learning_rate": 1.384480746791132e-07, + "loss": 2.4826, + "step": 38180 + }, + { + "epoch": 9.872722573975967, + "learning_rate": 1.3841566186957083e-07, + "loss": 2.4912, + "step": 38200 + }, + { + "epoch": 9.87789120041349, + "learning_rate": 1.3838324906002852e-07, + "loss": 2.5356, + "step": 38220 + }, + { + "epoch": 9.883059826851014, + "learning_rate": 1.3835083625048618e-07, + "loss": 2.4311, + "step": 38240 + }, + { + "epoch": 9.888228453288539, + "learning_rate": 1.3831842344094384e-07, + "loss": 2.4831, + "step": 38260 + }, + { + "epoch": 9.893397079726062, + "learning_rate": 1.3828601063140153e-07, + "loss": 2.4975, + "step": 38280 + }, + { + "epoch": 9.898565706163588, + "learning_rate": 1.382535978218592e-07, + "loss": 2.5113, + "step": 38300 + }, + { + "epoch": 9.903734332601111, + "learning_rate": 1.3822118501231685e-07, + "loss": 2.4731, + "step": 38320 + }, + { + "epoch": 9.908902959038635, + "learning_rate": 1.3818877220277454e-07, + "loss": 2.5272, + "step": 38340 + }, + { + "epoch": 9.91407158547616, + "learning_rate": 1.381563593932322e-07, + "loss": 2.5113, + "step": 38360 + }, + { + "epoch": 9.919240211913683, + "learning_rate": 1.3812394658368986e-07, + "loss": 2.5481, + "step": 38380 + }, + { + "epoch": 9.924408838351209, + "learning_rate": 1.3809153377414755e-07, + "loss": 2.4876, + "step": 38400 + }, + { + "epoch": 9.929577464788732, + "learning_rate": 1.380591209646052e-07, + "loss": 2.4875, + "step": 38420 + }, + { + "epoch": 9.934746091226257, + "learning_rate": 1.3802670815506288e-07, + "loss": 2.4732, + "step": 38440 + }, + { + "epoch": 9.93991471766378, + "learning_rate": 1.3799429534552054e-07, + "loss": 2.5036, + "step": 38460 + }, + { + "epoch": 9.945083344101306, + "learning_rate": 1.379618825359782e-07, + "loss": 2.4979, + "step": 38480 + }, + { + "epoch": 9.95025197053883, + "learning_rate": 1.379294697264359e-07, + "loss": 2.4916, + "step": 38500 + }, + { + "epoch": 9.955420596976353, + "learning_rate": 1.3789705691689355e-07, + "loss": 2.4518, + "step": 38520 + }, + { + "epoch": 9.960589223413878, + "learning_rate": 1.378646441073512e-07, + "loss": 2.4727, + "step": 38540 + }, + { + "epoch": 9.965757849851402, + "learning_rate": 1.378322312978089e-07, + "loss": 2.5166, + "step": 38560 + }, + { + "epoch": 9.970926476288927, + "learning_rate": 1.3779981848826656e-07, + "loss": 2.5162, + "step": 38580 + }, + { + "epoch": 9.97609510272645, + "learning_rate": 1.3776740567872422e-07, + "loss": 2.4999, + "step": 38600 + }, + { + "epoch": 9.981263729163974, + "learning_rate": 1.377349928691819e-07, + "loss": 2.4778, + "step": 38620 + }, + { + "epoch": 9.986432355601499, + "learning_rate": 1.3770258005963955e-07, + "loss": 2.4741, + "step": 38640 + }, + { + "epoch": 9.991600982039023, + "learning_rate": 1.3767016725009724e-07, + "loss": 2.5227, + "step": 38660 + }, + { + "epoch": 9.996769608476548, + "learning_rate": 1.376377544405549e-07, + "loss": 2.537, + "step": 38680 + }, + { + "epoch": 9.999870784339063, + "eval_bleu": 11.3364, + "eval_gen_len": 36.0732, + "eval_loss": 2.484008550643921, + "eval_runtime": 789.2845, + "eval_samples_per_second": 2.182, + "eval_steps_per_second": 1.091, + "step": 38692 } ], "logging_steps": 20, @@ -5884,7 +11736,7 @@ "attributes": {} } }, - "total_flos": 1.2701974646813491e+17, + "total_flos": 2.540236796612444e+17, "train_batch_size": 2, "trial_name": null, "trial_params": null