diff --git "a/last-checkpoint/trainer_state.json" "b/last-checkpoint/trainer_state.json" --- "a/last-checkpoint/trainer_state.json" +++ "b/last-checkpoint/trainer_state.json" @@ -1,9 +1,9 @@ { - "best_metric": 10.2353, - "best_model_checkpoint": "/kaggle/tmp/amr-tst-indo/AMRBART-id/fine-tune/../outputs/mbart-en-id-smaller-fted/checkpoint-34823", - "epoch": 9.0, + "best_metric": 0.5081, + "best_model_checkpoint": "/kaggle/tmp/amr-tst-indo/AMRBART-id/fine-tune/../outputs/mbart-en-id-smaller-fted/checkpoint-19347", + "epoch": 4.999870784339062, "eval_steps": 500, - "global_step": 34823, + "global_step": 19347, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, @@ -5865,4690 +5865,6 @@ "eval_samples_per_second": 1.019, "eval_steps_per_second": 0.51, "step": 19347 - }, - { - "epoch": 5.0038764698281435, - "learning_rate": 1.6894852845844678e-07, - "loss": 2.6522, - "step": 19360 - }, - { - "epoch": 5.009045096265667, - "learning_rate": 1.6891611564890445e-07, - "loss": 2.6008, - "step": 19380 - }, - { - "epoch": 5.014213722703191, - "learning_rate": 1.688837028393621e-07, - "loss": 2.6294, - "step": 19400 - }, - { - "epoch": 5.019382349140716, - "learning_rate": 1.6885129002981977e-07, - "loss": 2.6519, - "step": 19420 - }, - { - "epoch": 5.02455097557824, - "learning_rate": 1.6881887722027743e-07, - "loss": 2.6537, - "step": 19440 - }, - { - "epoch": 5.029719602015764, - "learning_rate": 1.6878646441073512e-07, - "loss": 2.6256, - "step": 19460 - }, - { - "epoch": 5.034888228453289, - "learning_rate": 1.6875405160119278e-07, - "loss": 2.6721, - "step": 19480 - }, - { - "epoch": 5.040056854890813, - "learning_rate": 1.6872163879165044e-07, - "loss": 2.6894, - "step": 19500 - }, - { - "epoch": 5.045225481328337, - "learning_rate": 1.6868922598210813e-07, - "loss": 2.6702, - "step": 19520 - }, - { - "epoch": 5.050394107765861, - "learning_rate": 1.686568131725658e-07, - "loss": 2.7041, - "step": 19540 - }, - { - "epoch": 5.055562734203385, - "learning_rate": 1.6862440036302346e-07, - "loss": 2.7243, - "step": 19560 - }, - { - "epoch": 5.06073136064091, - "learning_rate": 1.6859198755348114e-07, - "loss": 2.7082, - "step": 19580 - }, - { - "epoch": 5.065899987078434, - "learning_rate": 1.685595747439388e-07, - "loss": 2.6755, - "step": 19600 - }, - { - "epoch": 5.071068613515958, - "learning_rate": 1.6852716193439647e-07, - "loss": 2.6075, - "step": 19620 - }, - { - "epoch": 5.076237239953483, - "learning_rate": 1.6849474912485413e-07, - "loss": 2.6402, - "step": 19640 - }, - { - "epoch": 5.081405866391006, - "learning_rate": 1.684623363153118e-07, - "loss": 2.6928, - "step": 19660 - }, - { - "epoch": 5.086574492828531, - "learning_rate": 1.6842992350576948e-07, - "loss": 2.6689, - "step": 19680 - }, - { - "epoch": 5.091743119266055, - "learning_rate": 1.6839751069622714e-07, - "loss": 2.6848, - "step": 19700 - }, - { - "epoch": 5.096911745703579, - "learning_rate": 1.683650978866848e-07, - "loss": 2.6315, - "step": 19720 - }, - { - "epoch": 5.102080372141104, - "learning_rate": 1.683326850771425e-07, - "loss": 2.6936, - "step": 19740 - }, - { - "epoch": 5.107248998578628, - "learning_rate": 1.6830027226760015e-07, - "loss": 2.6354, - "step": 19760 - }, - { - "epoch": 5.112417625016152, - "learning_rate": 1.6826785945805782e-07, - "loss": 2.6376, - "step": 19780 - }, - { - "epoch": 5.117586251453676, - "learning_rate": 1.6823544664851548e-07, - "loss": 2.7595, - "step": 19800 - }, - { - "epoch": 5.1227548778912, - "learning_rate": 1.6820303383897317e-07, - "loss": 2.5688, - "step": 19820 - }, - { - "epoch": 5.127923504328725, - "learning_rate": 1.6817062102943083e-07, - "loss": 2.6504, - "step": 19840 - }, - { - "epoch": 5.133092130766249, - "learning_rate": 1.681382082198885e-07, - "loss": 2.701, - "step": 19860 - }, - { - "epoch": 5.138260757203773, - "learning_rate": 1.6810579541034615e-07, - "loss": 2.6691, - "step": 19880 - }, - { - "epoch": 5.143429383641298, - "learning_rate": 1.6807338260080384e-07, - "loss": 2.6396, - "step": 19900 - }, - { - "epoch": 5.148598010078821, - "learning_rate": 1.680409697912615e-07, - "loss": 2.6752, - "step": 19920 - }, - { - "epoch": 5.1537666365163455, - "learning_rate": 1.6800855698171916e-07, - "loss": 2.567, - "step": 19940 - }, - { - "epoch": 5.15893526295387, - "learning_rate": 1.6797614417217685e-07, - "loss": 2.6691, - "step": 19960 - }, - { - "epoch": 5.164103889391394, - "learning_rate": 1.6794373136263451e-07, - "loss": 2.6172, - "step": 19980 - }, - { - "epoch": 5.169272515828919, - "learning_rate": 1.6791131855309218e-07, - "loss": 2.6856, - "step": 20000 - }, - { - "epoch": 5.174441142266443, - "learning_rate": 1.6787890574354984e-07, - "loss": 2.6874, - "step": 20020 - }, - { - "epoch": 5.179609768703967, - "learning_rate": 1.678464929340075e-07, - "loss": 2.6737, - "step": 20040 - }, - { - "epoch": 5.184778395141491, - "learning_rate": 1.678140801244652e-07, - "loss": 2.6503, - "step": 20060 - }, - { - "epoch": 5.189947021579015, - "learning_rate": 1.6778166731492285e-07, - "loss": 2.6155, - "step": 20080 - }, - { - "epoch": 5.1951156480165395, - "learning_rate": 1.677492545053805e-07, - "loss": 2.7035, - "step": 20100 - }, - { - "epoch": 5.200284274454064, - "learning_rate": 1.677168416958382e-07, - "loss": 2.6192, - "step": 20120 - }, - { - "epoch": 5.205452900891588, - "learning_rate": 1.6768442888629586e-07, - "loss": 2.5974, - "step": 20140 - }, - { - "epoch": 5.2106215273291125, - "learning_rate": 1.6765201607675352e-07, - "loss": 2.694, - "step": 20160 - }, - { - "epoch": 5.215790153766637, - "learning_rate": 1.676196032672112e-07, - "loss": 2.6391, - "step": 20180 - }, - { - "epoch": 5.22095878020416, - "learning_rate": 1.6758719045766887e-07, - "loss": 2.6428, - "step": 20200 - }, - { - "epoch": 5.226127406641685, - "learning_rate": 1.6755477764812654e-07, - "loss": 2.6796, - "step": 20220 - }, - { - "epoch": 5.231296033079209, - "learning_rate": 1.675223648385842e-07, - "loss": 2.6819, - "step": 20240 - }, - { - "epoch": 5.2364646595167335, - "learning_rate": 1.6748995202904186e-07, - "loss": 2.6714, - "step": 20260 - }, - { - "epoch": 5.241633285954258, - "learning_rate": 1.6745753921949955e-07, - "loss": 2.6392, - "step": 20280 - }, - { - "epoch": 5.246801912391782, - "learning_rate": 1.674251264099572e-07, - "loss": 2.7057, - "step": 20300 - }, - { - "epoch": 5.2519705388293065, - "learning_rate": 1.6739271360041487e-07, - "loss": 2.6676, - "step": 20320 - }, - { - "epoch": 5.25713916526683, - "learning_rate": 1.6736030079087256e-07, - "loss": 2.7037, - "step": 20340 - }, - { - "epoch": 5.262307791704354, - "learning_rate": 1.6732788798133022e-07, - "loss": 2.7595, - "step": 20360 - }, - { - "epoch": 5.267476418141879, - "learning_rate": 1.6729547517178788e-07, - "loss": 2.6665, - "step": 20380 - }, - { - "epoch": 5.272645044579403, - "learning_rate": 1.6726306236224554e-07, - "loss": 2.6281, - "step": 20400 - }, - { - "epoch": 5.277813671016927, - "learning_rate": 1.6723064955270323e-07, - "loss": 2.6047, - "step": 20420 - }, - { - "epoch": 5.282982297454452, - "learning_rate": 1.671982367431609e-07, - "loss": 2.6466, - "step": 20440 - }, - { - "epoch": 5.288150923891976, - "learning_rate": 1.6716582393361856e-07, - "loss": 2.6026, - "step": 20460 - }, - { - "epoch": 5.2933195503295, - "learning_rate": 1.6713341112407622e-07, - "loss": 2.6714, - "step": 20480 - }, - { - "epoch": 5.298488176767024, - "learning_rate": 1.671009983145339e-07, - "loss": 2.6351, - "step": 20500 - }, - { - "epoch": 5.303656803204548, - "learning_rate": 1.6706858550499157e-07, - "loss": 2.6293, - "step": 20520 - }, - { - "epoch": 5.308825429642073, - "learning_rate": 1.6703617269544923e-07, - "loss": 2.6368, - "step": 20540 - }, - { - "epoch": 5.313994056079597, - "learning_rate": 1.6700375988590692e-07, - "loss": 2.6963, - "step": 20560 - }, - { - "epoch": 5.319162682517121, - "learning_rate": 1.6697134707636458e-07, - "loss": 2.6401, - "step": 20580 - }, - { - "epoch": 5.324331308954645, - "learning_rate": 1.6693893426682224e-07, - "loss": 2.669, - "step": 20600 - }, - { - "epoch": 5.329499935392169, - "learning_rate": 1.669065214572799e-07, - "loss": 2.6384, - "step": 20620 - }, - { - "epoch": 5.334668561829694, - "learning_rate": 1.6687410864773757e-07, - "loss": 2.7073, - "step": 20640 - }, - { - "epoch": 5.339837188267218, - "learning_rate": 1.6684169583819526e-07, - "loss": 2.6507, - "step": 20660 - }, - { - "epoch": 5.345005814704742, - "learning_rate": 1.6680928302865292e-07, - "loss": 2.6378, - "step": 20680 - }, - { - "epoch": 5.350174441142267, - "learning_rate": 1.6677687021911058e-07, - "loss": 2.6756, - "step": 20700 - }, - { - "epoch": 5.355343067579791, - "learning_rate": 1.6674445740956827e-07, - "loss": 2.6667, - "step": 20720 - }, - { - "epoch": 5.3605116940173145, - "learning_rate": 1.6671204460002593e-07, - "loss": 2.6714, - "step": 20740 - }, - { - "epoch": 5.365680320454839, - "learning_rate": 1.666796317904836e-07, - "loss": 2.6117, - "step": 20760 - }, - { - "epoch": 5.370848946892363, - "learning_rate": 1.6664721898094128e-07, - "loss": 2.6513, - "step": 20780 - }, - { - "epoch": 5.376017573329888, - "learning_rate": 1.6661480617139894e-07, - "loss": 2.6997, - "step": 20800 - }, - { - "epoch": 5.381186199767412, - "learning_rate": 1.665823933618566e-07, - "loss": 2.6395, - "step": 20820 - }, - { - "epoch": 5.386354826204936, - "learning_rate": 1.6654998055231426e-07, - "loss": 2.6615, - "step": 20840 - }, - { - "epoch": 5.39152345264246, - "learning_rate": 1.6651756774277193e-07, - "loss": 2.6915, - "step": 20860 - }, - { - "epoch": 5.396692079079984, - "learning_rate": 1.6648515493322961e-07, - "loss": 2.6248, - "step": 20880 - }, - { - "epoch": 5.4018607055175085, - "learning_rate": 1.6645274212368728e-07, - "loss": 2.7061, - "step": 20900 - }, - { - "epoch": 5.407029331955033, - "learning_rate": 1.6642032931414494e-07, - "loss": 2.642, - "step": 20920 - }, - { - "epoch": 5.412197958392557, - "learning_rate": 1.6638791650460263e-07, - "loss": 2.6377, - "step": 20940 - }, - { - "epoch": 5.417366584830082, - "learning_rate": 1.663555036950603e-07, - "loss": 2.6452, - "step": 20960 - }, - { - "epoch": 5.422535211267606, - "learning_rate": 1.6632309088551795e-07, - "loss": 2.7206, - "step": 20980 - }, - { - "epoch": 5.427703837705129, - "learning_rate": 1.662906780759756e-07, - "loss": 2.6524, - "step": 21000 - }, - { - "epoch": 5.432872464142654, - "learning_rate": 1.662582652664333e-07, - "loss": 2.6398, - "step": 21020 - }, - { - "epoch": 5.438041090580178, - "learning_rate": 1.6622585245689096e-07, - "loss": 2.6486, - "step": 21040 - }, - { - "epoch": 5.4432097170177025, - "learning_rate": 1.6619343964734862e-07, - "loss": 2.6466, - "step": 21060 - }, - { - "epoch": 5.448378343455227, - "learning_rate": 1.6616102683780629e-07, - "loss": 2.6818, - "step": 21080 - }, - { - "epoch": 5.453546969892751, - "learning_rate": 1.6612861402826397e-07, - "loss": 2.6826, - "step": 21100 - }, - { - "epoch": 5.458715596330276, - "learning_rate": 1.6609620121872164e-07, - "loss": 2.665, - "step": 21120 - }, - { - "epoch": 5.463884222767799, - "learning_rate": 1.660637884091793e-07, - "loss": 2.6018, - "step": 21140 - }, - { - "epoch": 5.469052849205323, - "learning_rate": 1.66031375599637e-07, - "loss": 2.6867, - "step": 21160 - }, - { - "epoch": 5.474221475642848, - "learning_rate": 1.6599896279009465e-07, - "loss": 2.6972, - "step": 21180 - }, - { - "epoch": 5.479390102080372, - "learning_rate": 1.659665499805523e-07, - "loss": 2.6775, - "step": 21200 - }, - { - "epoch": 5.4845587285178965, - "learning_rate": 1.6593413717100997e-07, - "loss": 2.7239, - "step": 21220 - }, - { - "epoch": 5.489727354955421, - "learning_rate": 1.6590172436146763e-07, - "loss": 2.6349, - "step": 21240 - }, - { - "epoch": 5.494895981392945, - "learning_rate": 1.6586931155192532e-07, - "loss": 2.6607, - "step": 21260 - }, - { - "epoch": 5.500064607830469, - "learning_rate": 1.6583689874238298e-07, - "loss": 2.6806, - "step": 21280 - }, - { - "epoch": 5.505233234267993, - "learning_rate": 1.6580448593284065e-07, - "loss": 2.7212, - "step": 21300 - }, - { - "epoch": 5.510401860705517, - "learning_rate": 1.6577207312329833e-07, - "loss": 2.6997, - "step": 21320 - }, - { - "epoch": 5.515570487143042, - "learning_rate": 1.65739660313756e-07, - "loss": 2.6277, - "step": 21340 - }, - { - "epoch": 5.520739113580566, - "learning_rate": 1.6570724750421366e-07, - "loss": 2.6953, - "step": 21360 - }, - { - "epoch": 5.5259077400180905, - "learning_rate": 1.6567483469467135e-07, - "loss": 2.7586, - "step": 21380 - }, - { - "epoch": 5.531076366455615, - "learning_rate": 1.65642421885129e-07, - "loss": 2.5887, - "step": 21400 - }, - { - "epoch": 5.536244992893138, - "learning_rate": 1.6561000907558667e-07, - "loss": 2.6364, - "step": 21420 - }, - { - "epoch": 5.541413619330663, - "learning_rate": 1.6557759626604433e-07, - "loss": 2.5951, - "step": 21440 - }, - { - "epoch": 5.546582245768187, - "learning_rate": 1.65545183456502e-07, - "loss": 2.6595, - "step": 21460 - }, - { - "epoch": 5.551750872205711, - "learning_rate": 1.6551277064695968e-07, - "loss": 2.602, - "step": 21480 - }, - { - "epoch": 5.556919498643236, - "learning_rate": 1.6548035783741734e-07, - "loss": 2.5996, - "step": 21500 - }, - { - "epoch": 5.56208812508076, - "learning_rate": 1.65447945027875e-07, - "loss": 2.6529, - "step": 21520 - }, - { - "epoch": 5.5672567515182845, - "learning_rate": 1.654155322183327e-07, - "loss": 2.6917, - "step": 21540 - }, - { - "epoch": 5.572425377955808, - "learning_rate": 1.6538311940879036e-07, - "loss": 2.6509, - "step": 21560 - }, - { - "epoch": 5.577594004393332, - "learning_rate": 1.6535070659924802e-07, - "loss": 2.6524, - "step": 21580 - }, - { - "epoch": 5.582762630830857, - "learning_rate": 1.6531829378970568e-07, - "loss": 2.6894, - "step": 21600 - }, - { - "epoch": 5.587931257268381, - "learning_rate": 1.6528588098016334e-07, - "loss": 2.6451, - "step": 21620 - }, - { - "epoch": 5.593099883705905, - "learning_rate": 1.6525346817062103e-07, - "loss": 2.6636, - "step": 21640 - }, - { - "epoch": 5.59826851014343, - "learning_rate": 1.652210553610787e-07, - "loss": 2.6696, - "step": 21660 - }, - { - "epoch": 5.603437136580954, - "learning_rate": 1.6518864255153635e-07, - "loss": 2.6577, - "step": 21680 - }, - { - "epoch": 5.608605763018478, - "learning_rate": 1.6515622974199404e-07, - "loss": 2.5968, - "step": 21700 - }, - { - "epoch": 5.613774389456002, - "learning_rate": 1.651238169324517e-07, - "loss": 2.6492, - "step": 21720 - }, - { - "epoch": 5.618943015893526, - "learning_rate": 1.6509140412290937e-07, - "loss": 2.6357, - "step": 21740 - }, - { - "epoch": 5.624111642331051, - "learning_rate": 1.6505899131336705e-07, - "loss": 2.6401, - "step": 21760 - }, - { - "epoch": 5.629280268768575, - "learning_rate": 1.650265785038247e-07, - "loss": 2.5984, - "step": 21780 - }, - { - "epoch": 5.6344488952060985, - "learning_rate": 1.6499416569428238e-07, - "loss": 2.6678, - "step": 21800 - }, - { - "epoch": 5.639617521643623, - "learning_rate": 1.6496175288474004e-07, - "loss": 2.6161, - "step": 21820 - }, - { - "epoch": 5.644786148081147, - "learning_rate": 1.649293400751977e-07, - "loss": 2.6262, - "step": 21840 - }, - { - "epoch": 5.649954774518672, - "learning_rate": 1.648969272656554e-07, - "loss": 2.6514, - "step": 21860 - }, - { - "epoch": 5.655123400956196, - "learning_rate": 1.6486451445611305e-07, - "loss": 2.6629, - "step": 21880 - }, - { - "epoch": 5.66029202739372, - "learning_rate": 1.6483210164657071e-07, - "loss": 2.6764, - "step": 21900 - }, - { - "epoch": 5.665460653831245, - "learning_rate": 1.647996888370284e-07, - "loss": 2.6414, - "step": 21920 - }, - { - "epoch": 5.670629280268768, - "learning_rate": 1.6476727602748604e-07, - "loss": 2.5379, - "step": 21940 - }, - { - "epoch": 5.6757979067062925, - "learning_rate": 1.6473486321794373e-07, - "loss": 2.6744, - "step": 21960 - }, - { - "epoch": 5.680966533143817, - "learning_rate": 1.647024504084014e-07, - "loss": 2.7254, - "step": 21980 - }, - { - "epoch": 5.686135159581341, - "learning_rate": 1.6467003759885905e-07, - "loss": 2.6408, - "step": 22000 - }, - { - "epoch": 5.6913037860188656, - "learning_rate": 1.6463762478931674e-07, - "loss": 2.6751, - "step": 22020 - }, - { - "epoch": 5.69647241245639, - "learning_rate": 1.646052119797744e-07, - "loss": 2.6391, - "step": 22040 - }, - { - "epoch": 5.701641038893914, - "learning_rate": 1.6457279917023206e-07, - "loss": 2.625, - "step": 22060 - }, - { - "epoch": 5.706809665331438, - "learning_rate": 1.6454038636068975e-07, - "loss": 2.607, - "step": 22080 - }, - { - "epoch": 5.711978291768962, - "learning_rate": 1.6450797355114739e-07, - "loss": 2.6629, - "step": 22100 - }, - { - "epoch": 5.7171469182064865, - "learning_rate": 1.6447556074160507e-07, - "loss": 2.6358, - "step": 22120 - }, - { - "epoch": 5.722315544644011, - "learning_rate": 1.6444314793206274e-07, - "loss": 2.6962, - "step": 22140 - }, - { - "epoch": 5.727484171081535, - "learning_rate": 1.644107351225204e-07, - "loss": 2.6403, - "step": 22160 - }, - { - "epoch": 5.7326527975190595, - "learning_rate": 1.6437832231297809e-07, - "loss": 2.641, - "step": 22180 - }, - { - "epoch": 5.737821423956584, - "learning_rate": 1.6434590950343575e-07, - "loss": 2.6213, - "step": 22200 - }, - { - "epoch": 5.742990050394107, - "learning_rate": 1.643134966938934e-07, - "loss": 2.6508, - "step": 22220 - }, - { - "epoch": 5.748158676831632, - "learning_rate": 1.642810838843511e-07, - "loss": 2.6528, - "step": 22240 - }, - { - "epoch": 5.753327303269156, - "learning_rate": 1.6424867107480873e-07, - "loss": 2.5921, - "step": 22260 - }, - { - "epoch": 5.7584959297066804, - "learning_rate": 1.6421625826526642e-07, - "loss": 2.6379, - "step": 22280 - }, - { - "epoch": 5.763664556144205, - "learning_rate": 1.641838454557241e-07, - "loss": 2.6838, - "step": 22300 - }, - { - "epoch": 5.768833182581729, - "learning_rate": 1.6415143264618175e-07, - "loss": 2.6511, - "step": 22320 - }, - { - "epoch": 5.7740018090192535, - "learning_rate": 1.6411901983663943e-07, - "loss": 2.6547, - "step": 22340 - }, - { - "epoch": 5.779170435456777, - "learning_rate": 1.640866070270971e-07, - "loss": 2.5962, - "step": 22360 - }, - { - "epoch": 5.784339061894301, - "learning_rate": 1.6405419421755476e-07, - "loss": 2.6074, - "step": 22380 - }, - { - "epoch": 5.789507688331826, - "learning_rate": 1.6402178140801245e-07, - "loss": 2.5927, - "step": 22400 - }, - { - "epoch": 5.79467631476935, - "learning_rate": 1.6398936859847008e-07, - "loss": 2.6697, - "step": 22420 - }, - { - "epoch": 5.799844941206874, - "learning_rate": 1.6395695578892777e-07, - "loss": 2.6267, - "step": 22440 - }, - { - "epoch": 5.805013567644399, - "learning_rate": 1.6392454297938546e-07, - "loss": 2.6833, - "step": 22460 - }, - { - "epoch": 5.810182194081923, - "learning_rate": 1.638921301698431e-07, - "loss": 2.6647, - "step": 22480 - }, - { - "epoch": 5.815350820519447, - "learning_rate": 1.6385971736030078e-07, - "loss": 2.6619, - "step": 22500 - }, - { - "epoch": 5.820519446956971, - "learning_rate": 1.6382730455075847e-07, - "loss": 2.6341, - "step": 22520 - }, - { - "epoch": 5.825688073394495, - "learning_rate": 1.637948917412161e-07, - "loss": 2.604, - "step": 22540 - }, - { - "epoch": 5.83085669983202, - "learning_rate": 1.637624789316738e-07, - "loss": 2.6344, - "step": 22560 - }, - { - "epoch": 5.836025326269544, - "learning_rate": 1.6373006612213146e-07, - "loss": 2.6453, - "step": 22580 - }, - { - "epoch": 5.841193952707068, - "learning_rate": 1.6369765331258912e-07, - "loss": 2.6433, - "step": 22600 - }, - { - "epoch": 5.846362579144593, - "learning_rate": 1.636652405030468e-07, - "loss": 2.5673, - "step": 22620 - }, - { - "epoch": 5.851531205582116, - "learning_rate": 1.6363282769350444e-07, - "loss": 2.6455, - "step": 22640 - }, - { - "epoch": 5.856699832019641, - "learning_rate": 1.6360041488396213e-07, - "loss": 2.6617, - "step": 22660 - }, - { - "epoch": 5.861868458457165, - "learning_rate": 1.6356800207441982e-07, - "loss": 2.6395, - "step": 22680 - }, - { - "epoch": 5.867037084894689, - "learning_rate": 1.6353558926487745e-07, - "loss": 2.6358, - "step": 22700 - }, - { - "epoch": 5.872205711332214, - "learning_rate": 1.6350317645533514e-07, - "loss": 2.618, - "step": 22720 - }, - { - "epoch": 5.877374337769738, - "learning_rate": 1.634707636457928e-07, - "loss": 2.6219, - "step": 22740 - }, - { - "epoch": 5.882542964207262, - "learning_rate": 1.6343835083625047e-07, - "loss": 2.6028, - "step": 22760 - }, - { - "epoch": 5.887711590644786, - "learning_rate": 1.6340593802670815e-07, - "loss": 2.599, - "step": 22780 - }, - { - "epoch": 5.89288021708231, - "learning_rate": 1.633735252171658e-07, - "loss": 2.6062, - "step": 22800 - }, - { - "epoch": 5.898048843519835, - "learning_rate": 1.6334111240762348e-07, - "loss": 2.5854, - "step": 22820 - }, - { - "epoch": 5.903217469957359, - "learning_rate": 1.6330869959808117e-07, - "loss": 2.5416, - "step": 22840 - }, - { - "epoch": 5.908386096394883, - "learning_rate": 1.632762867885388e-07, - "loss": 2.6731, - "step": 22860 - }, - { - "epoch": 5.913554722832407, - "learning_rate": 1.632438739789965e-07, - "loss": 2.6271, - "step": 22880 - }, - { - "epoch": 5.918723349269931, - "learning_rate": 1.6321146116945418e-07, - "loss": 2.5896, - "step": 22900 - }, - { - "epoch": 5.9238919757074555, - "learning_rate": 1.6317904835991181e-07, - "loss": 2.6794, - "step": 22920 - }, - { - "epoch": 5.92906060214498, - "learning_rate": 1.631466355503695e-07, - "loss": 2.6051, - "step": 22940 - }, - { - "epoch": 5.934229228582504, - "learning_rate": 1.6311422274082716e-07, - "loss": 2.6901, - "step": 22960 - }, - { - "epoch": 5.939397855020029, - "learning_rate": 1.6308180993128483e-07, - "loss": 2.6288, - "step": 22980 - }, - { - "epoch": 5.944566481457553, - "learning_rate": 1.6304939712174251e-07, - "loss": 2.6806, - "step": 23000 - }, - { - "epoch": 5.949735107895076, - "learning_rate": 1.6301698431220015e-07, - "loss": 2.6003, - "step": 23020 - }, - { - "epoch": 5.954903734332601, - "learning_rate": 1.6298457150265784e-07, - "loss": 2.6595, - "step": 23040 - }, - { - "epoch": 5.960072360770125, - "learning_rate": 1.6295215869311553e-07, - "loss": 2.6563, - "step": 23060 - }, - { - "epoch": 5.9652409872076495, - "learning_rate": 1.6291974588357316e-07, - "loss": 2.6499, - "step": 23080 - }, - { - "epoch": 5.970409613645174, - "learning_rate": 1.6288733307403085e-07, - "loss": 2.6836, - "step": 23100 - }, - { - "epoch": 5.975578240082698, - "learning_rate": 1.6285492026448854e-07, - "loss": 2.6193, - "step": 23120 - }, - { - "epoch": 5.980746866520223, - "learning_rate": 1.6282250745494617e-07, - "loss": 2.6293, - "step": 23140 - }, - { - "epoch": 5.985915492957746, - "learning_rate": 1.6279009464540386e-07, - "loss": 2.7275, - "step": 23160 - }, - { - "epoch": 5.99108411939527, - "learning_rate": 1.6275768183586152e-07, - "loss": 2.6021, - "step": 23180 - }, - { - "epoch": 5.996252745832795, - "learning_rate": 1.6272526902631919e-07, - "loss": 2.6331, - "step": 23200 - }, - { - "epoch": 5.999870784339062, - "eval_bleu": 1.6991, - "eval_gen_len": 66.9245, - "eval_loss": 2.596095085144043, - "eval_runtime": 1347.5897, - "eval_samples_per_second": 1.278, - "eval_steps_per_second": 0.639, - "step": 23214 - }, - { - "epoch": 6.001421372270319, - "learning_rate": 1.6269285621677687e-07, - "loss": 2.6423, - "step": 23220 - }, - { - "epoch": 6.0065899987078435, - "learning_rate": 1.626604434072345e-07, - "loss": 2.667, - "step": 23240 - }, - { - "epoch": 6.011758625145368, - "learning_rate": 1.626280305976922e-07, - "loss": 2.6741, - "step": 23260 - }, - { - "epoch": 6.016927251582892, - "learning_rate": 1.6259561778814989e-07, - "loss": 2.6269, - "step": 23280 - }, - { - "epoch": 6.022095878020416, - "learning_rate": 1.6256320497860752e-07, - "loss": 2.6817, - "step": 23300 - }, - { - "epoch": 6.02726450445794, - "learning_rate": 1.625307921690652e-07, - "loss": 2.6804, - "step": 23320 - }, - { - "epoch": 6.032433130895464, - "learning_rate": 1.6249837935952287e-07, - "loss": 2.6283, - "step": 23340 - }, - { - "epoch": 6.037601757332989, - "learning_rate": 1.6246596654998053e-07, - "loss": 2.6344, - "step": 23360 - }, - { - "epoch": 6.042770383770513, - "learning_rate": 1.6243355374043822e-07, - "loss": 2.6453, - "step": 23380 - }, - { - "epoch": 6.0479390102080375, - "learning_rate": 1.6240114093089586e-07, - "loss": 2.5926, - "step": 23400 - }, - { - "epoch": 6.053107636645562, - "learning_rate": 1.6236872812135355e-07, - "loss": 2.6709, - "step": 23420 - }, - { - "epoch": 6.058276263083085, - "learning_rate": 1.6233631531181123e-07, - "loss": 2.6228, - "step": 23440 - }, - { - "epoch": 6.06344488952061, - "learning_rate": 1.6230390250226887e-07, - "loss": 2.6006, - "step": 23460 - }, - { - "epoch": 6.068613515958134, - "learning_rate": 1.6227148969272656e-07, - "loss": 2.6179, - "step": 23480 - }, - { - "epoch": 6.073782142395658, - "learning_rate": 1.6223907688318425e-07, - "loss": 2.63, - "step": 23500 - }, - { - "epoch": 6.078950768833183, - "learning_rate": 1.6220666407364188e-07, - "loss": 2.653, - "step": 23520 - }, - { - "epoch": 6.084119395270707, - "learning_rate": 1.6217425126409957e-07, - "loss": 2.6129, - "step": 23540 - }, - { - "epoch": 6.0892880217082315, - "learning_rate": 1.6214183845455723e-07, - "loss": 2.6008, - "step": 23560 - }, - { - "epoch": 6.094456648145755, - "learning_rate": 1.621094256450149e-07, - "loss": 2.6594, - "step": 23580 - }, - { - "epoch": 6.099625274583279, - "learning_rate": 1.6207701283547258e-07, - "loss": 2.617, - "step": 23600 - }, - { - "epoch": 6.104793901020804, - "learning_rate": 1.6204460002593022e-07, - "loss": 2.6392, - "step": 23620 - }, - { - "epoch": 6.109962527458328, - "learning_rate": 1.620121872163879e-07, - "loss": 2.6315, - "step": 23640 - }, - { - "epoch": 6.115131153895852, - "learning_rate": 1.619797744068456e-07, - "loss": 2.6131, - "step": 23660 - }, - { - "epoch": 6.120299780333377, - "learning_rate": 1.6194736159730323e-07, - "loss": 2.6512, - "step": 23680 - }, - { - "epoch": 6.1254684067709, - "learning_rate": 1.6191494878776092e-07, - "loss": 2.6196, - "step": 23700 - }, - { - "epoch": 6.130637033208425, - "learning_rate": 1.618825359782186e-07, - "loss": 2.6234, - "step": 23720 - }, - { - "epoch": 6.135805659645949, - "learning_rate": 1.6185012316867624e-07, - "loss": 2.5817, - "step": 23740 - }, - { - "epoch": 6.140974286083473, - "learning_rate": 1.6181771035913393e-07, - "loss": 2.5873, - "step": 23760 - }, - { - "epoch": 6.146142912520998, - "learning_rate": 1.617852975495916e-07, - "loss": 2.5957, - "step": 23780 - }, - { - "epoch": 6.151311538958522, - "learning_rate": 1.6175288474004925e-07, - "loss": 2.613, - "step": 23800 - }, - { - "epoch": 6.156480165396046, - "learning_rate": 1.6172047193050694e-07, - "loss": 2.5577, - "step": 23820 - }, - { - "epoch": 6.16164879183357, - "learning_rate": 1.6168805912096458e-07, - "loss": 2.6101, - "step": 23840 - }, - { - "epoch": 6.166817418271094, - "learning_rate": 1.6165564631142227e-07, - "loss": 2.5553, - "step": 23860 - }, - { - "epoch": 6.171986044708619, - "learning_rate": 1.6162323350187995e-07, - "loss": 2.6326, - "step": 23880 - }, - { - "epoch": 6.177154671146143, - "learning_rate": 1.615908206923376e-07, - "loss": 2.5922, - "step": 23900 - }, - { - "epoch": 6.182323297583667, - "learning_rate": 1.6155840788279528e-07, - "loss": 2.5913, - "step": 23920 - }, - { - "epoch": 6.187491924021192, - "learning_rate": 1.6152599507325294e-07, - "loss": 2.6378, - "step": 23940 - }, - { - "epoch": 6.192660550458716, - "learning_rate": 1.614935822637106e-07, - "loss": 2.5969, - "step": 23960 - }, - { - "epoch": 6.1978291768962395, - "learning_rate": 1.614611694541683e-07, - "loss": 2.5971, - "step": 23980 - }, - { - "epoch": 6.202997803333764, - "learning_rate": 1.6142875664462593e-07, - "loss": 2.616, - "step": 24000 - }, - { - "epoch": 6.208166429771288, - "learning_rate": 1.6139634383508361e-07, - "loss": 2.6352, - "step": 24020 - }, - { - "epoch": 6.2133350562088125, - "learning_rate": 1.613639310255413e-07, - "loss": 2.6371, - "step": 24040 - }, - { - "epoch": 6.218503682646337, - "learning_rate": 1.6133151821599894e-07, - "loss": 2.5946, - "step": 24060 - }, - { - "epoch": 6.223672309083861, - "learning_rate": 1.6129910540645663e-07, - "loss": 2.6379, - "step": 24080 - }, - { - "epoch": 6.228840935521385, - "learning_rate": 1.6126669259691431e-07, - "loss": 2.6046, - "step": 24100 - }, - { - "epoch": 6.234009561958909, - "learning_rate": 1.6123427978737195e-07, - "loss": 2.653, - "step": 24120 - }, - { - "epoch": 6.2391781883964335, - "learning_rate": 1.6120186697782964e-07, - "loss": 2.6409, - "step": 24140 - }, - { - "epoch": 6.244346814833958, - "learning_rate": 1.611694541682873e-07, - "loss": 2.6077, - "step": 24160 - }, - { - "epoch": 6.249515441271482, - "learning_rate": 1.6113704135874496e-07, - "loss": 2.5993, - "step": 24180 - }, - { - "epoch": 6.2546840677090065, - "learning_rate": 1.6110462854920265e-07, - "loss": 2.6326, - "step": 24200 - }, - { - "epoch": 6.259852694146531, - "learning_rate": 1.6107221573966029e-07, - "loss": 2.612, - "step": 24220 - }, - { - "epoch": 6.265021320584054, - "learning_rate": 1.6103980293011797e-07, - "loss": 2.662, - "step": 24240 - }, - { - "epoch": 6.270189947021579, - "learning_rate": 1.6100739012057566e-07, - "loss": 2.6377, - "step": 24260 - }, - { - "epoch": 6.275358573459103, - "learning_rate": 1.609749773110333e-07, - "loss": 2.6113, - "step": 24280 - }, - { - "epoch": 6.280527199896627, - "learning_rate": 1.6094256450149099e-07, - "loss": 2.6094, - "step": 24300 - }, - { - "epoch": 6.285695826334152, - "learning_rate": 1.6091015169194867e-07, - "loss": 2.6185, - "step": 24320 - }, - { - "epoch": 6.290864452771676, - "learning_rate": 1.608777388824063e-07, - "loss": 2.6523, - "step": 24340 - }, - { - "epoch": 6.2960330792092005, - "learning_rate": 1.60845326072864e-07, - "loss": 2.6458, - "step": 24360 - }, - { - "epoch": 6.301201705646724, - "learning_rate": 1.6081291326332166e-07, - "loss": 2.5826, - "step": 24380 - }, - { - "epoch": 6.306370332084248, - "learning_rate": 1.6078050045377932e-07, - "loss": 2.6395, - "step": 24400 - }, - { - "epoch": 6.311538958521773, - "learning_rate": 1.60748087644237e-07, - "loss": 2.6305, - "step": 24420 - }, - { - "epoch": 6.316707584959297, - "learning_rate": 1.6071567483469465e-07, - "loss": 2.6493, - "step": 24440 - }, - { - "epoch": 6.321876211396821, - "learning_rate": 1.6068326202515233e-07, - "loss": 2.6198, - "step": 24460 - }, - { - "epoch": 6.327044837834346, - "learning_rate": 1.6065084921561002e-07, - "loss": 2.5705, - "step": 24480 - }, - { - "epoch": 6.33221346427187, - "learning_rate": 1.6061843640606766e-07, - "loss": 2.5797, - "step": 24500 - }, - { - "epoch": 6.337382090709394, - "learning_rate": 1.6058602359652535e-07, - "loss": 2.6152, - "step": 24520 - }, - { - "epoch": 6.342550717146918, - "learning_rate": 1.60553610786983e-07, - "loss": 2.6075, - "step": 24540 - }, - { - "epoch": 6.347719343584442, - "learning_rate": 1.6052119797744067e-07, - "loss": 2.6446, - "step": 24560 - }, - { - "epoch": 6.352887970021967, - "learning_rate": 1.6048878516789836e-07, - "loss": 2.6204, - "step": 24580 - }, - { - "epoch": 6.358056596459491, - "learning_rate": 1.60456372358356e-07, - "loss": 2.6079, - "step": 24600 - }, - { - "epoch": 6.363225222897015, - "learning_rate": 1.6042395954881368e-07, - "loss": 2.59, - "step": 24620 - }, - { - "epoch": 6.36839384933454, - "learning_rate": 1.6039154673927137e-07, - "loss": 2.6417, - "step": 24640 - }, - { - "epoch": 6.373562475772063, - "learning_rate": 1.60359133929729e-07, - "loss": 2.6426, - "step": 24660 - }, - { - "epoch": 6.378731102209588, - "learning_rate": 1.603267211201867e-07, - "loss": 2.6004, - "step": 24680 - }, - { - "epoch": 6.383899728647112, - "learning_rate": 1.6029430831064438e-07, - "loss": 2.6422, - "step": 24700 - }, - { - "epoch": 6.389068355084636, - "learning_rate": 1.6026189550110202e-07, - "loss": 2.595, - "step": 24720 - }, - { - "epoch": 6.394236981522161, - "learning_rate": 1.602294826915597e-07, - "loss": 2.6091, - "step": 24740 - }, - { - "epoch": 6.399405607959685, - "learning_rate": 1.6019706988201737e-07, - "loss": 2.5978, - "step": 24760 - }, - { - "epoch": 6.404574234397209, - "learning_rate": 1.6016465707247503e-07, - "loss": 2.5525, - "step": 24780 - }, - { - "epoch": 6.409742860834733, - "learning_rate": 1.6013224426293272e-07, - "loss": 2.6011, - "step": 24800 - }, - { - "epoch": 6.414911487272257, - "learning_rate": 1.6009983145339035e-07, - "loss": 2.5727, - "step": 24820 - }, - { - "epoch": 6.420080113709782, - "learning_rate": 1.6006741864384804e-07, - "loss": 2.6214, - "step": 24840 - }, - { - "epoch": 6.425248740147306, - "learning_rate": 1.6003500583430573e-07, - "loss": 2.6492, - "step": 24860 - }, - { - "epoch": 6.43041736658483, - "learning_rate": 1.6000259302476336e-07, - "loss": 2.628, - "step": 24880 - }, - { - "epoch": 6.435585993022355, - "learning_rate": 1.5997018021522105e-07, - "loss": 2.6282, - "step": 24900 - }, - { - "epoch": 6.440754619459878, - "learning_rate": 1.5993776740567874e-07, - "loss": 2.6483, - "step": 24920 - }, - { - "epoch": 6.4459232458974025, - "learning_rate": 1.5990535459613638e-07, - "loss": 2.6026, - "step": 24940 - }, - { - "epoch": 6.451091872334927, - "learning_rate": 1.5987294178659407e-07, - "loss": 2.6324, - "step": 24960 - }, - { - "epoch": 6.456260498772451, - "learning_rate": 1.5984052897705173e-07, - "loss": 2.5601, - "step": 24980 - }, - { - "epoch": 6.461429125209976, - "learning_rate": 1.598081161675094e-07, - "loss": 2.6437, - "step": 25000 - }, - { - "epoch": 6.4665977516475, - "learning_rate": 1.5977570335796708e-07, - "loss": 2.6326, - "step": 25020 - }, - { - "epoch": 6.471766378085024, - "learning_rate": 1.597432905484247e-07, - "loss": 2.6664, - "step": 25040 - }, - { - "epoch": 6.476935004522548, - "learning_rate": 1.597108777388824e-07, - "loss": 2.6057, - "step": 25060 - }, - { - "epoch": 6.482103630960072, - "learning_rate": 1.596784649293401e-07, - "loss": 2.6007, - "step": 25080 - }, - { - "epoch": 6.4872722573975965, - "learning_rate": 1.5964605211979772e-07, - "loss": 2.5935, - "step": 25100 - }, - { - "epoch": 6.492440883835121, - "learning_rate": 1.5961363931025541e-07, - "loss": 2.5344, - "step": 25120 - }, - { - "epoch": 6.497609510272645, - "learning_rate": 1.5958122650071308e-07, - "loss": 2.6249, - "step": 25140 - }, - { - "epoch": 6.50277813671017, - "learning_rate": 1.5954881369117074e-07, - "loss": 2.5882, - "step": 25160 - }, - { - "epoch": 6.507946763147693, - "learning_rate": 1.5951640088162843e-07, - "loss": 2.6219, - "step": 25180 - }, - { - "epoch": 6.513115389585217, - "learning_rate": 1.5948398807208606e-07, - "loss": 2.5838, - "step": 25200 - }, - { - "epoch": 6.518284016022742, - "learning_rate": 1.5945157526254375e-07, - "loss": 2.5943, - "step": 25220 - }, - { - "epoch": 6.523452642460266, - "learning_rate": 1.5941916245300144e-07, - "loss": 2.6468, - "step": 25240 - }, - { - "epoch": 6.5286212688977905, - "learning_rate": 1.5938674964345907e-07, - "loss": 2.6726, - "step": 25260 - }, - { - "epoch": 6.533789895335315, - "learning_rate": 1.5935433683391676e-07, - "loss": 2.5732, - "step": 25280 - }, - { - "epoch": 6.538958521772839, - "learning_rate": 1.5932192402437442e-07, - "loss": 2.5739, - "step": 25300 - }, - { - "epoch": 6.544127148210363, - "learning_rate": 1.5928951121483208e-07, - "loss": 2.5914, - "step": 25320 - }, - { - "epoch": 6.549295774647887, - "learning_rate": 1.5925709840528977e-07, - "loss": 2.6142, - "step": 25340 - }, - { - "epoch": 6.554464401085411, - "learning_rate": 1.5922468559574744e-07, - "loss": 2.6145, - "step": 25360 - }, - { - "epoch": 6.559633027522936, - "learning_rate": 1.591922727862051e-07, - "loss": 2.6449, - "step": 25380 - }, - { - "epoch": 6.56480165396046, - "learning_rate": 1.5915985997666279e-07, - "loss": 2.586, - "step": 25400 - }, - { - "epoch": 6.5699702803979845, - "learning_rate": 1.5912744716712042e-07, - "loss": 2.6136, - "step": 25420 - }, - { - "epoch": 6.575138906835509, - "learning_rate": 1.590950343575781e-07, - "loss": 2.6556, - "step": 25440 - }, - { - "epoch": 6.580307533273032, - "learning_rate": 1.5906262154803577e-07, - "loss": 2.6557, - "step": 25460 - }, - { - "epoch": 6.585476159710557, - "learning_rate": 1.5903020873849343e-07, - "loss": 2.5518, - "step": 25480 - }, - { - "epoch": 6.590644786148081, - "learning_rate": 1.5899779592895112e-07, - "loss": 2.6054, - "step": 25500 - }, - { - "epoch": 6.595813412585605, - "learning_rate": 1.5896538311940878e-07, - "loss": 2.5766, - "step": 25520 - }, - { - "epoch": 6.60098203902313, - "learning_rate": 1.5893297030986644e-07, - "loss": 2.5573, - "step": 25540 - }, - { - "epoch": 6.606150665460654, - "learning_rate": 1.5890055750032413e-07, - "loss": 2.6429, - "step": 25560 - }, - { - "epoch": 6.6113192918981785, - "learning_rate": 1.588681446907818e-07, - "loss": 2.6795, - "step": 25580 - }, - { - "epoch": 6.616487918335702, - "learning_rate": 1.5883573188123946e-07, - "loss": 2.6573, - "step": 25600 - }, - { - "epoch": 6.621656544773226, - "learning_rate": 1.5880331907169712e-07, - "loss": 2.5762, - "step": 25620 - }, - { - "epoch": 6.626825171210751, - "learning_rate": 1.5877090626215478e-07, - "loss": 2.6336, - "step": 25640 - }, - { - "epoch": 6.631993797648275, - "learning_rate": 1.5873849345261247e-07, - "loss": 2.5999, - "step": 25660 - }, - { - "epoch": 6.637162424085799, - "learning_rate": 1.5870608064307013e-07, - "loss": 2.6625, - "step": 25680 - }, - { - "epoch": 6.642331050523324, - "learning_rate": 1.586736678335278e-07, - "loss": 2.5277, - "step": 25700 - }, - { - "epoch": 6.647499676960848, - "learning_rate": 1.5864125502398548e-07, - "loss": 2.5688, - "step": 25720 - }, - { - "epoch": 6.652668303398372, - "learning_rate": 1.5860884221444314e-07, - "loss": 2.5813, - "step": 25740 - }, - { - "epoch": 6.657836929835896, - "learning_rate": 1.585764294049008e-07, - "loss": 2.622, - "step": 25760 - }, - { - "epoch": 6.66300555627342, - "learning_rate": 1.5854401659535847e-07, - "loss": 2.5956, - "step": 25780 - }, - { - "epoch": 6.668174182710945, - "learning_rate": 1.5851160378581613e-07, - "loss": 2.5834, - "step": 25800 - }, - { - "epoch": 6.673342809148469, - "learning_rate": 1.5847919097627382e-07, - "loss": 2.5737, - "step": 25820 - }, - { - "epoch": 6.678511435585993, - "learning_rate": 1.5844677816673148e-07, - "loss": 2.5815, - "step": 25840 - }, - { - "epoch": 6.683680062023518, - "learning_rate": 1.5841436535718914e-07, - "loss": 2.5868, - "step": 25860 - }, - { - "epoch": 6.688848688461041, - "learning_rate": 1.5838195254764683e-07, - "loss": 2.6376, - "step": 25880 - }, - { - "epoch": 6.6940173148985656, - "learning_rate": 1.583495397381045e-07, - "loss": 2.5857, - "step": 25900 - }, - { - "epoch": 6.69918594133609, - "learning_rate": 1.5831712692856215e-07, - "loss": 2.6078, - "step": 25920 - }, - { - "epoch": 6.704354567773614, - "learning_rate": 1.5828471411901981e-07, - "loss": 2.6262, - "step": 25940 - }, - { - "epoch": 6.709523194211139, - "learning_rate": 1.582523013094775e-07, - "loss": 2.5676, - "step": 25960 - }, - { - "epoch": 6.714691820648663, - "learning_rate": 1.5821988849993516e-07, - "loss": 2.6036, - "step": 25980 - }, - { - "epoch": 6.7198604470861865, - "learning_rate": 1.5818747569039283e-07, - "loss": 2.5575, - "step": 26000 - }, - { - "epoch": 6.725029073523711, - "learning_rate": 1.581550628808505e-07, - "loss": 2.5708, - "step": 26020 - }, - { - "epoch": 6.730197699961235, - "learning_rate": 1.5812265007130818e-07, - "loss": 2.6489, - "step": 26040 - }, - { - "epoch": 6.7353663263987595, - "learning_rate": 1.5809023726176584e-07, - "loss": 2.6338, - "step": 26060 - }, - { - "epoch": 6.740534952836284, - "learning_rate": 1.580578244522235e-07, - "loss": 2.6506, - "step": 26080 - }, - { - "epoch": 6.745703579273808, - "learning_rate": 1.5802541164268116e-07, - "loss": 2.541, - "step": 26100 - }, - { - "epoch": 6.750872205711332, - "learning_rate": 1.5799299883313885e-07, - "loss": 2.5367, - "step": 26120 - }, - { - "epoch": 6.756040832148856, - "learning_rate": 1.579605860235965e-07, - "loss": 2.5891, - "step": 26140 - }, - { - "epoch": 6.7612094585863804, - "learning_rate": 1.5792817321405417e-07, - "loss": 2.6226, - "step": 26160 - }, - { - "epoch": 6.766378085023905, - "learning_rate": 1.5789576040451186e-07, - "loss": 2.6313, - "step": 26180 - }, - { - "epoch": 6.771546711461429, - "learning_rate": 1.5786334759496952e-07, - "loss": 2.5891, - "step": 26200 - }, - { - "epoch": 6.7767153378989535, - "learning_rate": 1.5783093478542719e-07, - "loss": 2.5806, - "step": 26220 - }, - { - "epoch": 6.781883964336478, - "learning_rate": 1.5779852197588485e-07, - "loss": 2.5518, - "step": 26240 - }, - { - "epoch": 6.787052590774001, - "learning_rate": 1.5776610916634254e-07, - "loss": 2.6104, - "step": 26260 - }, - { - "epoch": 6.792221217211526, - "learning_rate": 1.577336963568002e-07, - "loss": 2.6052, - "step": 26280 - }, - { - "epoch": 6.79738984364905, - "learning_rate": 1.5770128354725786e-07, - "loss": 2.5845, - "step": 26300 - }, - { - "epoch": 6.802558470086574, - "learning_rate": 1.5766887073771552e-07, - "loss": 2.6096, - "step": 26320 - }, - { - "epoch": 6.807727096524099, - "learning_rate": 1.576364579281732e-07, - "loss": 2.5916, - "step": 26340 - }, - { - "epoch": 6.812895722961623, - "learning_rate": 1.5760404511863087e-07, - "loss": 2.584, - "step": 26360 - }, - { - "epoch": 6.8180643493991475, - "learning_rate": 1.5757163230908853e-07, - "loss": 2.6194, - "step": 26380 - }, - { - "epoch": 6.823232975836671, - "learning_rate": 1.575392194995462e-07, - "loss": 2.6581, - "step": 26400 - }, - { - "epoch": 6.828401602274195, - "learning_rate": 1.5750680669000388e-07, - "loss": 2.6636, - "step": 26420 - }, - { - "epoch": 6.83357022871172, - "learning_rate": 1.5747439388046155e-07, - "loss": 2.6135, - "step": 26440 - }, - { - "epoch": 6.838738855149244, - "learning_rate": 1.574419810709192e-07, - "loss": 2.609, - "step": 26460 - }, - { - "epoch": 6.843907481586768, - "learning_rate": 1.574095682613769e-07, - "loss": 2.6168, - "step": 26480 - }, - { - "epoch": 6.849076108024293, - "learning_rate": 1.5737715545183456e-07, - "loss": 2.5408, - "step": 26500 - }, - { - "epoch": 6.854244734461817, - "learning_rate": 1.5734474264229222e-07, - "loss": 2.6252, - "step": 26520 - }, - { - "epoch": 6.859413360899341, - "learning_rate": 1.5731232983274988e-07, - "loss": 2.5935, - "step": 26540 - }, - { - "epoch": 6.864581987336865, - "learning_rate": 1.5727991702320757e-07, - "loss": 2.5767, - "step": 26560 - }, - { - "epoch": 6.869750613774389, - "learning_rate": 1.5724750421366523e-07, - "loss": 2.6351, - "step": 26580 - }, - { - "epoch": 6.874919240211914, - "learning_rate": 1.572150914041229e-07, - "loss": 2.5793, - "step": 26600 - }, - { - "epoch": 6.880087866649438, - "learning_rate": 1.5718267859458056e-07, - "loss": 2.6026, - "step": 26620 - }, - { - "epoch": 6.885256493086962, - "learning_rate": 1.5715026578503824e-07, - "loss": 2.5796, - "step": 26640 - }, - { - "epoch": 6.890425119524487, - "learning_rate": 1.571178529754959e-07, - "loss": 2.6488, - "step": 26660 - }, - { - "epoch": 6.89559374596201, - "learning_rate": 1.5708544016595357e-07, - "loss": 2.6461, - "step": 26680 - }, - { - "epoch": 6.900762372399535, - "learning_rate": 1.5705302735641123e-07, - "loss": 2.5738, - "step": 26700 - }, - { - "epoch": 6.905930998837059, - "learning_rate": 1.5702061454686892e-07, - "loss": 2.5407, - "step": 26720 - }, - { - "epoch": 6.911099625274583, - "learning_rate": 1.5698820173732658e-07, - "loss": 2.6304, - "step": 26740 - }, - { - "epoch": 6.916268251712108, - "learning_rate": 1.5695578892778424e-07, - "loss": 2.576, - "step": 26760 - }, - { - "epoch": 6.921436878149632, - "learning_rate": 1.5692337611824193e-07, - "loss": 2.5784, - "step": 26780 - }, - { - "epoch": 6.926605504587156, - "learning_rate": 1.568909633086996e-07, - "loss": 2.6124, - "step": 26800 - }, - { - "epoch": 6.93177413102468, - "learning_rate": 1.5685855049915725e-07, - "loss": 2.5992, - "step": 26820 - }, - { - "epoch": 6.936942757462204, - "learning_rate": 1.5682613768961492e-07, - "loss": 2.5961, - "step": 26840 - }, - { - "epoch": 6.942111383899729, - "learning_rate": 1.567937248800726e-07, - "loss": 2.5989, - "step": 26860 - }, - { - "epoch": 6.947280010337253, - "learning_rate": 1.5676131207053027e-07, - "loss": 2.6514, - "step": 26880 - }, - { - "epoch": 6.952448636774777, - "learning_rate": 1.5672889926098793e-07, - "loss": 2.5921, - "step": 26900 - }, - { - "epoch": 6.957617263212302, - "learning_rate": 1.566964864514456e-07, - "loss": 2.5907, - "step": 26920 - }, - { - "epoch": 6.962785889649826, - "learning_rate": 1.5666407364190328e-07, - "loss": 2.5183, - "step": 26940 - }, - { - "epoch": 6.9679545160873495, - "learning_rate": 1.5663166083236094e-07, - "loss": 2.6151, - "step": 26960 - }, - { - "epoch": 6.973123142524874, - "learning_rate": 1.565992480228186e-07, - "loss": 2.5454, - "step": 26980 - }, - { - "epoch": 6.978291768962398, - "learning_rate": 1.5656683521327626e-07, - "loss": 2.5349, - "step": 27000 - }, - { - "epoch": 6.983460395399923, - "learning_rate": 1.5653442240373395e-07, - "loss": 2.6422, - "step": 27020 - }, - { - "epoch": 6.988629021837447, - "learning_rate": 1.5650200959419161e-07, - "loss": 2.6273, - "step": 27040 - }, - { - "epoch": 6.993797648274971, - "learning_rate": 1.5646959678464928e-07, - "loss": 2.5851, - "step": 27060 - }, - { - "epoch": 6.998966274712495, - "learning_rate": 1.5643718397510696e-07, - "loss": 2.5716, - "step": 27080 - }, - { - "epoch": 7.0, - "eval_bleu": 5.2201, - "eval_gen_len": 46.1405, - "eval_loss": 2.561117172241211, - "eval_runtime": 958.6568, - "eval_samples_per_second": 1.796, - "eval_steps_per_second": 0.898, - "step": 27084 - }, - { - "epoch": 7.004134901150019, - "learning_rate": 1.5640477116556463e-07, - "loss": 2.5946, - "step": 27100 - }, - { - "epoch": 7.0093035275875435, - "learning_rate": 1.563723583560223e-07, - "loss": 2.5941, - "step": 27120 - }, - { - "epoch": 7.014472154025068, - "learning_rate": 1.5633994554647995e-07, - "loss": 2.5895, - "step": 27140 - }, - { - "epoch": 7.019640780462592, - "learning_rate": 1.5630753273693764e-07, - "loss": 2.5711, - "step": 27160 - }, - { - "epoch": 7.024809406900117, - "learning_rate": 1.562751199273953e-07, - "loss": 2.6074, - "step": 27180 - }, - { - "epoch": 7.029978033337641, - "learning_rate": 1.5624270711785296e-07, - "loss": 2.5763, - "step": 27200 - }, - { - "epoch": 7.035146659775164, - "learning_rate": 1.5621029430831062e-07, - "loss": 2.5656, - "step": 27220 - }, - { - "epoch": 7.040315286212689, - "learning_rate": 1.561778814987683e-07, - "loss": 2.576, - "step": 27240 - }, - { - "epoch": 7.045483912650213, - "learning_rate": 1.5614546868922597e-07, - "loss": 2.6323, - "step": 27260 - }, - { - "epoch": 7.0506525390877375, - "learning_rate": 1.5611305587968364e-07, - "loss": 2.6121, - "step": 27280 - }, - { - "epoch": 7.055821165525262, - "learning_rate": 1.560806430701413e-07, - "loss": 2.5962, - "step": 27300 - }, - { - "epoch": 7.060989791962786, - "learning_rate": 1.5604823026059899e-07, - "loss": 2.614, - "step": 27320 - }, - { - "epoch": 7.06615841840031, - "learning_rate": 1.5601581745105665e-07, - "loss": 2.5828, - "step": 27340 - }, - { - "epoch": 7.071327044837834, - "learning_rate": 1.559834046415143e-07, - "loss": 2.6366, - "step": 27360 - }, - { - "epoch": 7.076495671275358, - "learning_rate": 1.55950991831972e-07, - "loss": 2.5704, - "step": 27380 - }, - { - "epoch": 7.081664297712883, - "learning_rate": 1.5591857902242966e-07, - "loss": 2.6038, - "step": 27400 - }, - { - "epoch": 7.086832924150407, - "learning_rate": 1.5588616621288732e-07, - "loss": 2.5829, - "step": 27420 - }, - { - "epoch": 7.0920015505879315, - "learning_rate": 1.5585375340334498e-07, - "loss": 2.5535, - "step": 27440 - }, - { - "epoch": 7.097170177025456, - "learning_rate": 1.5582134059380267e-07, - "loss": 2.6527, - "step": 27460 - }, - { - "epoch": 7.102338803462979, - "learning_rate": 1.5578892778426033e-07, - "loss": 2.5602, - "step": 27480 - }, - { - "epoch": 7.107507429900504, - "learning_rate": 1.55756514974718e-07, - "loss": 2.5519, - "step": 27500 - }, - { - "epoch": 7.112676056338028, - "learning_rate": 1.5572410216517566e-07, - "loss": 2.6072, - "step": 27520 - }, - { - "epoch": 7.117844682775552, - "learning_rate": 1.5569168935563335e-07, - "loss": 2.5692, - "step": 27540 - }, - { - "epoch": 7.123013309213077, - "learning_rate": 1.55659276546091e-07, - "loss": 2.5865, - "step": 27560 - }, - { - "epoch": 7.128181935650601, - "learning_rate": 1.5562686373654867e-07, - "loss": 2.5513, - "step": 27580 - }, - { - "epoch": 7.1333505620881255, - "learning_rate": 1.5559445092700633e-07, - "loss": 2.5622, - "step": 27600 - }, - { - "epoch": 7.138519188525649, - "learning_rate": 1.5556203811746402e-07, - "loss": 2.5787, - "step": 27620 - }, - { - "epoch": 7.143687814963173, - "learning_rate": 1.5552962530792168e-07, - "loss": 2.5697, - "step": 27640 - }, - { - "epoch": 7.148856441400698, - "learning_rate": 1.5549721249837934e-07, - "loss": 2.5477, - "step": 27660 - }, - { - "epoch": 7.154025067838222, - "learning_rate": 1.5546479968883703e-07, - "loss": 2.5206, - "step": 27680 - }, - { - "epoch": 7.159193694275746, - "learning_rate": 1.554323868792947e-07, - "loss": 2.6325, - "step": 27700 - }, - { - "epoch": 7.164362320713271, - "learning_rate": 1.5539997406975236e-07, - "loss": 2.5897, - "step": 27720 - }, - { - "epoch": 7.169530947150795, - "learning_rate": 1.5536756126021002e-07, - "loss": 2.5669, - "step": 27740 - }, - { - "epoch": 7.174699573588319, - "learning_rate": 1.553351484506677e-07, - "loss": 2.5391, - "step": 27760 - }, - { - "epoch": 7.179868200025843, - "learning_rate": 1.5530273564112537e-07, - "loss": 2.5715, - "step": 27780 - }, - { - "epoch": 7.185036826463367, - "learning_rate": 1.5527032283158303e-07, - "loss": 2.6015, - "step": 27800 - }, - { - "epoch": 7.190205452900892, - "learning_rate": 1.552379100220407e-07, - "loss": 2.6043, - "step": 27820 - }, - { - "epoch": 7.195374079338416, - "learning_rate": 1.5520549721249838e-07, - "loss": 2.5723, - "step": 27840 - }, - { - "epoch": 7.20054270577594, - "learning_rate": 1.5517308440295604e-07, - "loss": 2.5877, - "step": 27860 - }, - { - "epoch": 7.205711332213465, - "learning_rate": 1.551406715934137e-07, - "loss": 2.5823, - "step": 27880 - }, - { - "epoch": 7.210879958650988, - "learning_rate": 1.5510825878387137e-07, - "loss": 2.5435, - "step": 27900 - }, - { - "epoch": 7.2160485850885125, - "learning_rate": 1.5507584597432905e-07, - "loss": 2.5416, - "step": 27920 - }, - { - "epoch": 7.221217211526037, - "learning_rate": 1.5504343316478672e-07, - "loss": 2.6001, - "step": 27940 - }, - { - "epoch": 7.226385837963561, - "learning_rate": 1.5501102035524438e-07, - "loss": 2.6136, - "step": 27960 - }, - { - "epoch": 7.231554464401086, - "learning_rate": 1.5497860754570204e-07, - "loss": 2.6079, - "step": 27980 - }, - { - "epoch": 7.23672309083861, - "learning_rate": 1.5494619473615973e-07, - "loss": 2.6051, - "step": 28000 - }, - { - "epoch": 7.2418917172761335, - "learning_rate": 1.549137819266174e-07, - "loss": 2.6275, - "step": 28020 - }, - { - "epoch": 7.247060343713658, - "learning_rate": 1.5488136911707505e-07, - "loss": 2.6114, - "step": 28040 - }, - { - "epoch": 7.252228970151182, - "learning_rate": 1.5484895630753274e-07, - "loss": 2.6532, - "step": 28060 - }, - { - "epoch": 7.2573975965887065, - "learning_rate": 1.548165434979904e-07, - "loss": 2.5954, - "step": 28080 - }, - { - "epoch": 7.262566223026231, - "learning_rate": 1.5478413068844806e-07, - "loss": 2.555, - "step": 28100 - }, - { - "epoch": 7.267734849463755, - "learning_rate": 1.5475171787890573e-07, - "loss": 2.601, - "step": 28120 - }, - { - "epoch": 7.27290347590128, - "learning_rate": 1.547193050693634e-07, - "loss": 2.5666, - "step": 28140 - }, - { - "epoch": 7.278072102338803, - "learning_rate": 1.5468689225982108e-07, - "loss": 2.563, - "step": 28160 - }, - { - "epoch": 7.283240728776327, - "learning_rate": 1.5465447945027874e-07, - "loss": 2.5919, - "step": 28180 - }, - { - "epoch": 7.288409355213852, - "learning_rate": 1.546220666407364e-07, - "loss": 2.5042, - "step": 28200 - }, - { - "epoch": 7.293577981651376, - "learning_rate": 1.545896538311941e-07, - "loss": 2.614, - "step": 28220 - }, - { - "epoch": 7.2987466080889005, - "learning_rate": 1.5455724102165175e-07, - "loss": 2.5763, - "step": 28240 - }, - { - "epoch": 7.303915234526425, - "learning_rate": 1.545248282121094e-07, - "loss": 2.5852, - "step": 28260 - }, - { - "epoch": 7.309083860963949, - "learning_rate": 1.544924154025671e-07, - "loss": 2.638, - "step": 28280 - }, - { - "epoch": 7.314252487401473, - "learning_rate": 1.5446000259302476e-07, - "loss": 2.5642, - "step": 28300 - }, - { - "epoch": 7.319421113838997, - "learning_rate": 1.5442758978348242e-07, - "loss": 2.5905, - "step": 28320 - }, - { - "epoch": 7.324589740276521, - "learning_rate": 1.5439517697394009e-07, - "loss": 2.5641, - "step": 28340 - }, - { - "epoch": 7.329758366714046, - "learning_rate": 1.5436276416439775e-07, - "loss": 2.5369, - "step": 28360 - }, - { - "epoch": 7.33492699315157, - "learning_rate": 1.5433035135485544e-07, - "loss": 2.5828, - "step": 28380 - }, - { - "epoch": 7.3400956195890945, - "learning_rate": 1.542979385453131e-07, - "loss": 2.6183, - "step": 28400 - }, - { - "epoch": 7.345264246026618, - "learning_rate": 1.5426552573577076e-07, - "loss": 2.5041, - "step": 28420 - }, - { - "epoch": 7.350432872464142, - "learning_rate": 1.5423311292622845e-07, - "loss": 2.6381, - "step": 28440 - }, - { - "epoch": 7.355601498901667, - "learning_rate": 1.542007001166861e-07, - "loss": 2.607, - "step": 28460 - }, - { - "epoch": 7.360770125339191, - "learning_rate": 1.5416828730714377e-07, - "loss": 2.5654, - "step": 28480 - }, - { - "epoch": 7.365938751776715, - "learning_rate": 1.5413587449760143e-07, - "loss": 2.5333, - "step": 28500 - }, - { - "epoch": 7.37110737821424, - "learning_rate": 1.5410346168805912e-07, - "loss": 2.5771, - "step": 28520 - }, - { - "epoch": 7.376276004651764, - "learning_rate": 1.5407104887851678e-07, - "loss": 2.5225, - "step": 28540 - }, - { - "epoch": 7.381444631089288, - "learning_rate": 1.5403863606897445e-07, - "loss": 2.6159, - "step": 28560 - }, - { - "epoch": 7.386613257526812, - "learning_rate": 1.540062232594321e-07, - "loss": 2.6229, - "step": 28580 - }, - { - "epoch": 7.391781883964336, - "learning_rate": 1.539738104498898e-07, - "loss": 2.5326, - "step": 28600 - }, - { - "epoch": 7.396950510401861, - "learning_rate": 1.5394139764034746e-07, - "loss": 2.5116, - "step": 28620 - }, - { - "epoch": 7.402119136839385, - "learning_rate": 1.5390898483080512e-07, - "loss": 2.5276, - "step": 28640 - }, - { - "epoch": 7.407287763276909, - "learning_rate": 1.538765720212628e-07, - "loss": 2.5662, - "step": 28660 - }, - { - "epoch": 7.412456389714434, - "learning_rate": 1.5384415921172047e-07, - "loss": 2.5843, - "step": 28680 - }, - { - "epoch": 7.417625016151957, - "learning_rate": 1.5381174640217813e-07, - "loss": 2.6341, - "step": 28700 - }, - { - "epoch": 7.422793642589482, - "learning_rate": 1.537793335926358e-07, - "loss": 2.5741, - "step": 28720 - }, - { - "epoch": 7.427962269027006, - "learning_rate": 1.5374692078309346e-07, - "loss": 2.5658, - "step": 28740 - }, - { - "epoch": 7.43313089546453, - "learning_rate": 1.5371450797355114e-07, - "loss": 2.5909, - "step": 28760 - }, - { - "epoch": 7.438299521902055, - "learning_rate": 1.536820951640088e-07, - "loss": 2.5543, - "step": 28780 - }, - { - "epoch": 7.443468148339579, - "learning_rate": 1.5364968235446647e-07, - "loss": 2.5546, - "step": 28800 - }, - { - "epoch": 7.448636774777103, - "learning_rate": 1.5361726954492416e-07, - "loss": 2.5236, - "step": 28820 - }, - { - "epoch": 7.453805401214627, - "learning_rate": 1.5358485673538182e-07, - "loss": 2.5981, - "step": 28840 - }, - { - "epoch": 7.458974027652151, - "learning_rate": 1.5355244392583948e-07, - "loss": 2.5322, - "step": 28860 - }, - { - "epoch": 7.464142654089676, - "learning_rate": 1.5352003111629717e-07, - "loss": 2.6326, - "step": 28880 - }, - { - "epoch": 7.4693112805272, - "learning_rate": 1.5348761830675483e-07, - "loss": 2.5647, - "step": 28900 - }, - { - "epoch": 7.474479906964724, - "learning_rate": 1.534552054972125e-07, - "loss": 2.6036, - "step": 28920 - }, - { - "epoch": 7.479648533402249, - "learning_rate": 1.5342279268767015e-07, - "loss": 2.5672, - "step": 28940 - }, - { - "epoch": 7.484817159839773, - "learning_rate": 1.5339037987812782e-07, - "loss": 2.5102, - "step": 28960 - }, - { - "epoch": 7.4899857862772965, - "learning_rate": 1.533579670685855e-07, - "loss": 2.6278, - "step": 28980 - }, - { - "epoch": 7.495154412714821, - "learning_rate": 1.5332555425904317e-07, - "loss": 2.6092, - "step": 29000 - }, - { - "epoch": 7.500323039152345, - "learning_rate": 1.5329314144950083e-07, - "loss": 2.5685, - "step": 29020 - }, - { - "epoch": 7.50549166558987, - "learning_rate": 1.5326072863995852e-07, - "loss": 2.5203, - "step": 29040 - }, - { - "epoch": 7.510660292027394, - "learning_rate": 1.5322831583041618e-07, - "loss": 2.6086, - "step": 29060 - }, - { - "epoch": 7.515828918464918, - "learning_rate": 1.5319590302087384e-07, - "loss": 2.556, - "step": 29080 - }, - { - "epoch": 7.520997544902443, - "learning_rate": 1.531634902113315e-07, - "loss": 2.6068, - "step": 29100 - }, - { - "epoch": 7.526166171339966, - "learning_rate": 1.531310774017892e-07, - "loss": 2.5915, - "step": 29120 - }, - { - "epoch": 7.5313347977774905, - "learning_rate": 1.5309866459224685e-07, - "loss": 2.5853, - "step": 29140 - }, - { - "epoch": 7.536503424215015, - "learning_rate": 1.5306625178270451e-07, - "loss": 2.5933, - "step": 29160 - }, - { - "epoch": 7.541672050652539, - "learning_rate": 1.5303383897316218e-07, - "loss": 2.5732, - "step": 29180 - }, - { - "epoch": 7.546840677090064, - "learning_rate": 1.5300142616361986e-07, - "loss": 2.5925, - "step": 29200 - }, - { - "epoch": 7.552009303527588, - "learning_rate": 1.5296901335407753e-07, - "loss": 2.6029, - "step": 29220 - }, - { - "epoch": 7.557177929965111, - "learning_rate": 1.529366005445352e-07, - "loss": 2.6053, - "step": 29240 - }, - { - "epoch": 7.562346556402636, - "learning_rate": 1.5290418773499288e-07, - "loss": 2.5341, - "step": 29260 - }, - { - "epoch": 7.56751518284016, - "learning_rate": 1.5287177492545054e-07, - "loss": 2.4981, - "step": 29280 - }, - { - "epoch": 7.5726838092776845, - "learning_rate": 1.528393621159082e-07, - "loss": 2.5551, - "step": 29300 - }, - { - "epoch": 7.577852435715209, - "learning_rate": 1.5280694930636586e-07, - "loss": 2.5029, - "step": 29320 - }, - { - "epoch": 7.583021062152733, - "learning_rate": 1.5277453649682352e-07, - "loss": 2.6108, - "step": 29340 - }, - { - "epoch": 7.588189688590257, - "learning_rate": 1.527421236872812e-07, - "loss": 2.5652, - "step": 29360 - }, - { - "epoch": 7.593358315027781, - "learning_rate": 1.5270971087773887e-07, - "loss": 2.5423, - "step": 29380 - }, - { - "epoch": 7.598526941465305, - "learning_rate": 1.5267729806819654e-07, - "loss": 2.5123, - "step": 29400 - }, - { - "epoch": 7.60369556790283, - "learning_rate": 1.5264488525865422e-07, - "loss": 2.5787, - "step": 29420 - }, - { - "epoch": 7.608864194340354, - "learning_rate": 1.5261247244911189e-07, - "loss": 2.5894, - "step": 29440 - }, - { - "epoch": 7.6140328207778785, - "learning_rate": 1.5258005963956955e-07, - "loss": 2.5427, - "step": 29460 - }, - { - "epoch": 7.619201447215403, - "learning_rate": 1.5254764683002724e-07, - "loss": 2.6234, - "step": 29480 - }, - { - "epoch": 7.624370073652926, - "learning_rate": 1.525152340204849e-07, - "loss": 2.5201, - "step": 29500 - }, - { - "epoch": 7.629538700090451, - "learning_rate": 1.5248282121094256e-07, - "loss": 2.5933, - "step": 29520 - }, - { - "epoch": 7.634707326527975, - "learning_rate": 1.5245040840140022e-07, - "loss": 2.555, - "step": 29540 - }, - { - "epoch": 7.639875952965499, - "learning_rate": 1.5241799559185788e-07, - "loss": 2.5297, - "step": 29560 - }, - { - "epoch": 7.645044579403024, - "learning_rate": 1.5238558278231557e-07, - "loss": 2.574, - "step": 29580 - }, - { - "epoch": 7.650213205840548, - "learning_rate": 1.5235316997277323e-07, - "loss": 2.6062, - "step": 29600 - }, - { - "epoch": 7.6553818322780725, - "learning_rate": 1.523207571632309e-07, - "loss": 2.5437, - "step": 29620 - }, - { - "epoch": 7.660550458715596, - "learning_rate": 1.5228834435368858e-07, - "loss": 2.599, - "step": 29640 - }, - { - "epoch": 7.66571908515312, - "learning_rate": 1.5225593154414625e-07, - "loss": 2.5211, - "step": 29660 - }, - { - "epoch": 7.670887711590645, - "learning_rate": 1.522235187346039e-07, - "loss": 2.538, - "step": 29680 - }, - { - "epoch": 7.676056338028169, - "learning_rate": 1.5219110592506157e-07, - "loss": 2.5887, - "step": 29700 - }, - { - "epoch": 7.681224964465693, - "learning_rate": 1.5215869311551926e-07, - "loss": 2.5519, - "step": 29720 - }, - { - "epoch": 7.686393590903218, - "learning_rate": 1.5212628030597692e-07, - "loss": 2.5509, - "step": 29740 - }, - { - "epoch": 7.691562217340742, - "learning_rate": 1.5209386749643458e-07, - "loss": 2.5353, - "step": 29760 - }, - { - "epoch": 7.6967308437782656, - "learning_rate": 1.5206145468689224e-07, - "loss": 2.6387, - "step": 29780 - }, - { - "epoch": 7.70189947021579, - "learning_rate": 1.5202904187734993e-07, - "loss": 2.5547, - "step": 29800 - }, - { - "epoch": 7.707068096653314, - "learning_rate": 1.519966290678076e-07, - "loss": 2.65, - "step": 29820 - }, - { - "epoch": 7.712236723090839, - "learning_rate": 1.5196421625826526e-07, - "loss": 2.5733, - "step": 29840 - }, - { - "epoch": 7.717405349528363, - "learning_rate": 1.5193180344872294e-07, - "loss": 2.5608, - "step": 29860 - }, - { - "epoch": 7.722573975965887, - "learning_rate": 1.518993906391806e-07, - "loss": 2.5819, - "step": 29880 - }, - { - "epoch": 7.727742602403412, - "learning_rate": 1.5186697782963827e-07, - "loss": 2.6017, - "step": 29900 - }, - { - "epoch": 7.732911228840935, - "learning_rate": 1.5183456502009593e-07, - "loss": 2.5467, - "step": 29920 - }, - { - "epoch": 7.7380798552784595, - "learning_rate": 1.518021522105536e-07, - "loss": 2.558, - "step": 29940 - }, - { - "epoch": 7.743248481715984, - "learning_rate": 1.5176973940101128e-07, - "loss": 2.5716, - "step": 29960 - }, - { - "epoch": 7.748417108153508, - "learning_rate": 1.5173732659146894e-07, - "loss": 2.5765, - "step": 29980 - }, - { - "epoch": 7.753585734591033, - "learning_rate": 1.517049137819266e-07, - "loss": 2.6051, - "step": 30000 - }, - { - "epoch": 7.758754361028557, - "learning_rate": 1.516725009723843e-07, - "loss": 2.6049, - "step": 30020 - }, - { - "epoch": 7.763922987466081, - "learning_rate": 1.5164008816284195e-07, - "loss": 2.5621, - "step": 30040 - }, - { - "epoch": 7.769091613903605, - "learning_rate": 1.5160767535329962e-07, - "loss": 2.5449, - "step": 30060 - }, - { - "epoch": 7.774260240341129, - "learning_rate": 1.515752625437573e-07, - "loss": 2.5652, - "step": 30080 - }, - { - "epoch": 7.7794288667786535, - "learning_rate": 1.5154284973421497e-07, - "loss": 2.5216, - "step": 30100 - }, - { - "epoch": 7.784597493216178, - "learning_rate": 1.5151043692467263e-07, - "loss": 2.521, - "step": 30120 - }, - { - "epoch": 7.789766119653702, - "learning_rate": 1.514780241151303e-07, - "loss": 2.5561, - "step": 30140 - }, - { - "epoch": 7.794934746091227, - "learning_rate": 1.5144561130558795e-07, - "loss": 2.5601, - "step": 30160 - }, - { - "epoch": 7.800103372528751, - "learning_rate": 1.5141319849604564e-07, - "loss": 2.5599, - "step": 30180 - }, - { - "epoch": 7.805271998966274, - "learning_rate": 1.513807856865033e-07, - "loss": 2.6198, - "step": 30200 - }, - { - "epoch": 7.810440625403799, - "learning_rate": 1.5134837287696096e-07, - "loss": 2.6023, - "step": 30220 - }, - { - "epoch": 7.815609251841323, - "learning_rate": 1.5131596006741865e-07, - "loss": 2.5248, - "step": 30240 - }, - { - "epoch": 7.8207778782788475, - "learning_rate": 1.5128354725787631e-07, - "loss": 2.5971, - "step": 30260 - }, - { - "epoch": 7.825946504716372, - "learning_rate": 1.5125113444833398e-07, - "loss": 2.5428, - "step": 30280 - }, - { - "epoch": 7.831115131153896, - "learning_rate": 1.5121872163879164e-07, - "loss": 2.5757, - "step": 30300 - }, - { - "epoch": 7.83628375759142, - "learning_rate": 1.5118630882924933e-07, - "loss": 2.561, - "step": 30320 - }, - { - "epoch": 7.841452384028944, - "learning_rate": 1.51153896019707e-07, - "loss": 2.5586, - "step": 30340 - }, - { - "epoch": 7.846621010466468, - "learning_rate": 1.5112148321016465e-07, - "loss": 2.612, - "step": 30360 - }, - { - "epoch": 7.851789636903993, - "learning_rate": 1.510890704006223e-07, - "loss": 2.5565, - "step": 30380 - }, - { - "epoch": 7.856958263341517, - "learning_rate": 1.5105665759108e-07, - "loss": 2.5571, - "step": 30400 - }, - { - "epoch": 7.8621268897790415, - "learning_rate": 1.5102424478153766e-07, - "loss": 2.5895, - "step": 30420 - }, - { - "epoch": 7.867295516216565, - "learning_rate": 1.5099183197199532e-07, - "loss": 2.5987, - "step": 30440 - }, - { - "epoch": 7.872464142654089, - "learning_rate": 1.5095941916245298e-07, - "loss": 2.5516, - "step": 30460 - }, - { - "epoch": 7.877632769091614, - "learning_rate": 1.5092700635291067e-07, - "loss": 2.5649, - "step": 30480 - }, - { - "epoch": 7.882801395529138, - "learning_rate": 1.5089459354336834e-07, - "loss": 2.5498, - "step": 30500 - }, - { - "epoch": 7.887970021966662, - "learning_rate": 1.50862180733826e-07, - "loss": 2.6242, - "step": 30520 - }, - { - "epoch": 7.893138648404187, - "learning_rate": 1.5082976792428366e-07, - "loss": 2.5774, - "step": 30540 - }, - { - "epoch": 7.898307274841711, - "learning_rate": 1.5079735511474135e-07, - "loss": 2.5298, - "step": 30560 - }, - { - "epoch": 7.903475901279235, - "learning_rate": 1.50764942305199e-07, - "loss": 2.55, - "step": 30580 - }, - { - "epoch": 7.908644527716759, - "learning_rate": 1.5073252949565667e-07, - "loss": 2.5924, - "step": 30600 - }, - { - "epoch": 7.913813154154283, - "learning_rate": 1.5070011668611436e-07, - "loss": 2.515, - "step": 30620 - }, - { - "epoch": 7.918981780591808, - "learning_rate": 1.5066770387657202e-07, - "loss": 2.5605, - "step": 30640 - }, - { - "epoch": 7.924150407029332, - "learning_rate": 1.5063529106702968e-07, - "loss": 2.6101, - "step": 30660 - }, - { - "epoch": 7.929319033466856, - "learning_rate": 1.5060287825748734e-07, - "loss": 2.5612, - "step": 30680 - }, - { - "epoch": 7.934487659904381, - "learning_rate": 1.5057046544794503e-07, - "loss": 2.6157, - "step": 30700 - }, - { - "epoch": 7.939656286341904, - "learning_rate": 1.505380526384027e-07, - "loss": 2.5566, - "step": 30720 - }, - { - "epoch": 7.944824912779429, - "learning_rate": 1.5050563982886036e-07, - "loss": 2.5501, - "step": 30740 - }, - { - "epoch": 7.949993539216953, - "learning_rate": 1.5047322701931802e-07, - "loss": 2.5918, - "step": 30760 - }, - { - "epoch": 7.955162165654477, - "learning_rate": 1.504408142097757e-07, - "loss": 2.5722, - "step": 30780 - }, - { - "epoch": 7.960330792092002, - "learning_rate": 1.5040840140023337e-07, - "loss": 2.5233, - "step": 30800 - }, - { - "epoch": 7.965499418529526, - "learning_rate": 1.5037598859069103e-07, - "loss": 2.5999, - "step": 30820 - }, - { - "epoch": 7.97066804496705, - "learning_rate": 1.503435757811487e-07, - "loss": 2.5356, - "step": 30840 - }, - { - "epoch": 7.975836671404574, - "learning_rate": 1.5031116297160638e-07, - "loss": 2.5524, - "step": 30860 - }, - { - "epoch": 7.981005297842098, - "learning_rate": 1.5027875016206404e-07, - "loss": 2.6066, - "step": 30880 - }, - { - "epoch": 7.986173924279623, - "learning_rate": 1.502463373525217e-07, - "loss": 2.5563, - "step": 30900 - }, - { - "epoch": 7.991342550717147, - "learning_rate": 1.502139245429794e-07, - "loss": 2.5131, - "step": 30920 - }, - { - "epoch": 7.996511177154671, - "learning_rate": 1.5018151173343705e-07, - "loss": 2.5943, - "step": 30940 - }, - { - "epoch": 7.999870784339062, - "eval_bleu": 8.0263, - "eval_gen_len": 40.7538, - "eval_loss": 2.5299909114837646, - "eval_runtime": 863.7677, - "eval_samples_per_second": 1.994, - "eval_steps_per_second": 0.997, - "step": 30953 - }, - { - "epoch": 8.001679803592195, - "learning_rate": 1.5014909892389472e-07, - "loss": 2.5393, - "step": 30960 - }, - { - "epoch": 8.00684843002972, - "learning_rate": 1.5011668611435238e-07, - "loss": 2.4841, - "step": 30980 - }, - { - "epoch": 8.012017056467243, - "learning_rate": 1.5008427330481007e-07, - "loss": 2.5913, - "step": 31000 - }, - { - "epoch": 8.017185682904769, - "learning_rate": 1.5005186049526773e-07, - "loss": 2.5053, - "step": 31020 - }, - { - "epoch": 8.022354309342292, - "learning_rate": 1.500194476857254e-07, - "loss": 2.5612, - "step": 31040 - }, - { - "epoch": 8.027522935779816, - "learning_rate": 1.4998703487618305e-07, - "loss": 2.5478, - "step": 31060 - }, - { - "epoch": 8.032691562217341, - "learning_rate": 1.4995462206664074e-07, - "loss": 2.5783, - "step": 31080 - }, - { - "epoch": 8.037860188654864, - "learning_rate": 1.499222092570984e-07, - "loss": 2.5484, - "step": 31100 - }, - { - "epoch": 8.04302881509239, - "learning_rate": 1.4988979644755606e-07, - "loss": 2.6095, - "step": 31120 - }, - { - "epoch": 8.048197441529913, - "learning_rate": 1.4985738363801373e-07, - "loss": 2.5052, - "step": 31140 - }, - { - "epoch": 8.053366067967438, - "learning_rate": 1.4982497082847141e-07, - "loss": 2.4753, - "step": 31160 - }, - { - "epoch": 8.058534694404962, - "learning_rate": 1.4979255801892908e-07, - "loss": 2.5393, - "step": 31180 - }, - { - "epoch": 8.063703320842485, - "learning_rate": 1.4976014520938674e-07, - "loss": 2.5935, - "step": 31200 - }, - { - "epoch": 8.06887194728001, - "learning_rate": 1.497277323998444e-07, - "loss": 2.5911, - "step": 31220 - }, - { - "epoch": 8.074040573717534, - "learning_rate": 1.496953195903021e-07, - "loss": 2.572, - "step": 31240 - }, - { - "epoch": 8.07920920015506, - "learning_rate": 1.4966290678075975e-07, - "loss": 2.5707, - "step": 31260 - }, - { - "epoch": 8.084377826592583, - "learning_rate": 1.496304939712174e-07, - "loss": 2.5847, - "step": 31280 - }, - { - "epoch": 8.089546453030108, - "learning_rate": 1.4959808116167507e-07, - "loss": 2.5486, - "step": 31300 - }, - { - "epoch": 8.094715079467631, - "learning_rate": 1.4956566835213276e-07, - "loss": 2.5432, - "step": 31320 - }, - { - "epoch": 8.099883705905155, - "learning_rate": 1.4953325554259042e-07, - "loss": 2.5547, - "step": 31340 - }, - { - "epoch": 8.10505233234268, - "learning_rate": 1.4950084273304809e-07, - "loss": 2.5349, - "step": 31360 - }, - { - "epoch": 8.110220958780204, - "learning_rate": 1.4946842992350577e-07, - "loss": 2.5603, - "step": 31380 - }, - { - "epoch": 8.115389585217729, - "learning_rate": 1.4943601711396344e-07, - "loss": 2.5763, - "step": 31400 - }, - { - "epoch": 8.120558211655252, - "learning_rate": 1.494036043044211e-07, - "loss": 2.5194, - "step": 31420 - }, - { - "epoch": 8.125726838092778, - "learning_rate": 1.4937119149487876e-07, - "loss": 2.5414, - "step": 31440 - }, - { - "epoch": 8.130895464530301, - "learning_rate": 1.4933877868533645e-07, - "loss": 2.5459, - "step": 31460 - }, - { - "epoch": 8.136064090967825, - "learning_rate": 1.493063658757941e-07, - "loss": 2.5307, - "step": 31480 - }, - { - "epoch": 8.14123271740535, - "learning_rate": 1.4927395306625177e-07, - "loss": 2.4837, - "step": 31500 - }, - { - "epoch": 8.146401343842873, - "learning_rate": 1.4924154025670943e-07, - "loss": 2.5149, - "step": 31520 - }, - { - "epoch": 8.151569970280399, - "learning_rate": 1.4920912744716712e-07, - "loss": 2.4708, - "step": 31540 - }, - { - "epoch": 8.156738596717922, - "learning_rate": 1.4917671463762478e-07, - "loss": 2.5476, - "step": 31560 - }, - { - "epoch": 8.161907223155447, - "learning_rate": 1.4914430182808245e-07, - "loss": 2.5492, - "step": 31580 - }, - { - "epoch": 8.16707584959297, - "learning_rate": 1.491118890185401e-07, - "loss": 2.5697, - "step": 31600 - }, - { - "epoch": 8.172244476030494, - "learning_rate": 1.490794762089978e-07, - "loss": 2.5333, - "step": 31620 - }, - { - "epoch": 8.17741310246802, - "learning_rate": 1.4904706339945546e-07, - "loss": 2.5197, - "step": 31640 - }, - { - "epoch": 8.182581728905543, - "learning_rate": 1.4901465058991312e-07, - "loss": 2.4984, - "step": 31660 - }, - { - "epoch": 8.187750355343068, - "learning_rate": 1.489822377803708e-07, - "loss": 2.5087, - "step": 31680 - }, - { - "epoch": 8.192918981780592, - "learning_rate": 1.4894982497082847e-07, - "loss": 2.5842, - "step": 31700 - }, - { - "epoch": 8.198087608218117, - "learning_rate": 1.4891741216128613e-07, - "loss": 2.5249, - "step": 31720 - }, - { - "epoch": 8.20325623465564, - "learning_rate": 1.488849993517438e-07, - "loss": 2.5729, - "step": 31740 - }, - { - "epoch": 8.208424861093164, - "learning_rate": 1.4885258654220148e-07, - "loss": 2.5038, - "step": 31760 - }, - { - "epoch": 8.213593487530689, - "learning_rate": 1.4882017373265914e-07, - "loss": 2.512, - "step": 31780 - }, - { - "epoch": 8.218762113968213, - "learning_rate": 1.487877609231168e-07, - "loss": 2.6194, - "step": 31800 - }, - { - "epoch": 8.223930740405738, - "learning_rate": 1.4875534811357447e-07, - "loss": 2.5354, - "step": 31820 - }, - { - "epoch": 8.229099366843261, - "learning_rate": 1.4872293530403216e-07, - "loss": 2.577, - "step": 31840 - }, - { - "epoch": 8.234267993280785, - "learning_rate": 1.4869052249448982e-07, - "loss": 2.5439, - "step": 31860 - }, - { - "epoch": 8.23943661971831, - "learning_rate": 1.4865810968494748e-07, - "loss": 2.4729, - "step": 31880 - }, - { - "epoch": 8.244605246155833, - "learning_rate": 1.4862569687540514e-07, - "loss": 2.5979, - "step": 31900 - }, - { - "epoch": 8.249773872593359, - "learning_rate": 1.4859328406586283e-07, - "loss": 2.5676, - "step": 31920 - }, - { - "epoch": 8.254942499030882, - "learning_rate": 1.485608712563205e-07, - "loss": 2.5859, - "step": 31940 - }, - { - "epoch": 8.260111125468407, - "learning_rate": 1.4852845844677815e-07, - "loss": 2.558, - "step": 31960 - }, - { - "epoch": 8.265279751905931, - "learning_rate": 1.4849604563723584e-07, - "loss": 2.5678, - "step": 31980 - }, - { - "epoch": 8.270448378343456, - "learning_rate": 1.484636328276935e-07, - "loss": 2.5299, - "step": 32000 - }, - { - "epoch": 8.27561700478098, - "learning_rate": 1.4843122001815117e-07, - "loss": 2.5439, - "step": 32020 - }, - { - "epoch": 8.280785631218503, - "learning_rate": 1.4839880720860883e-07, - "loss": 2.4766, - "step": 32040 - }, - { - "epoch": 8.285954257656028, - "learning_rate": 1.4836639439906652e-07, - "loss": 2.5289, - "step": 32060 - }, - { - "epoch": 8.291122884093552, - "learning_rate": 1.4833398158952418e-07, - "loss": 2.5708, - "step": 32080 - }, - { - "epoch": 8.296291510531077, - "learning_rate": 1.4830156877998184e-07, - "loss": 2.4981, - "step": 32100 - }, - { - "epoch": 8.3014601369686, - "learning_rate": 1.482691559704395e-07, - "loss": 2.5352, - "step": 32120 - }, - { - "epoch": 8.306628763406124, - "learning_rate": 1.482367431608972e-07, - "loss": 2.558, - "step": 32140 - }, - { - "epoch": 8.31179738984365, - "learning_rate": 1.4820433035135485e-07, - "loss": 2.503, - "step": 32160 - }, - { - "epoch": 8.316966016281173, - "learning_rate": 1.4817191754181251e-07, - "loss": 2.5792, - "step": 32180 - }, - { - "epoch": 8.322134642718698, - "learning_rate": 1.4813950473227018e-07, - "loss": 2.5312, - "step": 32200 - }, - { - "epoch": 8.327303269156221, - "learning_rate": 1.4810709192272786e-07, - "loss": 2.5503, - "step": 32220 - }, - { - "epoch": 8.332471895593747, - "learning_rate": 1.4807467911318553e-07, - "loss": 2.5442, - "step": 32240 - }, - { - "epoch": 8.33764052203127, - "learning_rate": 1.480422663036432e-07, - "loss": 2.5578, - "step": 32260 - }, - { - "epoch": 8.342809148468794, - "learning_rate": 1.4800985349410088e-07, - "loss": 2.5253, - "step": 32280 - }, - { - "epoch": 8.347977774906319, - "learning_rate": 1.4797744068455854e-07, - "loss": 2.4777, - "step": 32300 - }, - { - "epoch": 8.353146401343842, - "learning_rate": 1.479450278750162e-07, - "loss": 2.525, - "step": 32320 - }, - { - "epoch": 8.358315027781368, - "learning_rate": 1.4791261506547386e-07, - "loss": 2.5804, - "step": 32340 - }, - { - "epoch": 8.363483654218891, - "learning_rate": 1.4788020225593155e-07, - "loss": 2.5387, - "step": 32360 - }, - { - "epoch": 8.368652280656416, - "learning_rate": 1.478477894463892e-07, - "loss": 2.5867, - "step": 32380 - }, - { - "epoch": 8.37382090709394, - "learning_rate": 1.4781537663684687e-07, - "loss": 2.5073, - "step": 32400 - }, - { - "epoch": 8.378989533531463, - "learning_rate": 1.4778296382730454e-07, - "loss": 2.5671, - "step": 32420 - }, - { - "epoch": 8.384158159968988, - "learning_rate": 1.4775055101776222e-07, - "loss": 2.5598, - "step": 32440 - }, - { - "epoch": 8.389326786406512, - "learning_rate": 1.4771813820821989e-07, - "loss": 2.552, - "step": 32460 - }, - { - "epoch": 8.394495412844037, - "learning_rate": 1.4768572539867755e-07, - "loss": 2.5682, - "step": 32480 - }, - { - "epoch": 8.39966403928156, - "learning_rate": 1.476533125891352e-07, - "loss": 2.5659, - "step": 32500 - }, - { - "epoch": 8.404832665719086, - "learning_rate": 1.476208997795929e-07, - "loss": 2.5365, - "step": 32520 - }, - { - "epoch": 8.41000129215661, - "learning_rate": 1.4758848697005056e-07, - "loss": 2.5348, - "step": 32540 - }, - { - "epoch": 8.415169918594133, - "learning_rate": 1.4755607416050822e-07, - "loss": 2.5808, - "step": 32560 - }, - { - "epoch": 8.420338545031658, - "learning_rate": 1.475236613509659e-07, - "loss": 2.5285, - "step": 32580 - }, - { - "epoch": 8.425507171469182, - "learning_rate": 1.4749124854142357e-07, - "loss": 2.5493, - "step": 32600 - }, - { - "epoch": 8.430675797906707, - "learning_rate": 1.4745883573188123e-07, - "loss": 2.5594, - "step": 32620 - }, - { - "epoch": 8.43584442434423, - "learning_rate": 1.474264229223389e-07, - "loss": 2.5149, - "step": 32640 - }, - { - "epoch": 8.441013050781756, - "learning_rate": 1.4739401011279656e-07, - "loss": 2.5238, - "step": 32660 - }, - { - "epoch": 8.446181677219279, - "learning_rate": 1.4736159730325425e-07, - "loss": 2.5444, - "step": 32680 - }, - { - "epoch": 8.451350303656803, - "learning_rate": 1.473291844937119e-07, - "loss": 2.5498, - "step": 32700 - }, - { - "epoch": 8.456518930094328, - "learning_rate": 1.4729677168416957e-07, - "loss": 2.5182, - "step": 32720 - }, - { - "epoch": 8.461687556531851, - "learning_rate": 1.4726435887462726e-07, - "loss": 2.5588, - "step": 32740 - }, - { - "epoch": 8.466856182969376, - "learning_rate": 1.4723194606508492e-07, - "loss": 2.5302, - "step": 32760 - }, - { - "epoch": 8.4720248094069, - "learning_rate": 1.4719953325554258e-07, - "loss": 2.5296, - "step": 32780 - }, - { - "epoch": 8.477193435844425, - "learning_rate": 1.4716712044600024e-07, - "loss": 2.5676, - "step": 32800 - }, - { - "epoch": 8.482362062281949, - "learning_rate": 1.471347076364579e-07, - "loss": 2.567, - "step": 32820 - }, - { - "epoch": 8.487530688719472, - "learning_rate": 1.471022948269156e-07, - "loss": 2.5531, - "step": 32840 - }, - { - "epoch": 8.492699315156997, - "learning_rate": 1.4706988201737326e-07, - "loss": 2.6131, - "step": 32860 - }, - { - "epoch": 8.49786794159452, - "learning_rate": 1.4703746920783092e-07, - "loss": 2.5818, - "step": 32880 - }, - { - "epoch": 8.503036568032046, - "learning_rate": 1.470050563982886e-07, - "loss": 2.542, - "step": 32900 - }, - { - "epoch": 8.50820519446957, - "learning_rate": 1.4697264358874627e-07, - "loss": 2.5656, - "step": 32920 - }, - { - "epoch": 8.513373820907095, - "learning_rate": 1.4694023077920393e-07, - "loss": 2.5548, - "step": 32940 - }, - { - "epoch": 8.518542447344618, - "learning_rate": 1.4690781796966162e-07, - "loss": 2.5295, - "step": 32960 - }, - { - "epoch": 8.523711073782142, - "learning_rate": 1.4687540516011925e-07, - "loss": 2.4961, - "step": 32980 - }, - { - "epoch": 8.528879700219667, - "learning_rate": 1.4684299235057694e-07, - "loss": 2.5361, - "step": 33000 - }, - { - "epoch": 8.53404832665719, - "learning_rate": 1.468105795410346e-07, - "loss": 2.5244, - "step": 33020 - }, - { - "epoch": 8.539216953094716, - "learning_rate": 1.4677816673149227e-07, - "loss": 2.4984, - "step": 33040 - }, - { - "epoch": 8.54438557953224, - "learning_rate": 1.4674575392194995e-07, - "loss": 2.5107, - "step": 33060 - }, - { - "epoch": 8.549554205969763, - "learning_rate": 1.4671334111240762e-07, - "loss": 2.6018, - "step": 33080 - }, - { - "epoch": 8.554722832407288, - "learning_rate": 1.4668092830286528e-07, - "loss": 2.5481, - "step": 33100 - }, - { - "epoch": 8.559891458844811, - "learning_rate": 1.4664851549332297e-07, - "loss": 2.5671, - "step": 33120 - }, - { - "epoch": 8.565060085282337, - "learning_rate": 1.466161026837806e-07, - "loss": 2.5587, - "step": 33140 - }, - { - "epoch": 8.57022871171986, - "learning_rate": 1.465836898742383e-07, - "loss": 2.4905, - "step": 33160 - }, - { - "epoch": 8.575397338157385, - "learning_rate": 1.4655127706469598e-07, - "loss": 2.5159, - "step": 33180 - }, - { - "epoch": 8.580565964594909, - "learning_rate": 1.4651886425515361e-07, - "loss": 2.5419, - "step": 33200 - }, - { - "epoch": 8.585734591032432, - "learning_rate": 1.464864514456113e-07, - "loss": 2.5811, - "step": 33220 - }, - { - "epoch": 8.590903217469958, - "learning_rate": 1.4645403863606896e-07, - "loss": 2.5175, - "step": 33240 - }, - { - "epoch": 8.596071843907481, - "learning_rate": 1.4642162582652663e-07, - "loss": 2.553, - "step": 33260 - }, - { - "epoch": 8.601240470345006, - "learning_rate": 1.4638921301698431e-07, - "loss": 2.5869, - "step": 33280 - }, - { - "epoch": 8.60640909678253, - "learning_rate": 1.4635680020744198e-07, - "loss": 2.5046, - "step": 33300 - }, - { - "epoch": 8.611577723220055, - "learning_rate": 1.4632438739789964e-07, - "loss": 2.5389, - "step": 33320 - }, - { - "epoch": 8.616746349657578, - "learning_rate": 1.4629197458835733e-07, - "loss": 2.5497, - "step": 33340 - }, - { - "epoch": 8.621914976095102, - "learning_rate": 1.4625956177881496e-07, - "loss": 2.5344, - "step": 33360 - }, - { - "epoch": 8.627083602532627, - "learning_rate": 1.4622714896927265e-07, - "loss": 2.6, - "step": 33380 - }, - { - "epoch": 8.63225222897015, - "learning_rate": 1.461947361597303e-07, - "loss": 2.5658, - "step": 33400 - }, - { - "epoch": 8.637420855407676, - "learning_rate": 1.4616232335018797e-07, - "loss": 2.539, - "step": 33420 - }, - { - "epoch": 8.6425894818452, - "learning_rate": 1.4612991054064566e-07, - "loss": 2.5898, - "step": 33440 - }, - { - "epoch": 8.647758108282725, - "learning_rate": 1.4609749773110332e-07, - "loss": 2.4808, - "step": 33460 - }, - { - "epoch": 8.652926734720248, - "learning_rate": 1.4606508492156099e-07, - "loss": 2.4885, - "step": 33480 - }, - { - "epoch": 8.658095361157772, - "learning_rate": 1.4603267211201867e-07, - "loss": 2.5437, - "step": 33500 - }, - { - "epoch": 8.663263987595297, - "learning_rate": 1.4600025930247634e-07, - "loss": 2.5715, - "step": 33520 - }, - { - "epoch": 8.66843261403282, - "learning_rate": 1.45967846492934e-07, - "loss": 2.5828, - "step": 33540 - }, - { - "epoch": 8.673601240470346, - "learning_rate": 1.4593543368339169e-07, - "loss": 2.4961, - "step": 33560 - }, - { - "epoch": 8.678769866907869, - "learning_rate": 1.4590302087384932e-07, - "loss": 2.5212, - "step": 33580 - }, - { - "epoch": 8.683938493345394, - "learning_rate": 1.45870608064307e-07, - "loss": 2.5321, - "step": 33600 - }, - { - "epoch": 8.689107119782918, - "learning_rate": 1.4583819525476467e-07, - "loss": 2.5146, - "step": 33620 - }, - { - "epoch": 8.694275746220441, - "learning_rate": 1.4580578244522233e-07, - "loss": 2.5531, - "step": 33640 - }, - { - "epoch": 8.699444372657966, - "learning_rate": 1.4577336963568002e-07, - "loss": 2.4989, - "step": 33660 - }, - { - "epoch": 8.70461299909549, - "learning_rate": 1.4574095682613768e-07, - "loss": 2.5502, - "step": 33680 - }, - { - "epoch": 8.709781625533015, - "learning_rate": 1.4570854401659535e-07, - "loss": 2.4926, - "step": 33700 - }, - { - "epoch": 8.714950251970539, - "learning_rate": 1.4567613120705303e-07, - "loss": 2.5462, - "step": 33720 - }, - { - "epoch": 8.720118878408064, - "learning_rate": 1.4564371839751067e-07, - "loss": 2.5373, - "step": 33740 - }, - { - "epoch": 8.725287504845587, - "learning_rate": 1.4561130558796836e-07, - "loss": 2.5745, - "step": 33760 - }, - { - "epoch": 8.73045613128311, - "learning_rate": 1.4557889277842605e-07, - "loss": 2.5468, - "step": 33780 - }, - { - "epoch": 8.735624757720636, - "learning_rate": 1.4554647996888368e-07, - "loss": 2.5654, - "step": 33800 - }, - { - "epoch": 8.74079338415816, - "learning_rate": 1.4551406715934137e-07, - "loss": 2.5433, - "step": 33820 - }, - { - "epoch": 8.745962010595685, - "learning_rate": 1.4548165434979903e-07, - "loss": 2.5865, - "step": 33840 - }, - { - "epoch": 8.751130637033208, - "learning_rate": 1.454492415402567e-07, - "loss": 2.5631, - "step": 33860 - }, - { - "epoch": 8.756299263470734, - "learning_rate": 1.4541682873071438e-07, - "loss": 2.5857, - "step": 33880 - }, - { - "epoch": 8.761467889908257, - "learning_rate": 1.4538441592117204e-07, - "loss": 2.5168, - "step": 33900 - }, - { - "epoch": 8.76663651634578, - "learning_rate": 1.453520031116297e-07, - "loss": 2.5316, - "step": 33920 - }, - { - "epoch": 8.771805142783306, - "learning_rate": 1.453195903020874e-07, - "loss": 2.5464, - "step": 33940 - }, - { - "epoch": 8.77697376922083, - "learning_rate": 1.4528717749254503e-07, - "loss": 2.5508, - "step": 33960 - }, - { - "epoch": 8.782142395658354, - "learning_rate": 1.4525476468300272e-07, - "loss": 2.5082, - "step": 33980 - }, - { - "epoch": 8.787311022095878, - "learning_rate": 1.4522235187346038e-07, - "loss": 2.502, - "step": 34000 - }, - { - "epoch": 8.792479648533401, - "learning_rate": 1.4518993906391804e-07, - "loss": 2.5586, - "step": 34020 - }, - { - "epoch": 8.797648274970927, - "learning_rate": 1.4515752625437573e-07, - "loss": 2.5555, - "step": 34040 - }, - { - "epoch": 8.80281690140845, - "learning_rate": 1.451251134448334e-07, - "loss": 2.5683, - "step": 34060 - }, - { - "epoch": 8.807985527845975, - "learning_rate": 1.4509270063529105e-07, - "loss": 2.5462, - "step": 34080 - }, - { - "epoch": 8.813154154283499, - "learning_rate": 1.4506028782574874e-07, - "loss": 2.4885, - "step": 34100 - }, - { - "epoch": 8.818322780721024, - "learning_rate": 1.450278750162064e-07, - "loss": 2.4856, - "step": 34120 - }, - { - "epoch": 8.823491407158548, - "learning_rate": 1.4499546220666407e-07, - "loss": 2.538, - "step": 34140 - }, - { - "epoch": 8.828660033596073, - "learning_rate": 1.4496304939712175e-07, - "loss": 2.5263, - "step": 34160 - }, - { - "epoch": 8.833828660033596, - "learning_rate": 1.449306365875794e-07, - "loss": 2.5397, - "step": 34180 - }, - { - "epoch": 8.83899728647112, - "learning_rate": 1.4489822377803708e-07, - "loss": 2.5295, - "step": 34200 - }, - { - "epoch": 8.844165912908645, - "learning_rate": 1.4486581096849474e-07, - "loss": 2.5853, - "step": 34220 - }, - { - "epoch": 8.849334539346168, - "learning_rate": 1.448333981589524e-07, - "loss": 2.5566, - "step": 34240 - }, - { - "epoch": 8.854503165783694, - "learning_rate": 1.448009853494101e-07, - "loss": 2.5319, - "step": 34260 - }, - { - "epoch": 8.859671792221217, - "learning_rate": 1.4476857253986775e-07, - "loss": 2.4863, - "step": 34280 - }, - { - "epoch": 8.86484041865874, - "learning_rate": 1.4473615973032541e-07, - "loss": 2.5584, - "step": 34300 - }, - { - "epoch": 8.870009045096266, - "learning_rate": 1.447037469207831e-07, - "loss": 2.5321, - "step": 34320 - }, - { - "epoch": 8.87517767153379, - "learning_rate": 1.4467133411124074e-07, - "loss": 2.5304, - "step": 34340 - }, - { - "epoch": 8.880346297971315, - "learning_rate": 1.4463892130169843e-07, - "loss": 2.5564, - "step": 34360 - }, - { - "epoch": 8.885514924408838, - "learning_rate": 1.4460650849215611e-07, - "loss": 2.5027, - "step": 34380 - }, - { - "epoch": 8.890683550846363, - "learning_rate": 1.4457409568261375e-07, - "loss": 2.4922, - "step": 34400 - }, - { - "epoch": 8.895852177283887, - "learning_rate": 1.4454168287307144e-07, - "loss": 2.5166, - "step": 34420 - }, - { - "epoch": 8.90102080372141, - "learning_rate": 1.445092700635291e-07, - "loss": 2.527, - "step": 34440 - }, - { - "epoch": 8.906189430158935, - "learning_rate": 1.4447685725398676e-07, - "loss": 2.5665, - "step": 34460 - }, - { - "epoch": 8.911358056596459, - "learning_rate": 1.4444444444444445e-07, - "loss": 2.5615, - "step": 34480 - }, - { - "epoch": 8.916526683033984, - "learning_rate": 1.444120316349021e-07, - "loss": 2.4899, - "step": 34500 - }, - { - "epoch": 8.921695309471508, - "learning_rate": 1.4437961882535977e-07, - "loss": 2.5012, - "step": 34520 - }, - { - "epoch": 8.926863935909033, - "learning_rate": 1.4434720601581746e-07, - "loss": 2.5526, - "step": 34540 - }, - { - "epoch": 8.932032562346556, - "learning_rate": 1.443147932062751e-07, - "loss": 2.5167, - "step": 34560 - }, - { - "epoch": 8.93720118878408, - "learning_rate": 1.4428238039673279e-07, - "loss": 2.5177, - "step": 34580 - }, - { - "epoch": 8.942369815221605, - "learning_rate": 1.4424996758719045e-07, - "loss": 2.5548, - "step": 34600 - }, - { - "epoch": 8.947538441659129, - "learning_rate": 1.442175547776481e-07, - "loss": 2.5742, - "step": 34620 - }, - { - "epoch": 8.952707068096654, - "learning_rate": 1.441851419681058e-07, - "loss": 2.5149, - "step": 34640 - }, - { - "epoch": 8.957875694534177, - "learning_rate": 1.4415272915856346e-07, - "loss": 2.5042, - "step": 34660 - }, - { - "epoch": 8.963044320971703, - "learning_rate": 1.4412031634902112e-07, - "loss": 2.5076, - "step": 34680 - }, - { - "epoch": 8.968212947409226, - "learning_rate": 1.440879035394788e-07, - "loss": 2.5282, - "step": 34700 - }, - { - "epoch": 8.97338157384675, - "learning_rate": 1.4405549072993647e-07, - "loss": 2.5373, - "step": 34720 - }, - { - "epoch": 8.978550200284275, - "learning_rate": 1.4402307792039413e-07, - "loss": 2.5556, - "step": 34740 - }, - { - "epoch": 8.983718826721798, - "learning_rate": 1.4399066511085182e-07, - "loss": 2.5413, - "step": 34760 - }, - { - "epoch": 8.988887453159323, - "learning_rate": 1.4395825230130946e-07, - "loss": 2.5563, - "step": 34780 - }, - { - "epoch": 8.994056079596847, - "learning_rate": 1.4392583949176715e-07, - "loss": 2.5267, - "step": 34800 - }, - { - "epoch": 8.999224706034372, - "learning_rate": 1.438934266822248e-07, - "loss": 2.5622, - "step": 34820 - }, - { - "epoch": 9.0, - "eval_bleu": 10.2353, - "eval_gen_len": 38.2607, - "eval_loss": 2.504971742630005, - "eval_runtime": 830.1656, - "eval_samples_per_second": 2.074, - "eval_steps_per_second": 1.037, - "step": 34823 } ], "logging_steps": 20, @@ -10568,7 +5884,7 @@ "attributes": {} } }, - "total_flos": 2.2859570485592064e+17, + "total_flos": 1.2701974646813491e+17, "train_batch_size": 2, "trial_name": null, "trial_params": null