{ "best_metric": 35.086130108549604, "best_model_checkpoint": "./whisper-medium-be/checkpoint-3000", "epoch": 4.248088360237893, "eval_steps": 1000, "global_step": 5000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.021240441801189464, "grad_norm": 15.320006370544434, "learning_rate": 4.4e-07, "loss": 1.2164, "step": 25 }, { "epoch": 0.04248088360237893, "grad_norm": 8.513395309448242, "learning_rate": 9.200000000000001e-07, "loss": 0.9688, "step": 50 }, { "epoch": 0.0637213254035684, "grad_norm": 8.396470069885254, "learning_rate": 1.42e-06, "loss": 0.6889, "step": 75 }, { "epoch": 0.08496176720475786, "grad_norm": 5.034422874450684, "learning_rate": 1.9200000000000003e-06, "loss": 0.5062, "step": 100 }, { "epoch": 0.10620220900594732, "grad_norm": 4.539931774139404, "learning_rate": 2.42e-06, "loss": 0.3953, "step": 125 }, { "epoch": 0.1274426508071368, "grad_norm": 5.459708213806152, "learning_rate": 2.92e-06, "loss": 0.3239, "step": 150 }, { "epoch": 0.14868309260832624, "grad_norm": 4.348228931427002, "learning_rate": 3.4200000000000007e-06, "loss": 0.2959, "step": 175 }, { "epoch": 0.16992353440951571, "grad_norm": 4.323150634765625, "learning_rate": 3.920000000000001e-06, "loss": 0.2555, "step": 200 }, { "epoch": 0.1911639762107052, "grad_norm": 3.7983508110046387, "learning_rate": 4.42e-06, "loss": 0.2441, "step": 225 }, { "epoch": 0.21240441801189464, "grad_norm": 5.1654744148254395, "learning_rate": 4.92e-06, "loss": 0.2362, "step": 250 }, { "epoch": 0.2336448598130841, "grad_norm": 4.663578510284424, "learning_rate": 5.420000000000001e-06, "loss": 0.2136, "step": 275 }, { "epoch": 0.2548853016142736, "grad_norm": 4.335070610046387, "learning_rate": 5.92e-06, "loss": 0.2166, "step": 300 }, { "epoch": 0.27612574341546303, "grad_norm": 4.008061408996582, "learning_rate": 6.42e-06, "loss": 0.198, "step": 325 }, { "epoch": 0.2973661852166525, "grad_norm": 3.7923827171325684, "learning_rate": 6.92e-06, "loss": 0.2029, "step": 350 }, { "epoch": 0.318606627017842, "grad_norm": 2.872164487838745, "learning_rate": 7.420000000000001e-06, "loss": 0.2017, "step": 375 }, { "epoch": 0.33984706881903143, "grad_norm": 4.224575996398926, "learning_rate": 7.92e-06, "loss": 0.1787, "step": 400 }, { "epoch": 0.3610875106202209, "grad_norm": 2.8672895431518555, "learning_rate": 8.42e-06, "loss": 0.1834, "step": 425 }, { "epoch": 0.3823279524214104, "grad_norm": 4.826914310455322, "learning_rate": 8.920000000000001e-06, "loss": 0.1806, "step": 450 }, { "epoch": 0.4035683942225998, "grad_norm": 3.9499950408935547, "learning_rate": 9.42e-06, "loss": 0.1944, "step": 475 }, { "epoch": 0.42480883602378927, "grad_norm": 2.290720224380493, "learning_rate": 9.920000000000002e-06, "loss": 0.1816, "step": 500 }, { "epoch": 0.4460492778249788, "grad_norm": 3.0552995204925537, "learning_rate": 9.98551724137931e-06, "loss": 0.1812, "step": 525 }, { "epoch": 0.4672897196261682, "grad_norm": 2.8061227798461914, "learning_rate": 9.968275862068967e-06, "loss": 0.1702, "step": 550 }, { "epoch": 0.48853016142735767, "grad_norm": 2.786482334136963, "learning_rate": 9.951034482758622e-06, "loss": 0.1557, "step": 575 }, { "epoch": 0.5097706032285472, "grad_norm": 2.803558588027954, "learning_rate": 9.933793103448277e-06, "loss": 0.154, "step": 600 }, { "epoch": 0.5310110450297366, "grad_norm": 2.788571834564209, "learning_rate": 9.916551724137932e-06, "loss": 0.1568, "step": 625 }, { "epoch": 0.5522514868309261, "grad_norm": 2.8980190753936768, "learning_rate": 9.899310344827588e-06, "loss": 0.1564, "step": 650 }, { "epoch": 0.5734919286321155, "grad_norm": 2.7520530223846436, "learning_rate": 9.882068965517241e-06, "loss": 0.1515, "step": 675 }, { "epoch": 0.594732370433305, "grad_norm": 3.0198278427124023, "learning_rate": 9.864827586206898e-06, "loss": 0.1415, "step": 700 }, { "epoch": 0.6159728122344945, "grad_norm": 3.080418348312378, "learning_rate": 9.847586206896553e-06, "loss": 0.1539, "step": 725 }, { "epoch": 0.637213254035684, "grad_norm": 3.3412156105041504, "learning_rate": 9.830344827586208e-06, "loss": 0.1411, "step": 750 }, { "epoch": 0.6584536958368734, "grad_norm": 2.7141354084014893, "learning_rate": 9.813103448275862e-06, "loss": 0.1494, "step": 775 }, { "epoch": 0.6796941376380629, "grad_norm": 3.718287944793701, "learning_rate": 9.795862068965517e-06, "loss": 0.1395, "step": 800 }, { "epoch": 0.7009345794392523, "grad_norm": 2.5397861003875732, "learning_rate": 9.778620689655172e-06, "loss": 0.1439, "step": 825 }, { "epoch": 0.7221750212404418, "grad_norm": 2.6340572834014893, "learning_rate": 9.761379310344829e-06, "loss": 0.1365, "step": 850 }, { "epoch": 0.7434154630416313, "grad_norm": 3.546412706375122, "learning_rate": 9.744137931034484e-06, "loss": 0.1358, "step": 875 }, { "epoch": 0.7646559048428208, "grad_norm": 2.8890087604522705, "learning_rate": 9.726896551724139e-06, "loss": 0.1408, "step": 900 }, { "epoch": 0.7858963466440102, "grad_norm": 2.474982500076294, "learning_rate": 9.709655172413795e-06, "loss": 0.1387, "step": 925 }, { "epoch": 0.8071367884451996, "grad_norm": 2.0758702754974365, "learning_rate": 9.692413793103448e-06, "loss": 0.1354, "step": 950 }, { "epoch": 0.8283772302463891, "grad_norm": 2.4454121589660645, "learning_rate": 9.675172413793105e-06, "loss": 0.1319, "step": 975 }, { "epoch": 0.8496176720475785, "grad_norm": 4.150802135467529, "learning_rate": 9.65793103448276e-06, "loss": 0.14, "step": 1000 }, { "epoch": 0.8496176720475785, "eval_loss": 0.13197487592697144, "eval_runtime": 5979.5965, "eval_samples_per_second": 0.789, "eval_steps_per_second": 0.099, "eval_wer": 39.63041804429548, "step": 1000 }, { "epoch": 0.8708581138487681, "grad_norm": 3.1269190311431885, "learning_rate": 9.640689655172415e-06, "loss": 0.1284, "step": 1025 }, { "epoch": 0.8920985556499575, "grad_norm": 2.9756011962890625, "learning_rate": 9.62344827586207e-06, "loss": 0.1238, "step": 1050 }, { "epoch": 0.913338997451147, "grad_norm": 2.2128994464874268, "learning_rate": 9.606206896551726e-06, "loss": 0.129, "step": 1075 }, { "epoch": 0.9345794392523364, "grad_norm": 3.9295644760131836, "learning_rate": 9.58896551724138e-06, "loss": 0.1366, "step": 1100 }, { "epoch": 0.9558198810535259, "grad_norm": 2.3344342708587646, "learning_rate": 9.571724137931036e-06, "loss": 0.1373, "step": 1125 }, { "epoch": 0.9770603228547153, "grad_norm": 3.186511516571045, "learning_rate": 9.55448275862069e-06, "loss": 0.1396, "step": 1150 }, { "epoch": 0.9983007646559049, "grad_norm": 1.856980562210083, "learning_rate": 9.537241379310345e-06, "loss": 0.1226, "step": 1175 }, { "epoch": 1.0195412064570943, "grad_norm": 2.5547516345977783, "learning_rate": 9.52e-06, "loss": 0.0963, "step": 1200 }, { "epoch": 1.0407816482582837, "grad_norm": 2.0279276371002197, "learning_rate": 9.502758620689655e-06, "loss": 0.0973, "step": 1225 }, { "epoch": 1.0620220900594732, "grad_norm": 2.0436954498291016, "learning_rate": 9.48551724137931e-06, "loss": 0.0991, "step": 1250 }, { "epoch": 1.0832625318606628, "grad_norm": 2.1336090564727783, "learning_rate": 9.468275862068967e-06, "loss": 0.1029, "step": 1275 }, { "epoch": 1.1045029736618521, "grad_norm": 3.0516738891601562, "learning_rate": 9.451034482758622e-06, "loss": 0.1043, "step": 1300 }, { "epoch": 1.1257434154630417, "grad_norm": 2.6009092330932617, "learning_rate": 9.433793103448276e-06, "loss": 0.0995, "step": 1325 }, { "epoch": 1.146983857264231, "grad_norm": 2.47499680519104, "learning_rate": 9.416551724137933e-06, "loss": 0.1036, "step": 1350 }, { "epoch": 1.1682242990654206, "grad_norm": 1.6448906660079956, "learning_rate": 9.399310344827586e-06, "loss": 0.1011, "step": 1375 }, { "epoch": 1.18946474086661, "grad_norm": 1.9409310817718506, "learning_rate": 9.382068965517243e-06, "loss": 0.1087, "step": 1400 }, { "epoch": 1.2107051826677995, "grad_norm": 2.1227598190307617, "learning_rate": 9.364827586206898e-06, "loss": 0.1038, "step": 1425 }, { "epoch": 1.231945624468989, "grad_norm": 2.4984381198883057, "learning_rate": 9.347586206896552e-06, "loss": 0.093, "step": 1450 }, { "epoch": 1.2531860662701784, "grad_norm": 1.5227315425872803, "learning_rate": 9.330344827586207e-06, "loss": 0.1014, "step": 1475 }, { "epoch": 1.274426508071368, "grad_norm": 2.6108906269073486, "learning_rate": 9.313103448275864e-06, "loss": 0.095, "step": 1500 }, { "epoch": 1.2956669498725573, "grad_norm": 3.612428903579712, "learning_rate": 9.295862068965517e-06, "loss": 0.0966, "step": 1525 }, { "epoch": 1.3169073916737468, "grad_norm": 2.017115831375122, "learning_rate": 9.278620689655174e-06, "loss": 0.1033, "step": 1550 }, { "epoch": 1.3381478334749364, "grad_norm": 2.264723539352417, "learning_rate": 9.261379310344828e-06, "loss": 0.1043, "step": 1575 }, { "epoch": 1.3593882752761257, "grad_norm": 2.3359768390655518, "learning_rate": 9.244137931034483e-06, "loss": 0.106, "step": 1600 }, { "epoch": 1.3806287170773153, "grad_norm": 3.682831287384033, "learning_rate": 9.226896551724138e-06, "loss": 0.1072, "step": 1625 }, { "epoch": 1.4018691588785046, "grad_norm": 1.7336595058441162, "learning_rate": 9.209655172413793e-06, "loss": 0.0998, "step": 1650 }, { "epoch": 1.4231096006796942, "grad_norm": 3.3865325450897217, "learning_rate": 9.192413793103448e-06, "loss": 0.0977, "step": 1675 }, { "epoch": 1.4443500424808837, "grad_norm": 1.7132407426834106, "learning_rate": 9.175172413793105e-06, "loss": 0.0966, "step": 1700 }, { "epoch": 1.465590484282073, "grad_norm": 2.182499647140503, "learning_rate": 9.15793103448276e-06, "loss": 0.0993, "step": 1725 }, { "epoch": 1.4868309260832624, "grad_norm": 2.137913942337036, "learning_rate": 9.140689655172414e-06, "loss": 0.0959, "step": 1750 }, { "epoch": 1.508071367884452, "grad_norm": 1.5623725652694702, "learning_rate": 9.12344827586207e-06, "loss": 0.0975, "step": 1775 }, { "epoch": 1.5293118096856415, "grad_norm": 1.7139003276824951, "learning_rate": 9.106206896551724e-06, "loss": 0.0903, "step": 1800 }, { "epoch": 1.550552251486831, "grad_norm": 1.6749699115753174, "learning_rate": 9.08896551724138e-06, "loss": 0.1032, "step": 1825 }, { "epoch": 1.5717926932880204, "grad_norm": 1.4775348901748657, "learning_rate": 9.071724137931035e-06, "loss": 0.0938, "step": 1850 }, { "epoch": 1.5930331350892097, "grad_norm": 1.4703927040100098, "learning_rate": 9.05448275862069e-06, "loss": 0.0893, "step": 1875 }, { "epoch": 1.6142735768903993, "grad_norm": 1.614033579826355, "learning_rate": 9.037241379310345e-06, "loss": 0.0986, "step": 1900 }, { "epoch": 1.6355140186915889, "grad_norm": 1.822026014328003, "learning_rate": 9.020000000000002e-06, "loss": 0.0993, "step": 1925 }, { "epoch": 1.6567544604927784, "grad_norm": 3.0836634635925293, "learning_rate": 9.002758620689655e-06, "loss": 0.1044, "step": 1950 }, { "epoch": 1.6779949022939677, "grad_norm": 1.8662205934524536, "learning_rate": 8.985517241379311e-06, "loss": 0.105, "step": 1975 }, { "epoch": 1.699235344095157, "grad_norm": 1.576250433921814, "learning_rate": 8.968275862068966e-06, "loss": 0.0945, "step": 2000 }, { "epoch": 1.699235344095157, "eval_loss": 0.11690503358840942, "eval_runtime": 5994.3147, "eval_samples_per_second": 0.787, "eval_steps_per_second": 0.098, "eval_wer": 36.85704364630041, "step": 2000 }, { "epoch": 1.7204757858963466, "grad_norm": 2.065964937210083, "learning_rate": 8.951034482758621e-06, "loss": 0.0974, "step": 2025 }, { "epoch": 1.7417162276975362, "grad_norm": 1.7408511638641357, "learning_rate": 8.933793103448276e-06, "loss": 0.1026, "step": 2050 }, { "epoch": 1.7629566694987255, "grad_norm": 1.8468472957611084, "learning_rate": 8.916551724137931e-06, "loss": 0.0868, "step": 2075 }, { "epoch": 1.7841971112999149, "grad_norm": 1.8215452432632446, "learning_rate": 8.899310344827588e-06, "loss": 0.0957, "step": 2100 }, { "epoch": 1.8054375531011044, "grad_norm": 1.5454055070877075, "learning_rate": 8.882068965517242e-06, "loss": 0.0948, "step": 2125 }, { "epoch": 1.826677994902294, "grad_norm": 1.947662115097046, "learning_rate": 8.864827586206897e-06, "loss": 0.0971, "step": 2150 }, { "epoch": 1.8479184367034835, "grad_norm": 1.5574594736099243, "learning_rate": 8.847586206896552e-06, "loss": 0.0917, "step": 2175 }, { "epoch": 1.8691588785046729, "grad_norm": 1.2428698539733887, "learning_rate": 8.830344827586209e-06, "loss": 0.1034, "step": 2200 }, { "epoch": 1.8903993203058622, "grad_norm": 1.9029223918914795, "learning_rate": 8.813103448275862e-06, "loss": 0.0978, "step": 2225 }, { "epoch": 1.9116397621070518, "grad_norm": 2.2894680500030518, "learning_rate": 8.795862068965518e-06, "loss": 0.0975, "step": 2250 }, { "epoch": 1.9328802039082413, "grad_norm": 1.284964680671692, "learning_rate": 8.778620689655173e-06, "loss": 0.096, "step": 2275 }, { "epoch": 1.954120645709431, "grad_norm": 1.7411472797393799, "learning_rate": 8.761379310344828e-06, "loss": 0.0961, "step": 2300 }, { "epoch": 1.9753610875106202, "grad_norm": 2.421389102935791, "learning_rate": 8.744137931034483e-06, "loss": 0.0954, "step": 2325 }, { "epoch": 1.9966015293118096, "grad_norm": 2.0307414531707764, "learning_rate": 8.72689655172414e-06, "loss": 0.1041, "step": 2350 }, { "epoch": 2.017841971112999, "grad_norm": 2.228976249694824, "learning_rate": 8.709655172413793e-06, "loss": 0.0677, "step": 2375 }, { "epoch": 2.0390824129141887, "grad_norm": 2.581620693206787, "learning_rate": 8.69241379310345e-06, "loss": 0.07, "step": 2400 }, { "epoch": 2.0603228547153782, "grad_norm": 1.4978506565093994, "learning_rate": 8.675172413793104e-06, "loss": 0.0694, "step": 2425 }, { "epoch": 2.0815632965165674, "grad_norm": 1.972231388092041, "learning_rate": 8.657931034482759e-06, "loss": 0.0667, "step": 2450 }, { "epoch": 2.102803738317757, "grad_norm": 2.09955096244812, "learning_rate": 8.640689655172414e-06, "loss": 0.0663, "step": 2475 }, { "epoch": 2.1240441801189465, "grad_norm": 2.7959463596343994, "learning_rate": 8.623448275862069e-06, "loss": 0.0685, "step": 2500 }, { "epoch": 2.145284621920136, "grad_norm": 1.7082490921020508, "learning_rate": 8.606206896551725e-06, "loss": 0.0631, "step": 2525 }, { "epoch": 2.1665250637213256, "grad_norm": 1.101549506187439, "learning_rate": 8.58896551724138e-06, "loss": 0.0745, "step": 2550 }, { "epoch": 2.1877655055225147, "grad_norm": 1.121897578239441, "learning_rate": 8.571724137931035e-06, "loss": 0.0637, "step": 2575 }, { "epoch": 2.2090059473237043, "grad_norm": 1.6201415061950684, "learning_rate": 8.55448275862069e-06, "loss": 0.0668, "step": 2600 }, { "epoch": 2.230246389124894, "grad_norm": 1.5426428318023682, "learning_rate": 8.537241379310347e-06, "loss": 0.0729, "step": 2625 }, { "epoch": 2.2514868309260834, "grad_norm": 1.564487099647522, "learning_rate": 8.52e-06, "loss": 0.0687, "step": 2650 }, { "epoch": 2.2727272727272725, "grad_norm": 1.3694915771484375, "learning_rate": 8.502758620689656e-06, "loss": 0.0638, "step": 2675 }, { "epoch": 2.293967714528462, "grad_norm": 1.5504151582717896, "learning_rate": 8.485517241379311e-06, "loss": 0.0655, "step": 2700 }, { "epoch": 2.3152081563296516, "grad_norm": 1.066007375717163, "learning_rate": 8.468275862068966e-06, "loss": 0.0727, "step": 2725 }, { "epoch": 2.336448598130841, "grad_norm": 1.760932207107544, "learning_rate": 8.451034482758621e-06, "loss": 0.0665, "step": 2750 }, { "epoch": 2.3576890399320307, "grad_norm": 1.405328392982483, "learning_rate": 8.433793103448277e-06, "loss": 0.0672, "step": 2775 }, { "epoch": 2.37892948173322, "grad_norm": 2.686798572540283, "learning_rate": 8.41655172413793e-06, "loss": 0.0686, "step": 2800 }, { "epoch": 2.4001699235344094, "grad_norm": 1.3735514879226685, "learning_rate": 8.399310344827587e-06, "loss": 0.0659, "step": 2825 }, { "epoch": 2.421410365335599, "grad_norm": 1.1501052379608154, "learning_rate": 8.382068965517242e-06, "loss": 0.0681, "step": 2850 }, { "epoch": 2.4426508071367885, "grad_norm": 1.6193690299987793, "learning_rate": 8.364827586206897e-06, "loss": 0.0698, "step": 2875 }, { "epoch": 2.463891248937978, "grad_norm": 1.5359407663345337, "learning_rate": 8.347586206896552e-06, "loss": 0.0662, "step": 2900 }, { "epoch": 2.485131690739167, "grad_norm": 1.283834457397461, "learning_rate": 8.330344827586208e-06, "loss": 0.0659, "step": 2925 }, { "epoch": 2.5063721325403567, "grad_norm": 1.861663818359375, "learning_rate": 8.313103448275863e-06, "loss": 0.0692, "step": 2950 }, { "epoch": 2.5276125743415463, "grad_norm": 2.0781443119049072, "learning_rate": 8.295862068965518e-06, "loss": 0.0647, "step": 2975 }, { "epoch": 2.548853016142736, "grad_norm": 1.4371556043624878, "learning_rate": 8.278620689655173e-06, "loss": 0.0673, "step": 3000 }, { "epoch": 2.548853016142736, "eval_loss": 0.11657554656267166, "eval_runtime": 5993.138, "eval_samples_per_second": 0.787, "eval_steps_per_second": 0.098, "eval_wer": 35.086130108549604, "step": 3000 }, { "epoch": 2.5700934579439254, "grad_norm": 1.3172967433929443, "learning_rate": 8.261379310344828e-06, "loss": 0.0713, "step": 3025 }, { "epoch": 2.5913338997451145, "grad_norm": 1.6619157791137695, "learning_rate": 8.244137931034484e-06, "loss": 0.0642, "step": 3050 }, { "epoch": 2.612574341546304, "grad_norm": 2.0745792388916016, "learning_rate": 8.226896551724138e-06, "loss": 0.0666, "step": 3075 }, { "epoch": 2.6338147833474936, "grad_norm": 1.6569820642471313, "learning_rate": 8.209655172413794e-06, "loss": 0.0623, "step": 3100 }, { "epoch": 2.655055225148683, "grad_norm": 2.5140037536621094, "learning_rate": 8.192413793103449e-06, "loss": 0.0726, "step": 3125 }, { "epoch": 2.6762956669498728, "grad_norm": 1.3549573421478271, "learning_rate": 8.175172413793104e-06, "loss": 0.0678, "step": 3150 }, { "epoch": 2.697536108751062, "grad_norm": 2.3230879306793213, "learning_rate": 8.157931034482759e-06, "loss": 0.0654, "step": 3175 }, { "epoch": 2.7187765505522514, "grad_norm": 1.4579910039901733, "learning_rate": 8.140689655172415e-06, "loss": 0.0648, "step": 3200 }, { "epoch": 2.740016992353441, "grad_norm": 2.045642852783203, "learning_rate": 8.123448275862069e-06, "loss": 0.0671, "step": 3225 }, { "epoch": 2.7612574341546305, "grad_norm": 1.847688913345337, "learning_rate": 8.106206896551725e-06, "loss": 0.0665, "step": 3250 }, { "epoch": 2.78249787595582, "grad_norm": 2.109140396118164, "learning_rate": 8.08896551724138e-06, "loss": 0.0765, "step": 3275 }, { "epoch": 2.803738317757009, "grad_norm": 1.8210623264312744, "learning_rate": 8.071724137931035e-06, "loss": 0.0684, "step": 3300 }, { "epoch": 2.8249787595581988, "grad_norm": 1.6090059280395508, "learning_rate": 8.054482758620691e-06, "loss": 0.0667, "step": 3325 }, { "epoch": 2.8462192013593883, "grad_norm": 1.2714420557022095, "learning_rate": 8.037241379310346e-06, "loss": 0.0668, "step": 3350 }, { "epoch": 2.867459643160578, "grad_norm": 2.028714656829834, "learning_rate": 8.020000000000001e-06, "loss": 0.0637, "step": 3375 }, { "epoch": 2.8887000849617674, "grad_norm": 1.2474462985992432, "learning_rate": 8.002758620689656e-06, "loss": 0.0654, "step": 3400 }, { "epoch": 2.9099405267629566, "grad_norm": 1.373960018157959, "learning_rate": 7.985517241379311e-06, "loss": 0.066, "step": 3425 }, { "epoch": 2.931180968564146, "grad_norm": 1.931864857673645, "learning_rate": 7.968275862068966e-06, "loss": 0.0685, "step": 3450 }, { "epoch": 2.9524214103653357, "grad_norm": 2.4559192657470703, "learning_rate": 7.951034482758622e-06, "loss": 0.072, "step": 3475 }, { "epoch": 2.973661852166525, "grad_norm": 2.4385876655578613, "learning_rate": 7.933793103448275e-06, "loss": 0.0695, "step": 3500 }, { "epoch": 2.994902293967715, "grad_norm": 1.8550530672073364, "learning_rate": 7.916551724137932e-06, "loss": 0.0714, "step": 3525 }, { "epoch": 3.016142735768904, "grad_norm": 1.8583050966262817, "learning_rate": 7.899310344827587e-06, "loss": 0.051, "step": 3550 }, { "epoch": 3.0373831775700935, "grad_norm": 1.2027584314346313, "learning_rate": 7.882068965517242e-06, "loss": 0.0412, "step": 3575 }, { "epoch": 3.058623619371283, "grad_norm": 1.6738337278366089, "learning_rate": 7.864827586206897e-06, "loss": 0.0476, "step": 3600 }, { "epoch": 3.0798640611724726, "grad_norm": 1.2990750074386597, "learning_rate": 7.847586206896553e-06, "loss": 0.0445, "step": 3625 }, { "epoch": 3.1011045029736617, "grad_norm": 1.6865178346633911, "learning_rate": 7.830344827586206e-06, "loss": 0.0435, "step": 3650 }, { "epoch": 3.1223449447748512, "grad_norm": 1.803493618965149, "learning_rate": 7.813103448275863e-06, "loss": 0.0437, "step": 3675 }, { "epoch": 3.143585386576041, "grad_norm": 1.9559437036514282, "learning_rate": 7.795862068965518e-06, "loss": 0.0433, "step": 3700 }, { "epoch": 3.1648258283772304, "grad_norm": 1.1054346561431885, "learning_rate": 7.778620689655173e-06, "loss": 0.0446, "step": 3725 }, { "epoch": 3.1860662701784195, "grad_norm": 1.3556439876556396, "learning_rate": 7.76137931034483e-06, "loss": 0.0461, "step": 3750 }, { "epoch": 3.207306711979609, "grad_norm": 1.6769301891326904, "learning_rate": 7.744137931034484e-06, "loss": 0.0443, "step": 3775 }, { "epoch": 3.2285471537807986, "grad_norm": 1.7046959400177002, "learning_rate": 7.726896551724139e-06, "loss": 0.0451, "step": 3800 }, { "epoch": 3.249787595581988, "grad_norm": 2.6806883811950684, "learning_rate": 7.709655172413794e-06, "loss": 0.0441, "step": 3825 }, { "epoch": 3.2710280373831777, "grad_norm": 1.0329188108444214, "learning_rate": 7.692413793103449e-06, "loss": 0.0458, "step": 3850 }, { "epoch": 3.292268479184367, "grad_norm": 1.2138526439666748, "learning_rate": 7.675172413793104e-06, "loss": 0.0448, "step": 3875 }, { "epoch": 3.3135089209855564, "grad_norm": 0.8664061427116394, "learning_rate": 7.65793103448276e-06, "loss": 0.0466, "step": 3900 }, { "epoch": 3.334749362786746, "grad_norm": 0.9848700761795044, "learning_rate": 7.640689655172413e-06, "loss": 0.0502, "step": 3925 }, { "epoch": 3.3559898045879355, "grad_norm": 1.179836392402649, "learning_rate": 7.62344827586207e-06, "loss": 0.0462, "step": 3950 }, { "epoch": 3.377230246389125, "grad_norm": 2.3013267517089844, "learning_rate": 7.606206896551725e-06, "loss": 0.0444, "step": 3975 }, { "epoch": 3.398470688190314, "grad_norm": 1.637501835823059, "learning_rate": 7.58896551724138e-06, "loss": 0.0475, "step": 4000 }, { "epoch": 3.398470688190314, "eval_loss": 0.12786675989627838, "eval_runtime": 5959.6356, "eval_samples_per_second": 0.791, "eval_steps_per_second": 0.099, "eval_wer": 35.16010688889896, "step": 4000 }, { "epoch": 3.4197111299915037, "grad_norm": 1.6689115762710571, "learning_rate": 7.5717241379310345e-06, "loss": 0.0424, "step": 4025 }, { "epoch": 3.4409515717926933, "grad_norm": 1.9614856243133545, "learning_rate": 7.55448275862069e-06, "loss": 0.0485, "step": 4050 }, { "epoch": 3.462192013593883, "grad_norm": 1.556401014328003, "learning_rate": 7.537241379310345e-06, "loss": 0.0459, "step": 4075 }, { "epoch": 3.4834324553950724, "grad_norm": 1.4535021781921387, "learning_rate": 7.520000000000001e-06, "loss": 0.0447, "step": 4100 }, { "epoch": 3.5046728971962615, "grad_norm": 0.9936099648475647, "learning_rate": 7.5027586206896566e-06, "loss": 0.0465, "step": 4125 }, { "epoch": 3.525913338997451, "grad_norm": 1.7872281074523926, "learning_rate": 7.485517241379311e-06, "loss": 0.0504, "step": 4150 }, { "epoch": 3.5471537807986406, "grad_norm": 1.3985124826431274, "learning_rate": 7.468275862068966e-06, "loss": 0.0452, "step": 4175 }, { "epoch": 3.56839422259983, "grad_norm": 1.2840551137924194, "learning_rate": 7.451034482758621e-06, "loss": 0.0451, "step": 4200 }, { "epoch": 3.5896346644010197, "grad_norm": 1.9468519687652588, "learning_rate": 7.433793103448277e-06, "loss": 0.0485, "step": 4225 }, { "epoch": 3.610875106202209, "grad_norm": 1.1277813911437988, "learning_rate": 7.416551724137932e-06, "loss": 0.0447, "step": 4250 }, { "epoch": 3.6321155480033984, "grad_norm": 1.3325130939483643, "learning_rate": 7.3993103448275875e-06, "loss": 0.0459, "step": 4275 }, { "epoch": 3.653355989804588, "grad_norm": 1.05362069606781, "learning_rate": 7.3820689655172415e-06, "loss": 0.0475, "step": 4300 }, { "epoch": 3.6745964316057775, "grad_norm": 1.4702696800231934, "learning_rate": 7.364827586206897e-06, "loss": 0.0496, "step": 4325 }, { "epoch": 3.695836873406967, "grad_norm": 1.3535746335983276, "learning_rate": 7.347586206896552e-06, "loss": 0.0454, "step": 4350 }, { "epoch": 3.717077315208156, "grad_norm": 2.088671922683716, "learning_rate": 7.330344827586208e-06, "loss": 0.0513, "step": 4375 }, { "epoch": 3.7383177570093458, "grad_norm": 1.3528611660003662, "learning_rate": 7.313103448275863e-06, "loss": 0.0458, "step": 4400 }, { "epoch": 3.7595581988105353, "grad_norm": 2.404648542404175, "learning_rate": 7.2958620689655175e-06, "loss": 0.0448, "step": 4425 }, { "epoch": 3.780798640611725, "grad_norm": 1.7826001644134521, "learning_rate": 7.278620689655172e-06, "loss": 0.0439, "step": 4450 }, { "epoch": 3.8020390824129144, "grad_norm": 1.5115604400634766, "learning_rate": 7.261379310344828e-06, "loss": 0.0456, "step": 4475 }, { "epoch": 3.8232795242141036, "grad_norm": 1.6182334423065186, "learning_rate": 7.244137931034483e-06, "loss": 0.0441, "step": 4500 }, { "epoch": 3.844519966015293, "grad_norm": 1.242518663406372, "learning_rate": 7.226896551724139e-06, "loss": 0.0465, "step": 4525 }, { "epoch": 3.8657604078164827, "grad_norm": 1.5941393375396729, "learning_rate": 7.2096551724137944e-06, "loss": 0.045, "step": 4550 }, { "epoch": 3.887000849617672, "grad_norm": 1.697571873664856, "learning_rate": 7.1924137931034485e-06, "loss": 0.0487, "step": 4575 }, { "epoch": 3.908241291418862, "grad_norm": 1.3660414218902588, "learning_rate": 7.175172413793104e-06, "loss": 0.046, "step": 4600 }, { "epoch": 3.929481733220051, "grad_norm": 1.4809635877609253, "learning_rate": 7.157931034482759e-06, "loss": 0.0459, "step": 4625 }, { "epoch": 3.9507221750212405, "grad_norm": 1.4990068674087524, "learning_rate": 7.140689655172415e-06, "loss": 0.0489, "step": 4650 }, { "epoch": 3.97196261682243, "grad_norm": 1.7496356964111328, "learning_rate": 7.12344827586207e-06, "loss": 0.0448, "step": 4675 }, { "epoch": 3.993203058623619, "grad_norm": 1.5272170305252075, "learning_rate": 7.106206896551725e-06, "loss": 0.0486, "step": 4700 }, { "epoch": 4.014443500424809, "grad_norm": 1.0526211261749268, "learning_rate": 7.088965517241379e-06, "loss": 0.0352, "step": 4725 }, { "epoch": 4.035683942225998, "grad_norm": 1.301439881324768, "learning_rate": 7.071724137931035e-06, "loss": 0.0289, "step": 4750 }, { "epoch": 4.056924384027187, "grad_norm": 1.026371717453003, "learning_rate": 7.05448275862069e-06, "loss": 0.0273, "step": 4775 }, { "epoch": 4.078164825828377, "grad_norm": 1.4210519790649414, "learning_rate": 7.037241379310346e-06, "loss": 0.0271, "step": 4800 }, { "epoch": 4.0994052676295665, "grad_norm": 1.5456641912460327, "learning_rate": 7.0200000000000006e-06, "loss": 0.0282, "step": 4825 }, { "epoch": 4.1206457094307565, "grad_norm": 1.5171129703521729, "learning_rate": 7.002758620689655e-06, "loss": 0.0294, "step": 4850 }, { "epoch": 4.141886151231946, "grad_norm": 1.4718422889709473, "learning_rate": 6.98551724137931e-06, "loss": 0.0268, "step": 4875 }, { "epoch": 4.163126593033135, "grad_norm": 1.2558562755584717, "learning_rate": 6.968275862068966e-06, "loss": 0.0291, "step": 4900 }, { "epoch": 4.184367034834325, "grad_norm": 0.772474467754364, "learning_rate": 6.951034482758622e-06, "loss": 0.0279, "step": 4925 }, { "epoch": 4.205607476635514, "grad_norm": 1.2242004871368408, "learning_rate": 6.933793103448277e-06, "loss": 0.0296, "step": 4950 }, { "epoch": 4.226847918436704, "grad_norm": 1.9706392288208008, "learning_rate": 6.916551724137932e-06, "loss": 0.0293, "step": 4975 }, { "epoch": 4.248088360237893, "grad_norm": 1.1294701099395752, "learning_rate": 6.899310344827586e-06, "loss": 0.0288, "step": 5000 }, { "epoch": 4.248088360237893, "eval_loss": 0.14424686133861542, "eval_runtime": 5968.4953, "eval_samples_per_second": 0.79, "eval_steps_per_second": 0.099, "eval_wer": 35.7292751785256, "step": 5000 } ], "logging_steps": 25, "max_steps": 15000, "num_input_tokens_seen": 0, "num_train_epochs": 13, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 8.161165082492928e+19, "train_batch_size": 16, "trial_name": null, "trial_params": null }