|
{ |
|
"best_metric": 35.086130108549604, |
|
"best_model_checkpoint": "./whisper-medium-be/checkpoint-3000", |
|
"epoch": 4.248088360237893, |
|
"eval_steps": 1000, |
|
"global_step": 5000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.021240441801189464, |
|
"grad_norm": 15.320006370544434, |
|
"learning_rate": 4.4e-07, |
|
"loss": 1.2164, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.04248088360237893, |
|
"grad_norm": 8.513395309448242, |
|
"learning_rate": 9.200000000000001e-07, |
|
"loss": 0.9688, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.0637213254035684, |
|
"grad_norm": 8.396470069885254, |
|
"learning_rate": 1.42e-06, |
|
"loss": 0.6889, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.08496176720475786, |
|
"grad_norm": 5.034422874450684, |
|
"learning_rate": 1.9200000000000003e-06, |
|
"loss": 0.5062, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.10620220900594732, |
|
"grad_norm": 4.539931774139404, |
|
"learning_rate": 2.42e-06, |
|
"loss": 0.3953, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.1274426508071368, |
|
"grad_norm": 5.459708213806152, |
|
"learning_rate": 2.92e-06, |
|
"loss": 0.3239, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.14868309260832624, |
|
"grad_norm": 4.348228931427002, |
|
"learning_rate": 3.4200000000000007e-06, |
|
"loss": 0.2959, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.16992353440951571, |
|
"grad_norm": 4.323150634765625, |
|
"learning_rate": 3.920000000000001e-06, |
|
"loss": 0.2555, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.1911639762107052, |
|
"grad_norm": 3.7983508110046387, |
|
"learning_rate": 4.42e-06, |
|
"loss": 0.2441, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.21240441801189464, |
|
"grad_norm": 5.1654744148254395, |
|
"learning_rate": 4.92e-06, |
|
"loss": 0.2362, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.2336448598130841, |
|
"grad_norm": 4.663578510284424, |
|
"learning_rate": 5.420000000000001e-06, |
|
"loss": 0.2136, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.2548853016142736, |
|
"grad_norm": 4.335070610046387, |
|
"learning_rate": 5.92e-06, |
|
"loss": 0.2166, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.27612574341546303, |
|
"grad_norm": 4.008061408996582, |
|
"learning_rate": 6.42e-06, |
|
"loss": 0.198, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.2973661852166525, |
|
"grad_norm": 3.7923827171325684, |
|
"learning_rate": 6.92e-06, |
|
"loss": 0.2029, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.318606627017842, |
|
"grad_norm": 2.872164487838745, |
|
"learning_rate": 7.420000000000001e-06, |
|
"loss": 0.2017, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.33984706881903143, |
|
"grad_norm": 4.224575996398926, |
|
"learning_rate": 7.92e-06, |
|
"loss": 0.1787, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.3610875106202209, |
|
"grad_norm": 2.8672895431518555, |
|
"learning_rate": 8.42e-06, |
|
"loss": 0.1834, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.3823279524214104, |
|
"grad_norm": 4.826914310455322, |
|
"learning_rate": 8.920000000000001e-06, |
|
"loss": 0.1806, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.4035683942225998, |
|
"grad_norm": 3.9499950408935547, |
|
"learning_rate": 9.42e-06, |
|
"loss": 0.1944, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.42480883602378927, |
|
"grad_norm": 2.290720224380493, |
|
"learning_rate": 9.920000000000002e-06, |
|
"loss": 0.1816, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.4460492778249788, |
|
"grad_norm": 3.0552995204925537, |
|
"learning_rate": 9.98551724137931e-06, |
|
"loss": 0.1812, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.4672897196261682, |
|
"grad_norm": 2.8061227798461914, |
|
"learning_rate": 9.968275862068967e-06, |
|
"loss": 0.1702, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.48853016142735767, |
|
"grad_norm": 2.786482334136963, |
|
"learning_rate": 9.951034482758622e-06, |
|
"loss": 0.1557, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.5097706032285472, |
|
"grad_norm": 2.803558588027954, |
|
"learning_rate": 9.933793103448277e-06, |
|
"loss": 0.154, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.5310110450297366, |
|
"grad_norm": 2.788571834564209, |
|
"learning_rate": 9.916551724137932e-06, |
|
"loss": 0.1568, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.5522514868309261, |
|
"grad_norm": 2.8980190753936768, |
|
"learning_rate": 9.899310344827588e-06, |
|
"loss": 0.1564, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.5734919286321155, |
|
"grad_norm": 2.7520530223846436, |
|
"learning_rate": 9.882068965517241e-06, |
|
"loss": 0.1515, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.594732370433305, |
|
"grad_norm": 3.0198278427124023, |
|
"learning_rate": 9.864827586206898e-06, |
|
"loss": 0.1415, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.6159728122344945, |
|
"grad_norm": 3.080418348312378, |
|
"learning_rate": 9.847586206896553e-06, |
|
"loss": 0.1539, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.637213254035684, |
|
"grad_norm": 3.3412156105041504, |
|
"learning_rate": 9.830344827586208e-06, |
|
"loss": 0.1411, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.6584536958368734, |
|
"grad_norm": 2.7141354084014893, |
|
"learning_rate": 9.813103448275862e-06, |
|
"loss": 0.1494, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.6796941376380629, |
|
"grad_norm": 3.718287944793701, |
|
"learning_rate": 9.795862068965517e-06, |
|
"loss": 0.1395, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.7009345794392523, |
|
"grad_norm": 2.5397861003875732, |
|
"learning_rate": 9.778620689655172e-06, |
|
"loss": 0.1439, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.7221750212404418, |
|
"grad_norm": 2.6340572834014893, |
|
"learning_rate": 9.761379310344829e-06, |
|
"loss": 0.1365, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.7434154630416313, |
|
"grad_norm": 3.546412706375122, |
|
"learning_rate": 9.744137931034484e-06, |
|
"loss": 0.1358, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.7646559048428208, |
|
"grad_norm": 2.8890087604522705, |
|
"learning_rate": 9.726896551724139e-06, |
|
"loss": 0.1408, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.7858963466440102, |
|
"grad_norm": 2.474982500076294, |
|
"learning_rate": 9.709655172413795e-06, |
|
"loss": 0.1387, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.8071367884451996, |
|
"grad_norm": 2.0758702754974365, |
|
"learning_rate": 9.692413793103448e-06, |
|
"loss": 0.1354, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.8283772302463891, |
|
"grad_norm": 2.4454121589660645, |
|
"learning_rate": 9.675172413793105e-06, |
|
"loss": 0.1319, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.8496176720475785, |
|
"grad_norm": 4.150802135467529, |
|
"learning_rate": 9.65793103448276e-06, |
|
"loss": 0.14, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.8496176720475785, |
|
"eval_loss": 0.13197487592697144, |
|
"eval_runtime": 5979.5965, |
|
"eval_samples_per_second": 0.789, |
|
"eval_steps_per_second": 0.099, |
|
"eval_wer": 39.63041804429548, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.8708581138487681, |
|
"grad_norm": 3.1269190311431885, |
|
"learning_rate": 9.640689655172415e-06, |
|
"loss": 0.1284, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.8920985556499575, |
|
"grad_norm": 2.9756011962890625, |
|
"learning_rate": 9.62344827586207e-06, |
|
"loss": 0.1238, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.913338997451147, |
|
"grad_norm": 2.2128994464874268, |
|
"learning_rate": 9.606206896551726e-06, |
|
"loss": 0.129, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.9345794392523364, |
|
"grad_norm": 3.9295644760131836, |
|
"learning_rate": 9.58896551724138e-06, |
|
"loss": 0.1366, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.9558198810535259, |
|
"grad_norm": 2.3344342708587646, |
|
"learning_rate": 9.571724137931036e-06, |
|
"loss": 0.1373, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.9770603228547153, |
|
"grad_norm": 3.186511516571045, |
|
"learning_rate": 9.55448275862069e-06, |
|
"loss": 0.1396, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.9983007646559049, |
|
"grad_norm": 1.856980562210083, |
|
"learning_rate": 9.537241379310345e-06, |
|
"loss": 0.1226, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 1.0195412064570943, |
|
"grad_norm": 2.5547516345977783, |
|
"learning_rate": 9.52e-06, |
|
"loss": 0.0963, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.0407816482582837, |
|
"grad_norm": 2.0279276371002197, |
|
"learning_rate": 9.502758620689655e-06, |
|
"loss": 0.0973, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 1.0620220900594732, |
|
"grad_norm": 2.0436954498291016, |
|
"learning_rate": 9.48551724137931e-06, |
|
"loss": 0.0991, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 1.0832625318606628, |
|
"grad_norm": 2.1336090564727783, |
|
"learning_rate": 9.468275862068967e-06, |
|
"loss": 0.1029, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 1.1045029736618521, |
|
"grad_norm": 3.0516738891601562, |
|
"learning_rate": 9.451034482758622e-06, |
|
"loss": 0.1043, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.1257434154630417, |
|
"grad_norm": 2.6009092330932617, |
|
"learning_rate": 9.433793103448276e-06, |
|
"loss": 0.0995, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 1.146983857264231, |
|
"grad_norm": 2.47499680519104, |
|
"learning_rate": 9.416551724137933e-06, |
|
"loss": 0.1036, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 1.1682242990654206, |
|
"grad_norm": 1.6448906660079956, |
|
"learning_rate": 9.399310344827586e-06, |
|
"loss": 0.1011, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 1.18946474086661, |
|
"grad_norm": 1.9409310817718506, |
|
"learning_rate": 9.382068965517243e-06, |
|
"loss": 0.1087, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.2107051826677995, |
|
"grad_norm": 2.1227598190307617, |
|
"learning_rate": 9.364827586206898e-06, |
|
"loss": 0.1038, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 1.231945624468989, |
|
"grad_norm": 2.4984381198883057, |
|
"learning_rate": 9.347586206896552e-06, |
|
"loss": 0.093, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 1.2531860662701784, |
|
"grad_norm": 1.5227315425872803, |
|
"learning_rate": 9.330344827586207e-06, |
|
"loss": 0.1014, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 1.274426508071368, |
|
"grad_norm": 2.6108906269073486, |
|
"learning_rate": 9.313103448275864e-06, |
|
"loss": 0.095, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.2956669498725573, |
|
"grad_norm": 3.612428903579712, |
|
"learning_rate": 9.295862068965517e-06, |
|
"loss": 0.0966, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 1.3169073916737468, |
|
"grad_norm": 2.017115831375122, |
|
"learning_rate": 9.278620689655174e-06, |
|
"loss": 0.1033, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 1.3381478334749364, |
|
"grad_norm": 2.264723539352417, |
|
"learning_rate": 9.261379310344828e-06, |
|
"loss": 0.1043, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 1.3593882752761257, |
|
"grad_norm": 2.3359768390655518, |
|
"learning_rate": 9.244137931034483e-06, |
|
"loss": 0.106, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.3806287170773153, |
|
"grad_norm": 3.682831287384033, |
|
"learning_rate": 9.226896551724138e-06, |
|
"loss": 0.1072, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 1.4018691588785046, |
|
"grad_norm": 1.7336595058441162, |
|
"learning_rate": 9.209655172413793e-06, |
|
"loss": 0.0998, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 1.4231096006796942, |
|
"grad_norm": 3.3865325450897217, |
|
"learning_rate": 9.192413793103448e-06, |
|
"loss": 0.0977, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 1.4443500424808837, |
|
"grad_norm": 1.7132407426834106, |
|
"learning_rate": 9.175172413793105e-06, |
|
"loss": 0.0966, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.465590484282073, |
|
"grad_norm": 2.182499647140503, |
|
"learning_rate": 9.15793103448276e-06, |
|
"loss": 0.0993, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 1.4868309260832624, |
|
"grad_norm": 2.137913942337036, |
|
"learning_rate": 9.140689655172414e-06, |
|
"loss": 0.0959, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 1.508071367884452, |
|
"grad_norm": 1.5623725652694702, |
|
"learning_rate": 9.12344827586207e-06, |
|
"loss": 0.0975, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 1.5293118096856415, |
|
"grad_norm": 1.7139003276824951, |
|
"learning_rate": 9.106206896551724e-06, |
|
"loss": 0.0903, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.550552251486831, |
|
"grad_norm": 1.6749699115753174, |
|
"learning_rate": 9.08896551724138e-06, |
|
"loss": 0.1032, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 1.5717926932880204, |
|
"grad_norm": 1.4775348901748657, |
|
"learning_rate": 9.071724137931035e-06, |
|
"loss": 0.0938, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 1.5930331350892097, |
|
"grad_norm": 1.4703927040100098, |
|
"learning_rate": 9.05448275862069e-06, |
|
"loss": 0.0893, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 1.6142735768903993, |
|
"grad_norm": 1.614033579826355, |
|
"learning_rate": 9.037241379310345e-06, |
|
"loss": 0.0986, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.6355140186915889, |
|
"grad_norm": 1.822026014328003, |
|
"learning_rate": 9.020000000000002e-06, |
|
"loss": 0.0993, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 1.6567544604927784, |
|
"grad_norm": 3.0836634635925293, |
|
"learning_rate": 9.002758620689655e-06, |
|
"loss": 0.1044, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 1.6779949022939677, |
|
"grad_norm": 1.8662205934524536, |
|
"learning_rate": 8.985517241379311e-06, |
|
"loss": 0.105, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 1.699235344095157, |
|
"grad_norm": 1.576250433921814, |
|
"learning_rate": 8.968275862068966e-06, |
|
"loss": 0.0945, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.699235344095157, |
|
"eval_loss": 0.11690503358840942, |
|
"eval_runtime": 5994.3147, |
|
"eval_samples_per_second": 0.787, |
|
"eval_steps_per_second": 0.098, |
|
"eval_wer": 36.85704364630041, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.7204757858963466, |
|
"grad_norm": 2.065964937210083, |
|
"learning_rate": 8.951034482758621e-06, |
|
"loss": 0.0974, |
|
"step": 2025 |
|
}, |
|
{ |
|
"epoch": 1.7417162276975362, |
|
"grad_norm": 1.7408511638641357, |
|
"learning_rate": 8.933793103448276e-06, |
|
"loss": 0.1026, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 1.7629566694987255, |
|
"grad_norm": 1.8468472957611084, |
|
"learning_rate": 8.916551724137931e-06, |
|
"loss": 0.0868, |
|
"step": 2075 |
|
}, |
|
{ |
|
"epoch": 1.7841971112999149, |
|
"grad_norm": 1.8215452432632446, |
|
"learning_rate": 8.899310344827588e-06, |
|
"loss": 0.0957, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.8054375531011044, |
|
"grad_norm": 1.5454055070877075, |
|
"learning_rate": 8.882068965517242e-06, |
|
"loss": 0.0948, |
|
"step": 2125 |
|
}, |
|
{ |
|
"epoch": 1.826677994902294, |
|
"grad_norm": 1.947662115097046, |
|
"learning_rate": 8.864827586206897e-06, |
|
"loss": 0.0971, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 1.8479184367034835, |
|
"grad_norm": 1.5574594736099243, |
|
"learning_rate": 8.847586206896552e-06, |
|
"loss": 0.0917, |
|
"step": 2175 |
|
}, |
|
{ |
|
"epoch": 1.8691588785046729, |
|
"grad_norm": 1.2428698539733887, |
|
"learning_rate": 8.830344827586209e-06, |
|
"loss": 0.1034, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.8903993203058622, |
|
"grad_norm": 1.9029223918914795, |
|
"learning_rate": 8.813103448275862e-06, |
|
"loss": 0.0978, |
|
"step": 2225 |
|
}, |
|
{ |
|
"epoch": 1.9116397621070518, |
|
"grad_norm": 2.2894680500030518, |
|
"learning_rate": 8.795862068965518e-06, |
|
"loss": 0.0975, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 1.9328802039082413, |
|
"grad_norm": 1.284964680671692, |
|
"learning_rate": 8.778620689655173e-06, |
|
"loss": 0.096, |
|
"step": 2275 |
|
}, |
|
{ |
|
"epoch": 1.954120645709431, |
|
"grad_norm": 1.7411472797393799, |
|
"learning_rate": 8.761379310344828e-06, |
|
"loss": 0.0961, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.9753610875106202, |
|
"grad_norm": 2.421389102935791, |
|
"learning_rate": 8.744137931034483e-06, |
|
"loss": 0.0954, |
|
"step": 2325 |
|
}, |
|
{ |
|
"epoch": 1.9966015293118096, |
|
"grad_norm": 2.0307414531707764, |
|
"learning_rate": 8.72689655172414e-06, |
|
"loss": 0.1041, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 2.017841971112999, |
|
"grad_norm": 2.228976249694824, |
|
"learning_rate": 8.709655172413793e-06, |
|
"loss": 0.0677, |
|
"step": 2375 |
|
}, |
|
{ |
|
"epoch": 2.0390824129141887, |
|
"grad_norm": 2.581620693206787, |
|
"learning_rate": 8.69241379310345e-06, |
|
"loss": 0.07, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 2.0603228547153782, |
|
"grad_norm": 1.4978506565093994, |
|
"learning_rate": 8.675172413793104e-06, |
|
"loss": 0.0694, |
|
"step": 2425 |
|
}, |
|
{ |
|
"epoch": 2.0815632965165674, |
|
"grad_norm": 1.972231388092041, |
|
"learning_rate": 8.657931034482759e-06, |
|
"loss": 0.0667, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 2.102803738317757, |
|
"grad_norm": 2.09955096244812, |
|
"learning_rate": 8.640689655172414e-06, |
|
"loss": 0.0663, |
|
"step": 2475 |
|
}, |
|
{ |
|
"epoch": 2.1240441801189465, |
|
"grad_norm": 2.7959463596343994, |
|
"learning_rate": 8.623448275862069e-06, |
|
"loss": 0.0685, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 2.145284621920136, |
|
"grad_norm": 1.7082490921020508, |
|
"learning_rate": 8.606206896551725e-06, |
|
"loss": 0.0631, |
|
"step": 2525 |
|
}, |
|
{ |
|
"epoch": 2.1665250637213256, |
|
"grad_norm": 1.101549506187439, |
|
"learning_rate": 8.58896551724138e-06, |
|
"loss": 0.0745, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 2.1877655055225147, |
|
"grad_norm": 1.121897578239441, |
|
"learning_rate": 8.571724137931035e-06, |
|
"loss": 0.0637, |
|
"step": 2575 |
|
}, |
|
{ |
|
"epoch": 2.2090059473237043, |
|
"grad_norm": 1.6201415061950684, |
|
"learning_rate": 8.55448275862069e-06, |
|
"loss": 0.0668, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 2.230246389124894, |
|
"grad_norm": 1.5426428318023682, |
|
"learning_rate": 8.537241379310347e-06, |
|
"loss": 0.0729, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 2.2514868309260834, |
|
"grad_norm": 1.564487099647522, |
|
"learning_rate": 8.52e-06, |
|
"loss": 0.0687, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 2.2727272727272725, |
|
"grad_norm": 1.3694915771484375, |
|
"learning_rate": 8.502758620689656e-06, |
|
"loss": 0.0638, |
|
"step": 2675 |
|
}, |
|
{ |
|
"epoch": 2.293967714528462, |
|
"grad_norm": 1.5504151582717896, |
|
"learning_rate": 8.485517241379311e-06, |
|
"loss": 0.0655, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 2.3152081563296516, |
|
"grad_norm": 1.066007375717163, |
|
"learning_rate": 8.468275862068966e-06, |
|
"loss": 0.0727, |
|
"step": 2725 |
|
}, |
|
{ |
|
"epoch": 2.336448598130841, |
|
"grad_norm": 1.760932207107544, |
|
"learning_rate": 8.451034482758621e-06, |
|
"loss": 0.0665, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 2.3576890399320307, |
|
"grad_norm": 1.405328392982483, |
|
"learning_rate": 8.433793103448277e-06, |
|
"loss": 0.0672, |
|
"step": 2775 |
|
}, |
|
{ |
|
"epoch": 2.37892948173322, |
|
"grad_norm": 2.686798572540283, |
|
"learning_rate": 8.41655172413793e-06, |
|
"loss": 0.0686, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 2.4001699235344094, |
|
"grad_norm": 1.3735514879226685, |
|
"learning_rate": 8.399310344827587e-06, |
|
"loss": 0.0659, |
|
"step": 2825 |
|
}, |
|
{ |
|
"epoch": 2.421410365335599, |
|
"grad_norm": 1.1501052379608154, |
|
"learning_rate": 8.382068965517242e-06, |
|
"loss": 0.0681, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 2.4426508071367885, |
|
"grad_norm": 1.6193690299987793, |
|
"learning_rate": 8.364827586206897e-06, |
|
"loss": 0.0698, |
|
"step": 2875 |
|
}, |
|
{ |
|
"epoch": 2.463891248937978, |
|
"grad_norm": 1.5359407663345337, |
|
"learning_rate": 8.347586206896552e-06, |
|
"loss": 0.0662, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 2.485131690739167, |
|
"grad_norm": 1.283834457397461, |
|
"learning_rate": 8.330344827586208e-06, |
|
"loss": 0.0659, |
|
"step": 2925 |
|
}, |
|
{ |
|
"epoch": 2.5063721325403567, |
|
"grad_norm": 1.861663818359375, |
|
"learning_rate": 8.313103448275863e-06, |
|
"loss": 0.0692, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 2.5276125743415463, |
|
"grad_norm": 2.0781443119049072, |
|
"learning_rate": 8.295862068965518e-06, |
|
"loss": 0.0647, |
|
"step": 2975 |
|
}, |
|
{ |
|
"epoch": 2.548853016142736, |
|
"grad_norm": 1.4371556043624878, |
|
"learning_rate": 8.278620689655173e-06, |
|
"loss": 0.0673, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 2.548853016142736, |
|
"eval_loss": 0.11657554656267166, |
|
"eval_runtime": 5993.138, |
|
"eval_samples_per_second": 0.787, |
|
"eval_steps_per_second": 0.098, |
|
"eval_wer": 35.086130108549604, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 2.5700934579439254, |
|
"grad_norm": 1.3172967433929443, |
|
"learning_rate": 8.261379310344828e-06, |
|
"loss": 0.0713, |
|
"step": 3025 |
|
}, |
|
{ |
|
"epoch": 2.5913338997451145, |
|
"grad_norm": 1.6619157791137695, |
|
"learning_rate": 8.244137931034484e-06, |
|
"loss": 0.0642, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 2.612574341546304, |
|
"grad_norm": 2.0745792388916016, |
|
"learning_rate": 8.226896551724138e-06, |
|
"loss": 0.0666, |
|
"step": 3075 |
|
}, |
|
{ |
|
"epoch": 2.6338147833474936, |
|
"grad_norm": 1.6569820642471313, |
|
"learning_rate": 8.209655172413794e-06, |
|
"loss": 0.0623, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 2.655055225148683, |
|
"grad_norm": 2.5140037536621094, |
|
"learning_rate": 8.192413793103449e-06, |
|
"loss": 0.0726, |
|
"step": 3125 |
|
}, |
|
{ |
|
"epoch": 2.6762956669498728, |
|
"grad_norm": 1.3549573421478271, |
|
"learning_rate": 8.175172413793104e-06, |
|
"loss": 0.0678, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 2.697536108751062, |
|
"grad_norm": 2.3230879306793213, |
|
"learning_rate": 8.157931034482759e-06, |
|
"loss": 0.0654, |
|
"step": 3175 |
|
}, |
|
{ |
|
"epoch": 2.7187765505522514, |
|
"grad_norm": 1.4579910039901733, |
|
"learning_rate": 8.140689655172415e-06, |
|
"loss": 0.0648, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 2.740016992353441, |
|
"grad_norm": 2.045642852783203, |
|
"learning_rate": 8.123448275862069e-06, |
|
"loss": 0.0671, |
|
"step": 3225 |
|
}, |
|
{ |
|
"epoch": 2.7612574341546305, |
|
"grad_norm": 1.847688913345337, |
|
"learning_rate": 8.106206896551725e-06, |
|
"loss": 0.0665, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 2.78249787595582, |
|
"grad_norm": 2.109140396118164, |
|
"learning_rate": 8.08896551724138e-06, |
|
"loss": 0.0765, |
|
"step": 3275 |
|
}, |
|
{ |
|
"epoch": 2.803738317757009, |
|
"grad_norm": 1.8210623264312744, |
|
"learning_rate": 8.071724137931035e-06, |
|
"loss": 0.0684, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 2.8249787595581988, |
|
"grad_norm": 1.6090059280395508, |
|
"learning_rate": 8.054482758620691e-06, |
|
"loss": 0.0667, |
|
"step": 3325 |
|
}, |
|
{ |
|
"epoch": 2.8462192013593883, |
|
"grad_norm": 1.2714420557022095, |
|
"learning_rate": 8.037241379310346e-06, |
|
"loss": 0.0668, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 2.867459643160578, |
|
"grad_norm": 2.028714656829834, |
|
"learning_rate": 8.020000000000001e-06, |
|
"loss": 0.0637, |
|
"step": 3375 |
|
}, |
|
{ |
|
"epoch": 2.8887000849617674, |
|
"grad_norm": 1.2474462985992432, |
|
"learning_rate": 8.002758620689656e-06, |
|
"loss": 0.0654, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 2.9099405267629566, |
|
"grad_norm": 1.373960018157959, |
|
"learning_rate": 7.985517241379311e-06, |
|
"loss": 0.066, |
|
"step": 3425 |
|
}, |
|
{ |
|
"epoch": 2.931180968564146, |
|
"grad_norm": 1.931864857673645, |
|
"learning_rate": 7.968275862068966e-06, |
|
"loss": 0.0685, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 2.9524214103653357, |
|
"grad_norm": 2.4559192657470703, |
|
"learning_rate": 7.951034482758622e-06, |
|
"loss": 0.072, |
|
"step": 3475 |
|
}, |
|
{ |
|
"epoch": 2.973661852166525, |
|
"grad_norm": 2.4385876655578613, |
|
"learning_rate": 7.933793103448275e-06, |
|
"loss": 0.0695, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 2.994902293967715, |
|
"grad_norm": 1.8550530672073364, |
|
"learning_rate": 7.916551724137932e-06, |
|
"loss": 0.0714, |
|
"step": 3525 |
|
}, |
|
{ |
|
"epoch": 3.016142735768904, |
|
"grad_norm": 1.8583050966262817, |
|
"learning_rate": 7.899310344827587e-06, |
|
"loss": 0.051, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 3.0373831775700935, |
|
"grad_norm": 1.2027584314346313, |
|
"learning_rate": 7.882068965517242e-06, |
|
"loss": 0.0412, |
|
"step": 3575 |
|
}, |
|
{ |
|
"epoch": 3.058623619371283, |
|
"grad_norm": 1.6738337278366089, |
|
"learning_rate": 7.864827586206897e-06, |
|
"loss": 0.0476, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 3.0798640611724726, |
|
"grad_norm": 1.2990750074386597, |
|
"learning_rate": 7.847586206896553e-06, |
|
"loss": 0.0445, |
|
"step": 3625 |
|
}, |
|
{ |
|
"epoch": 3.1011045029736617, |
|
"grad_norm": 1.6865178346633911, |
|
"learning_rate": 7.830344827586206e-06, |
|
"loss": 0.0435, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 3.1223449447748512, |
|
"grad_norm": 1.803493618965149, |
|
"learning_rate": 7.813103448275863e-06, |
|
"loss": 0.0437, |
|
"step": 3675 |
|
}, |
|
{ |
|
"epoch": 3.143585386576041, |
|
"grad_norm": 1.9559437036514282, |
|
"learning_rate": 7.795862068965518e-06, |
|
"loss": 0.0433, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 3.1648258283772304, |
|
"grad_norm": 1.1054346561431885, |
|
"learning_rate": 7.778620689655173e-06, |
|
"loss": 0.0446, |
|
"step": 3725 |
|
}, |
|
{ |
|
"epoch": 3.1860662701784195, |
|
"grad_norm": 1.3556439876556396, |
|
"learning_rate": 7.76137931034483e-06, |
|
"loss": 0.0461, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 3.207306711979609, |
|
"grad_norm": 1.6769301891326904, |
|
"learning_rate": 7.744137931034484e-06, |
|
"loss": 0.0443, |
|
"step": 3775 |
|
}, |
|
{ |
|
"epoch": 3.2285471537807986, |
|
"grad_norm": 1.7046959400177002, |
|
"learning_rate": 7.726896551724139e-06, |
|
"loss": 0.0451, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 3.249787595581988, |
|
"grad_norm": 2.6806883811950684, |
|
"learning_rate": 7.709655172413794e-06, |
|
"loss": 0.0441, |
|
"step": 3825 |
|
}, |
|
{ |
|
"epoch": 3.2710280373831777, |
|
"grad_norm": 1.0329188108444214, |
|
"learning_rate": 7.692413793103449e-06, |
|
"loss": 0.0458, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 3.292268479184367, |
|
"grad_norm": 1.2138526439666748, |
|
"learning_rate": 7.675172413793104e-06, |
|
"loss": 0.0448, |
|
"step": 3875 |
|
}, |
|
{ |
|
"epoch": 3.3135089209855564, |
|
"grad_norm": 0.8664061427116394, |
|
"learning_rate": 7.65793103448276e-06, |
|
"loss": 0.0466, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 3.334749362786746, |
|
"grad_norm": 0.9848700761795044, |
|
"learning_rate": 7.640689655172413e-06, |
|
"loss": 0.0502, |
|
"step": 3925 |
|
}, |
|
{ |
|
"epoch": 3.3559898045879355, |
|
"grad_norm": 1.179836392402649, |
|
"learning_rate": 7.62344827586207e-06, |
|
"loss": 0.0462, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 3.377230246389125, |
|
"grad_norm": 2.3013267517089844, |
|
"learning_rate": 7.606206896551725e-06, |
|
"loss": 0.0444, |
|
"step": 3975 |
|
}, |
|
{ |
|
"epoch": 3.398470688190314, |
|
"grad_norm": 1.637501835823059, |
|
"learning_rate": 7.58896551724138e-06, |
|
"loss": 0.0475, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 3.398470688190314, |
|
"eval_loss": 0.12786675989627838, |
|
"eval_runtime": 5959.6356, |
|
"eval_samples_per_second": 0.791, |
|
"eval_steps_per_second": 0.099, |
|
"eval_wer": 35.16010688889896, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 3.4197111299915037, |
|
"grad_norm": 1.6689115762710571, |
|
"learning_rate": 7.5717241379310345e-06, |
|
"loss": 0.0424, |
|
"step": 4025 |
|
}, |
|
{ |
|
"epoch": 3.4409515717926933, |
|
"grad_norm": 1.9614856243133545, |
|
"learning_rate": 7.55448275862069e-06, |
|
"loss": 0.0485, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 3.462192013593883, |
|
"grad_norm": 1.556401014328003, |
|
"learning_rate": 7.537241379310345e-06, |
|
"loss": 0.0459, |
|
"step": 4075 |
|
}, |
|
{ |
|
"epoch": 3.4834324553950724, |
|
"grad_norm": 1.4535021781921387, |
|
"learning_rate": 7.520000000000001e-06, |
|
"loss": 0.0447, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 3.5046728971962615, |
|
"grad_norm": 0.9936099648475647, |
|
"learning_rate": 7.5027586206896566e-06, |
|
"loss": 0.0465, |
|
"step": 4125 |
|
}, |
|
{ |
|
"epoch": 3.525913338997451, |
|
"grad_norm": 1.7872281074523926, |
|
"learning_rate": 7.485517241379311e-06, |
|
"loss": 0.0504, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 3.5471537807986406, |
|
"grad_norm": 1.3985124826431274, |
|
"learning_rate": 7.468275862068966e-06, |
|
"loss": 0.0452, |
|
"step": 4175 |
|
}, |
|
{ |
|
"epoch": 3.56839422259983, |
|
"grad_norm": 1.2840551137924194, |
|
"learning_rate": 7.451034482758621e-06, |
|
"loss": 0.0451, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 3.5896346644010197, |
|
"grad_norm": 1.9468519687652588, |
|
"learning_rate": 7.433793103448277e-06, |
|
"loss": 0.0485, |
|
"step": 4225 |
|
}, |
|
{ |
|
"epoch": 3.610875106202209, |
|
"grad_norm": 1.1277813911437988, |
|
"learning_rate": 7.416551724137932e-06, |
|
"loss": 0.0447, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 3.6321155480033984, |
|
"grad_norm": 1.3325130939483643, |
|
"learning_rate": 7.3993103448275875e-06, |
|
"loss": 0.0459, |
|
"step": 4275 |
|
}, |
|
{ |
|
"epoch": 3.653355989804588, |
|
"grad_norm": 1.05362069606781, |
|
"learning_rate": 7.3820689655172415e-06, |
|
"loss": 0.0475, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 3.6745964316057775, |
|
"grad_norm": 1.4702696800231934, |
|
"learning_rate": 7.364827586206897e-06, |
|
"loss": 0.0496, |
|
"step": 4325 |
|
}, |
|
{ |
|
"epoch": 3.695836873406967, |
|
"grad_norm": 1.3535746335983276, |
|
"learning_rate": 7.347586206896552e-06, |
|
"loss": 0.0454, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 3.717077315208156, |
|
"grad_norm": 2.088671922683716, |
|
"learning_rate": 7.330344827586208e-06, |
|
"loss": 0.0513, |
|
"step": 4375 |
|
}, |
|
{ |
|
"epoch": 3.7383177570093458, |
|
"grad_norm": 1.3528611660003662, |
|
"learning_rate": 7.313103448275863e-06, |
|
"loss": 0.0458, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 3.7595581988105353, |
|
"grad_norm": 2.404648542404175, |
|
"learning_rate": 7.2958620689655175e-06, |
|
"loss": 0.0448, |
|
"step": 4425 |
|
}, |
|
{ |
|
"epoch": 3.780798640611725, |
|
"grad_norm": 1.7826001644134521, |
|
"learning_rate": 7.278620689655172e-06, |
|
"loss": 0.0439, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 3.8020390824129144, |
|
"grad_norm": 1.5115604400634766, |
|
"learning_rate": 7.261379310344828e-06, |
|
"loss": 0.0456, |
|
"step": 4475 |
|
}, |
|
{ |
|
"epoch": 3.8232795242141036, |
|
"grad_norm": 1.6182334423065186, |
|
"learning_rate": 7.244137931034483e-06, |
|
"loss": 0.0441, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 3.844519966015293, |
|
"grad_norm": 1.242518663406372, |
|
"learning_rate": 7.226896551724139e-06, |
|
"loss": 0.0465, |
|
"step": 4525 |
|
}, |
|
{ |
|
"epoch": 3.8657604078164827, |
|
"grad_norm": 1.5941393375396729, |
|
"learning_rate": 7.2096551724137944e-06, |
|
"loss": 0.045, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 3.887000849617672, |
|
"grad_norm": 1.697571873664856, |
|
"learning_rate": 7.1924137931034485e-06, |
|
"loss": 0.0487, |
|
"step": 4575 |
|
}, |
|
{ |
|
"epoch": 3.908241291418862, |
|
"grad_norm": 1.3660414218902588, |
|
"learning_rate": 7.175172413793104e-06, |
|
"loss": 0.046, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 3.929481733220051, |
|
"grad_norm": 1.4809635877609253, |
|
"learning_rate": 7.157931034482759e-06, |
|
"loss": 0.0459, |
|
"step": 4625 |
|
}, |
|
{ |
|
"epoch": 3.9507221750212405, |
|
"grad_norm": 1.4990068674087524, |
|
"learning_rate": 7.140689655172415e-06, |
|
"loss": 0.0489, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 3.97196261682243, |
|
"grad_norm": 1.7496356964111328, |
|
"learning_rate": 7.12344827586207e-06, |
|
"loss": 0.0448, |
|
"step": 4675 |
|
}, |
|
{ |
|
"epoch": 3.993203058623619, |
|
"grad_norm": 1.5272170305252075, |
|
"learning_rate": 7.106206896551725e-06, |
|
"loss": 0.0486, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 4.014443500424809, |
|
"grad_norm": 1.0526211261749268, |
|
"learning_rate": 7.088965517241379e-06, |
|
"loss": 0.0352, |
|
"step": 4725 |
|
}, |
|
{ |
|
"epoch": 4.035683942225998, |
|
"grad_norm": 1.301439881324768, |
|
"learning_rate": 7.071724137931035e-06, |
|
"loss": 0.0289, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 4.056924384027187, |
|
"grad_norm": 1.026371717453003, |
|
"learning_rate": 7.05448275862069e-06, |
|
"loss": 0.0273, |
|
"step": 4775 |
|
}, |
|
{ |
|
"epoch": 4.078164825828377, |
|
"grad_norm": 1.4210519790649414, |
|
"learning_rate": 7.037241379310346e-06, |
|
"loss": 0.0271, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 4.0994052676295665, |
|
"grad_norm": 1.5456641912460327, |
|
"learning_rate": 7.0200000000000006e-06, |
|
"loss": 0.0282, |
|
"step": 4825 |
|
}, |
|
{ |
|
"epoch": 4.1206457094307565, |
|
"grad_norm": 1.5171129703521729, |
|
"learning_rate": 7.002758620689655e-06, |
|
"loss": 0.0294, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 4.141886151231946, |
|
"grad_norm": 1.4718422889709473, |
|
"learning_rate": 6.98551724137931e-06, |
|
"loss": 0.0268, |
|
"step": 4875 |
|
}, |
|
{ |
|
"epoch": 4.163126593033135, |
|
"grad_norm": 1.2558562755584717, |
|
"learning_rate": 6.968275862068966e-06, |
|
"loss": 0.0291, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 4.184367034834325, |
|
"grad_norm": 0.772474467754364, |
|
"learning_rate": 6.951034482758622e-06, |
|
"loss": 0.0279, |
|
"step": 4925 |
|
}, |
|
{ |
|
"epoch": 4.205607476635514, |
|
"grad_norm": 1.2242004871368408, |
|
"learning_rate": 6.933793103448277e-06, |
|
"loss": 0.0296, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 4.226847918436704, |
|
"grad_norm": 1.9706392288208008, |
|
"learning_rate": 6.916551724137932e-06, |
|
"loss": 0.0293, |
|
"step": 4975 |
|
}, |
|
{ |
|
"epoch": 4.248088360237893, |
|
"grad_norm": 1.1294701099395752, |
|
"learning_rate": 6.899310344827586e-06, |
|
"loss": 0.0288, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 4.248088360237893, |
|
"eval_loss": 0.14424686133861542, |
|
"eval_runtime": 5968.4953, |
|
"eval_samples_per_second": 0.79, |
|
"eval_steps_per_second": 0.099, |
|
"eval_wer": 35.7292751785256, |
|
"step": 5000 |
|
} |
|
], |
|
"logging_steps": 25, |
|
"max_steps": 15000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 13, |
|
"save_steps": 1000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 8.161165082492928e+19, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|