whisper-medium-vaani-bengali / trainer_state.json
{
"best_metric": 35.086130108549604,
"best_model_checkpoint": "./whisper-medium-be/checkpoint-3000",
"epoch": 4.248088360237893,
"eval_steps": 1000,
"global_step": 5000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.021240441801189464,
"grad_norm": 15.320006370544434,
"learning_rate": 4.4e-07,
"loss": 1.2164,
"step": 25
},
{
"epoch": 0.04248088360237893,
"grad_norm": 8.513395309448242,
"learning_rate": 9.200000000000001e-07,
"loss": 0.9688,
"step": 50
},
{
"epoch": 0.0637213254035684,
"grad_norm": 8.396470069885254,
"learning_rate": 1.42e-06,
"loss": 0.6889,
"step": 75
},
{
"epoch": 0.08496176720475786,
"grad_norm": 5.034422874450684,
"learning_rate": 1.9200000000000003e-06,
"loss": 0.5062,
"step": 100
},
{
"epoch": 0.10620220900594732,
"grad_norm": 4.539931774139404,
"learning_rate": 2.42e-06,
"loss": 0.3953,
"step": 125
},
{
"epoch": 0.1274426508071368,
"grad_norm": 5.459708213806152,
"learning_rate": 2.92e-06,
"loss": 0.3239,
"step": 150
},
{
"epoch": 0.14868309260832624,
"grad_norm": 4.348228931427002,
"learning_rate": 3.4200000000000007e-06,
"loss": 0.2959,
"step": 175
},
{
"epoch": 0.16992353440951571,
"grad_norm": 4.323150634765625,
"learning_rate": 3.920000000000001e-06,
"loss": 0.2555,
"step": 200
},
{
"epoch": 0.1911639762107052,
"grad_norm": 3.7983508110046387,
"learning_rate": 4.42e-06,
"loss": 0.2441,
"step": 225
},
{
"epoch": 0.21240441801189464,
"grad_norm": 5.1654744148254395,
"learning_rate": 4.92e-06,
"loss": 0.2362,
"step": 250
},
{
"epoch": 0.2336448598130841,
"grad_norm": 4.663578510284424,
"learning_rate": 5.420000000000001e-06,
"loss": 0.2136,
"step": 275
},
{
"epoch": 0.2548853016142736,
"grad_norm": 4.335070610046387,
"learning_rate": 5.92e-06,
"loss": 0.2166,
"step": 300
},
{
"epoch": 0.27612574341546303,
"grad_norm": 4.008061408996582,
"learning_rate": 6.42e-06,
"loss": 0.198,
"step": 325
},
{
"epoch": 0.2973661852166525,
"grad_norm": 3.7923827171325684,
"learning_rate": 6.92e-06,
"loss": 0.2029,
"step": 350
},
{
"epoch": 0.318606627017842,
"grad_norm": 2.872164487838745,
"learning_rate": 7.420000000000001e-06,
"loss": 0.2017,
"step": 375
},
{
"epoch": 0.33984706881903143,
"grad_norm": 4.224575996398926,
"learning_rate": 7.92e-06,
"loss": 0.1787,
"step": 400
},
{
"epoch": 0.3610875106202209,
"grad_norm": 2.8672895431518555,
"learning_rate": 8.42e-06,
"loss": 0.1834,
"step": 425
},
{
"epoch": 0.3823279524214104,
"grad_norm": 4.826914310455322,
"learning_rate": 8.920000000000001e-06,
"loss": 0.1806,
"step": 450
},
{
"epoch": 0.4035683942225998,
"grad_norm": 3.9499950408935547,
"learning_rate": 9.42e-06,
"loss": 0.1944,
"step": 475
},
{
"epoch": 0.42480883602378927,
"grad_norm": 2.290720224380493,
"learning_rate": 9.920000000000002e-06,
"loss": 0.1816,
"step": 500
},
{
"epoch": 0.4460492778249788,
"grad_norm": 3.0552995204925537,
"learning_rate": 9.98551724137931e-06,
"loss": 0.1812,
"step": 525
},
{
"epoch": 0.4672897196261682,
"grad_norm": 2.8061227798461914,
"learning_rate": 9.968275862068967e-06,
"loss": 0.1702,
"step": 550
},
{
"epoch": 0.48853016142735767,
"grad_norm": 2.786482334136963,
"learning_rate": 9.951034482758622e-06,
"loss": 0.1557,
"step": 575
},
{
"epoch": 0.5097706032285472,
"grad_norm": 2.803558588027954,
"learning_rate": 9.933793103448277e-06,
"loss": 0.154,
"step": 600
},
{
"epoch": 0.5310110450297366,
"grad_norm": 2.788571834564209,
"learning_rate": 9.916551724137932e-06,
"loss": 0.1568,
"step": 625
},
{
"epoch": 0.5522514868309261,
"grad_norm": 2.8980190753936768,
"learning_rate": 9.899310344827588e-06,
"loss": 0.1564,
"step": 650
},
{
"epoch": 0.5734919286321155,
"grad_norm": 2.7520530223846436,
"learning_rate": 9.882068965517241e-06,
"loss": 0.1515,
"step": 675
},
{
"epoch": 0.594732370433305,
"grad_norm": 3.0198278427124023,
"learning_rate": 9.864827586206898e-06,
"loss": 0.1415,
"step": 700
},
{
"epoch": 0.6159728122344945,
"grad_norm": 3.080418348312378,
"learning_rate": 9.847586206896553e-06,
"loss": 0.1539,
"step": 725
},
{
"epoch": 0.637213254035684,
"grad_norm": 3.3412156105041504,
"learning_rate": 9.830344827586208e-06,
"loss": 0.1411,
"step": 750
},
{
"epoch": 0.6584536958368734,
"grad_norm": 2.7141354084014893,
"learning_rate": 9.813103448275862e-06,
"loss": 0.1494,
"step": 775
},
{
"epoch": 0.6796941376380629,
"grad_norm": 3.718287944793701,
"learning_rate": 9.795862068965517e-06,
"loss": 0.1395,
"step": 800
},
{
"epoch": 0.7009345794392523,
"grad_norm": 2.5397861003875732,
"learning_rate": 9.778620689655172e-06,
"loss": 0.1439,
"step": 825
},
{
"epoch": 0.7221750212404418,
"grad_norm": 2.6340572834014893,
"learning_rate": 9.761379310344829e-06,
"loss": 0.1365,
"step": 850
},
{
"epoch": 0.7434154630416313,
"grad_norm": 3.546412706375122,
"learning_rate": 9.744137931034484e-06,
"loss": 0.1358,
"step": 875
},
{
"epoch": 0.7646559048428208,
"grad_norm": 2.8890087604522705,
"learning_rate": 9.726896551724139e-06,
"loss": 0.1408,
"step": 900
},
{
"epoch": 0.7858963466440102,
"grad_norm": 2.474982500076294,
"learning_rate": 9.709655172413795e-06,
"loss": 0.1387,
"step": 925
},
{
"epoch": 0.8071367884451996,
"grad_norm": 2.0758702754974365,
"learning_rate": 9.692413793103448e-06,
"loss": 0.1354,
"step": 950
},
{
"epoch": 0.8283772302463891,
"grad_norm": 2.4454121589660645,
"learning_rate": 9.675172413793105e-06,
"loss": 0.1319,
"step": 975
},
{
"epoch": 0.8496176720475785,
"grad_norm": 4.150802135467529,
"learning_rate": 9.65793103448276e-06,
"loss": 0.14,
"step": 1000
},
{
"epoch": 0.8496176720475785,
"eval_loss": 0.13197487592697144,
"eval_runtime": 5979.5965,
"eval_samples_per_second": 0.789,
"eval_steps_per_second": 0.099,
"eval_wer": 39.63041804429548,
"step": 1000
},
{
"epoch": 0.8708581138487681,
"grad_norm": 3.1269190311431885,
"learning_rate": 9.640689655172415e-06,
"loss": 0.1284,
"step": 1025
},
{
"epoch": 0.8920985556499575,
"grad_norm": 2.9756011962890625,
"learning_rate": 9.62344827586207e-06,
"loss": 0.1238,
"step": 1050
},
{
"epoch": 0.913338997451147,
"grad_norm": 2.2128994464874268,
"learning_rate": 9.606206896551726e-06,
"loss": 0.129,
"step": 1075
},
{
"epoch": 0.9345794392523364,
"grad_norm": 3.9295644760131836,
"learning_rate": 9.58896551724138e-06,
"loss": 0.1366,
"step": 1100
},
{
"epoch": 0.9558198810535259,
"grad_norm": 2.3344342708587646,
"learning_rate": 9.571724137931036e-06,
"loss": 0.1373,
"step": 1125
},
{
"epoch": 0.9770603228547153,
"grad_norm": 3.186511516571045,
"learning_rate": 9.55448275862069e-06,
"loss": 0.1396,
"step": 1150
},
{
"epoch": 0.9983007646559049,
"grad_norm": 1.856980562210083,
"learning_rate": 9.537241379310345e-06,
"loss": 0.1226,
"step": 1175
},
{
"epoch": 1.0195412064570943,
"grad_norm": 2.5547516345977783,
"learning_rate": 9.52e-06,
"loss": 0.0963,
"step": 1200
},
{
"epoch": 1.0407816482582837,
"grad_norm": 2.0279276371002197,
"learning_rate": 9.502758620689655e-06,
"loss": 0.0973,
"step": 1225
},
{
"epoch": 1.0620220900594732,
"grad_norm": 2.0436954498291016,
"learning_rate": 9.48551724137931e-06,
"loss": 0.0991,
"step": 1250
},
{
"epoch": 1.0832625318606628,
"grad_norm": 2.1336090564727783,
"learning_rate": 9.468275862068967e-06,
"loss": 0.1029,
"step": 1275
},
{
"epoch": 1.1045029736618521,
"grad_norm": 3.0516738891601562,
"learning_rate": 9.451034482758622e-06,
"loss": 0.1043,
"step": 1300
},
{
"epoch": 1.1257434154630417,
"grad_norm": 2.6009092330932617,
"learning_rate": 9.433793103448276e-06,
"loss": 0.0995,
"step": 1325
},
{
"epoch": 1.146983857264231,
"grad_norm": 2.47499680519104,
"learning_rate": 9.416551724137933e-06,
"loss": 0.1036,
"step": 1350
},
{
"epoch": 1.1682242990654206,
"grad_norm": 1.6448906660079956,
"learning_rate": 9.399310344827586e-06,
"loss": 0.1011,
"step": 1375
},
{
"epoch": 1.18946474086661,
"grad_norm": 1.9409310817718506,
"learning_rate": 9.382068965517243e-06,
"loss": 0.1087,
"step": 1400
},
{
"epoch": 1.2107051826677995,
"grad_norm": 2.1227598190307617,
"learning_rate": 9.364827586206898e-06,
"loss": 0.1038,
"step": 1425
},
{
"epoch": 1.231945624468989,
"grad_norm": 2.4984381198883057,
"learning_rate": 9.347586206896552e-06,
"loss": 0.093,
"step": 1450
},
{
"epoch": 1.2531860662701784,
"grad_norm": 1.5227315425872803,
"learning_rate": 9.330344827586207e-06,
"loss": 0.1014,
"step": 1475
},
{
"epoch": 1.274426508071368,
"grad_norm": 2.6108906269073486,
"learning_rate": 9.313103448275864e-06,
"loss": 0.095,
"step": 1500
},
{
"epoch": 1.2956669498725573,
"grad_norm": 3.612428903579712,
"learning_rate": 9.295862068965517e-06,
"loss": 0.0966,
"step": 1525
},
{
"epoch": 1.3169073916737468,
"grad_norm": 2.017115831375122,
"learning_rate": 9.278620689655174e-06,
"loss": 0.1033,
"step": 1550
},
{
"epoch": 1.3381478334749364,
"grad_norm": 2.264723539352417,
"learning_rate": 9.261379310344828e-06,
"loss": 0.1043,
"step": 1575
},
{
"epoch": 1.3593882752761257,
"grad_norm": 2.3359768390655518,
"learning_rate": 9.244137931034483e-06,
"loss": 0.106,
"step": 1600
},
{
"epoch": 1.3806287170773153,
"grad_norm": 3.682831287384033,
"learning_rate": 9.226896551724138e-06,
"loss": 0.1072,
"step": 1625
},
{
"epoch": 1.4018691588785046,
"grad_norm": 1.7336595058441162,
"learning_rate": 9.209655172413793e-06,
"loss": 0.0998,
"step": 1650
},
{
"epoch": 1.4231096006796942,
"grad_norm": 3.3865325450897217,
"learning_rate": 9.192413793103448e-06,
"loss": 0.0977,
"step": 1675
},
{
"epoch": 1.4443500424808837,
"grad_norm": 1.7132407426834106,
"learning_rate": 9.175172413793105e-06,
"loss": 0.0966,
"step": 1700
},
{
"epoch": 1.465590484282073,
"grad_norm": 2.182499647140503,
"learning_rate": 9.15793103448276e-06,
"loss": 0.0993,
"step": 1725
},
{
"epoch": 1.4868309260832624,
"grad_norm": 2.137913942337036,
"learning_rate": 9.140689655172414e-06,
"loss": 0.0959,
"step": 1750
},
{
"epoch": 1.508071367884452,
"grad_norm": 1.5623725652694702,
"learning_rate": 9.12344827586207e-06,
"loss": 0.0975,
"step": 1775
},
{
"epoch": 1.5293118096856415,
"grad_norm": 1.7139003276824951,
"learning_rate": 9.106206896551724e-06,
"loss": 0.0903,
"step": 1800
},
{
"epoch": 1.550552251486831,
"grad_norm": 1.6749699115753174,
"learning_rate": 9.08896551724138e-06,
"loss": 0.1032,
"step": 1825
},
{
"epoch": 1.5717926932880204,
"grad_norm": 1.4775348901748657,
"learning_rate": 9.071724137931035e-06,
"loss": 0.0938,
"step": 1850
},
{
"epoch": 1.5930331350892097,
"grad_norm": 1.4703927040100098,
"learning_rate": 9.05448275862069e-06,
"loss": 0.0893,
"step": 1875
},
{
"epoch": 1.6142735768903993,
"grad_norm": 1.614033579826355,
"learning_rate": 9.037241379310345e-06,
"loss": 0.0986,
"step": 1900
},
{
"epoch": 1.6355140186915889,
"grad_norm": 1.822026014328003,
"learning_rate": 9.020000000000002e-06,
"loss": 0.0993,
"step": 1925
},
{
"epoch": 1.6567544604927784,
"grad_norm": 3.0836634635925293,
"learning_rate": 9.002758620689655e-06,
"loss": 0.1044,
"step": 1950
},
{
"epoch": 1.6779949022939677,
"grad_norm": 1.8662205934524536,
"learning_rate": 8.985517241379311e-06,
"loss": 0.105,
"step": 1975
},
{
"epoch": 1.699235344095157,
"grad_norm": 1.576250433921814,
"learning_rate": 8.968275862068966e-06,
"loss": 0.0945,
"step": 2000
},
{
"epoch": 1.699235344095157,
"eval_loss": 0.11690503358840942,
"eval_runtime": 5994.3147,
"eval_samples_per_second": 0.787,
"eval_steps_per_second": 0.098,
"eval_wer": 36.85704364630041,
"step": 2000
},
{
"epoch": 1.7204757858963466,
"grad_norm": 2.065964937210083,
"learning_rate": 8.951034482758621e-06,
"loss": 0.0974,
"step": 2025
},
{
"epoch": 1.7417162276975362,
"grad_norm": 1.7408511638641357,
"learning_rate": 8.933793103448276e-06,
"loss": 0.1026,
"step": 2050
},
{
"epoch": 1.7629566694987255,
"grad_norm": 1.8468472957611084,
"learning_rate": 8.916551724137931e-06,
"loss": 0.0868,
"step": 2075
},
{
"epoch": 1.7841971112999149,
"grad_norm": 1.8215452432632446,
"learning_rate": 8.899310344827588e-06,
"loss": 0.0957,
"step": 2100
},
{
"epoch": 1.8054375531011044,
"grad_norm": 1.5454055070877075,
"learning_rate": 8.882068965517242e-06,
"loss": 0.0948,
"step": 2125
},
{
"epoch": 1.826677994902294,
"grad_norm": 1.947662115097046,
"learning_rate": 8.864827586206897e-06,
"loss": 0.0971,
"step": 2150
},
{
"epoch": 1.8479184367034835,
"grad_norm": 1.5574594736099243,
"learning_rate": 8.847586206896552e-06,
"loss": 0.0917,
"step": 2175
},
{
"epoch": 1.8691588785046729,
"grad_norm": 1.2428698539733887,
"learning_rate": 8.830344827586209e-06,
"loss": 0.1034,
"step": 2200
},
{
"epoch": 1.8903993203058622,
"grad_norm": 1.9029223918914795,
"learning_rate": 8.813103448275862e-06,
"loss": 0.0978,
"step": 2225
},
{
"epoch": 1.9116397621070518,
"grad_norm": 2.2894680500030518,
"learning_rate": 8.795862068965518e-06,
"loss": 0.0975,
"step": 2250
},
{
"epoch": 1.9328802039082413,
"grad_norm": 1.284964680671692,
"learning_rate": 8.778620689655173e-06,
"loss": 0.096,
"step": 2275
},
{
"epoch": 1.954120645709431,
"grad_norm": 1.7411472797393799,
"learning_rate": 8.761379310344828e-06,
"loss": 0.0961,
"step": 2300
},
{
"epoch": 1.9753610875106202,
"grad_norm": 2.421389102935791,
"learning_rate": 8.744137931034483e-06,
"loss": 0.0954,
"step": 2325
},
{
"epoch": 1.9966015293118096,
"grad_norm": 2.0307414531707764,
"learning_rate": 8.72689655172414e-06,
"loss": 0.1041,
"step": 2350
},
{
"epoch": 2.017841971112999,
"grad_norm": 2.228976249694824,
"learning_rate": 8.709655172413793e-06,
"loss": 0.0677,
"step": 2375
},
{
"epoch": 2.0390824129141887,
"grad_norm": 2.581620693206787,
"learning_rate": 8.69241379310345e-06,
"loss": 0.07,
"step": 2400
},
{
"epoch": 2.0603228547153782,
"grad_norm": 1.4978506565093994,
"learning_rate": 8.675172413793104e-06,
"loss": 0.0694,
"step": 2425
},
{
"epoch": 2.0815632965165674,
"grad_norm": 1.972231388092041,
"learning_rate": 8.657931034482759e-06,
"loss": 0.0667,
"step": 2450
},
{
"epoch": 2.102803738317757,
"grad_norm": 2.09955096244812,
"learning_rate": 8.640689655172414e-06,
"loss": 0.0663,
"step": 2475
},
{
"epoch": 2.1240441801189465,
"grad_norm": 2.7959463596343994,
"learning_rate": 8.623448275862069e-06,
"loss": 0.0685,
"step": 2500
},
{
"epoch": 2.145284621920136,
"grad_norm": 1.7082490921020508,
"learning_rate": 8.606206896551725e-06,
"loss": 0.0631,
"step": 2525
},
{
"epoch": 2.1665250637213256,
"grad_norm": 1.101549506187439,
"learning_rate": 8.58896551724138e-06,
"loss": 0.0745,
"step": 2550
},
{
"epoch": 2.1877655055225147,
"grad_norm": 1.121897578239441,
"learning_rate": 8.571724137931035e-06,
"loss": 0.0637,
"step": 2575
},
{
"epoch": 2.2090059473237043,
"grad_norm": 1.6201415061950684,
"learning_rate": 8.55448275862069e-06,
"loss": 0.0668,
"step": 2600
},
{
"epoch": 2.230246389124894,
"grad_norm": 1.5426428318023682,
"learning_rate": 8.537241379310347e-06,
"loss": 0.0729,
"step": 2625
},
{
"epoch": 2.2514868309260834,
"grad_norm": 1.564487099647522,
"learning_rate": 8.52e-06,
"loss": 0.0687,
"step": 2650
},
{
"epoch": 2.2727272727272725,
"grad_norm": 1.3694915771484375,
"learning_rate": 8.502758620689656e-06,
"loss": 0.0638,
"step": 2675
},
{
"epoch": 2.293967714528462,
"grad_norm": 1.5504151582717896,
"learning_rate": 8.485517241379311e-06,
"loss": 0.0655,
"step": 2700
},
{
"epoch": 2.3152081563296516,
"grad_norm": 1.066007375717163,
"learning_rate": 8.468275862068966e-06,
"loss": 0.0727,
"step": 2725
},
{
"epoch": 2.336448598130841,
"grad_norm": 1.760932207107544,
"learning_rate": 8.451034482758621e-06,
"loss": 0.0665,
"step": 2750
},
{
"epoch": 2.3576890399320307,
"grad_norm": 1.405328392982483,
"learning_rate": 8.433793103448277e-06,
"loss": 0.0672,
"step": 2775
},
{
"epoch": 2.37892948173322,
"grad_norm": 2.686798572540283,
"learning_rate": 8.41655172413793e-06,
"loss": 0.0686,
"step": 2800
},
{
"epoch": 2.4001699235344094,
"grad_norm": 1.3735514879226685,
"learning_rate": 8.399310344827587e-06,
"loss": 0.0659,
"step": 2825
},
{
"epoch": 2.421410365335599,
"grad_norm": 1.1501052379608154,
"learning_rate": 8.382068965517242e-06,
"loss": 0.0681,
"step": 2850
},
{
"epoch": 2.4426508071367885,
"grad_norm": 1.6193690299987793,
"learning_rate": 8.364827586206897e-06,
"loss": 0.0698,
"step": 2875
},
{
"epoch": 2.463891248937978,
"grad_norm": 1.5359407663345337,
"learning_rate": 8.347586206896552e-06,
"loss": 0.0662,
"step": 2900
},
{
"epoch": 2.485131690739167,
"grad_norm": 1.283834457397461,
"learning_rate": 8.330344827586208e-06,
"loss": 0.0659,
"step": 2925
},
{
"epoch": 2.5063721325403567,
"grad_norm": 1.861663818359375,
"learning_rate": 8.313103448275863e-06,
"loss": 0.0692,
"step": 2950
},
{
"epoch": 2.5276125743415463,
"grad_norm": 2.0781443119049072,
"learning_rate": 8.295862068965518e-06,
"loss": 0.0647,
"step": 2975
},
{
"epoch": 2.548853016142736,
"grad_norm": 1.4371556043624878,
"learning_rate": 8.278620689655173e-06,
"loss": 0.0673,
"step": 3000
},
{
"epoch": 2.548853016142736,
"eval_loss": 0.11657554656267166,
"eval_runtime": 5993.138,
"eval_samples_per_second": 0.787,
"eval_steps_per_second": 0.098,
"eval_wer": 35.086130108549604,
"step": 3000
},
{
"epoch": 2.5700934579439254,
"grad_norm": 1.3172967433929443,
"learning_rate": 8.261379310344828e-06,
"loss": 0.0713,
"step": 3025
},
{
"epoch": 2.5913338997451145,
"grad_norm": 1.6619157791137695,
"learning_rate": 8.244137931034484e-06,
"loss": 0.0642,
"step": 3050
},
{
"epoch": 2.612574341546304,
"grad_norm": 2.0745792388916016,
"learning_rate": 8.226896551724138e-06,
"loss": 0.0666,
"step": 3075
},
{
"epoch": 2.6338147833474936,
"grad_norm": 1.6569820642471313,
"learning_rate": 8.209655172413794e-06,
"loss": 0.0623,
"step": 3100
},
{
"epoch": 2.655055225148683,
"grad_norm": 2.5140037536621094,
"learning_rate": 8.192413793103449e-06,
"loss": 0.0726,
"step": 3125
},
{
"epoch": 2.6762956669498728,
"grad_norm": 1.3549573421478271,
"learning_rate": 8.175172413793104e-06,
"loss": 0.0678,
"step": 3150
},
{
"epoch": 2.697536108751062,
"grad_norm": 2.3230879306793213,
"learning_rate": 8.157931034482759e-06,
"loss": 0.0654,
"step": 3175
},
{
"epoch": 2.7187765505522514,
"grad_norm": 1.4579910039901733,
"learning_rate": 8.140689655172415e-06,
"loss": 0.0648,
"step": 3200
},
{
"epoch": 2.740016992353441,
"grad_norm": 2.045642852783203,
"learning_rate": 8.123448275862069e-06,
"loss": 0.0671,
"step": 3225
},
{
"epoch": 2.7612574341546305,
"grad_norm": 1.847688913345337,
"learning_rate": 8.106206896551725e-06,
"loss": 0.0665,
"step": 3250
},
{
"epoch": 2.78249787595582,
"grad_norm": 2.109140396118164,
"learning_rate": 8.08896551724138e-06,
"loss": 0.0765,
"step": 3275
},
{
"epoch": 2.803738317757009,
"grad_norm": 1.8210623264312744,
"learning_rate": 8.071724137931035e-06,
"loss": 0.0684,
"step": 3300
},
{
"epoch": 2.8249787595581988,
"grad_norm": 1.6090059280395508,
"learning_rate": 8.054482758620691e-06,
"loss": 0.0667,
"step": 3325
},
{
"epoch": 2.8462192013593883,
"grad_norm": 1.2714420557022095,
"learning_rate": 8.037241379310346e-06,
"loss": 0.0668,
"step": 3350
},
{
"epoch": 2.867459643160578,
"grad_norm": 2.028714656829834,
"learning_rate": 8.020000000000001e-06,
"loss": 0.0637,
"step": 3375
},
{
"epoch": 2.8887000849617674,
"grad_norm": 1.2474462985992432,
"learning_rate": 8.002758620689656e-06,
"loss": 0.0654,
"step": 3400
},
{
"epoch": 2.9099405267629566,
"grad_norm": 1.373960018157959,
"learning_rate": 7.985517241379311e-06,
"loss": 0.066,
"step": 3425
},
{
"epoch": 2.931180968564146,
"grad_norm": 1.931864857673645,
"learning_rate": 7.968275862068966e-06,
"loss": 0.0685,
"step": 3450
},
{
"epoch": 2.9524214103653357,
"grad_norm": 2.4559192657470703,
"learning_rate": 7.951034482758622e-06,
"loss": 0.072,
"step": 3475
},
{
"epoch": 2.973661852166525,
"grad_norm": 2.4385876655578613,
"learning_rate": 7.933793103448275e-06,
"loss": 0.0695,
"step": 3500
},
{
"epoch": 2.994902293967715,
"grad_norm": 1.8550530672073364,
"learning_rate": 7.916551724137932e-06,
"loss": 0.0714,
"step": 3525
},
{
"epoch": 3.016142735768904,
"grad_norm": 1.8583050966262817,
"learning_rate": 7.899310344827587e-06,
"loss": 0.051,
"step": 3550
},
{
"epoch": 3.0373831775700935,
"grad_norm": 1.2027584314346313,
"learning_rate": 7.882068965517242e-06,
"loss": 0.0412,
"step": 3575
},
{
"epoch": 3.058623619371283,
"grad_norm": 1.6738337278366089,
"learning_rate": 7.864827586206897e-06,
"loss": 0.0476,
"step": 3600
},
{
"epoch": 3.0798640611724726,
"grad_norm": 1.2990750074386597,
"learning_rate": 7.847586206896553e-06,
"loss": 0.0445,
"step": 3625
},
{
"epoch": 3.1011045029736617,
"grad_norm": 1.6865178346633911,
"learning_rate": 7.830344827586206e-06,
"loss": 0.0435,
"step": 3650
},
{
"epoch": 3.1223449447748512,
"grad_norm": 1.803493618965149,
"learning_rate": 7.813103448275863e-06,
"loss": 0.0437,
"step": 3675
},
{
"epoch": 3.143585386576041,
"grad_norm": 1.9559437036514282,
"learning_rate": 7.795862068965518e-06,
"loss": 0.0433,
"step": 3700
},
{
"epoch": 3.1648258283772304,
"grad_norm": 1.1054346561431885,
"learning_rate": 7.778620689655173e-06,
"loss": 0.0446,
"step": 3725
},
{
"epoch": 3.1860662701784195,
"grad_norm": 1.3556439876556396,
"learning_rate": 7.76137931034483e-06,
"loss": 0.0461,
"step": 3750
},
{
"epoch": 3.207306711979609,
"grad_norm": 1.6769301891326904,
"learning_rate": 7.744137931034484e-06,
"loss": 0.0443,
"step": 3775
},
{
"epoch": 3.2285471537807986,
"grad_norm": 1.7046959400177002,
"learning_rate": 7.726896551724139e-06,
"loss": 0.0451,
"step": 3800
},
{
"epoch": 3.249787595581988,
"grad_norm": 2.6806883811950684,
"learning_rate": 7.709655172413794e-06,
"loss": 0.0441,
"step": 3825
},
{
"epoch": 3.2710280373831777,
"grad_norm": 1.0329188108444214,
"learning_rate": 7.692413793103449e-06,
"loss": 0.0458,
"step": 3850
},
{
"epoch": 3.292268479184367,
"grad_norm": 1.2138526439666748,
"learning_rate": 7.675172413793104e-06,
"loss": 0.0448,
"step": 3875
},
{
"epoch": 3.3135089209855564,
"grad_norm": 0.8664061427116394,
"learning_rate": 7.65793103448276e-06,
"loss": 0.0466,
"step": 3900
},
{
"epoch": 3.334749362786746,
"grad_norm": 0.9848700761795044,
"learning_rate": 7.640689655172413e-06,
"loss": 0.0502,
"step": 3925
},
{
"epoch": 3.3559898045879355,
"grad_norm": 1.179836392402649,
"learning_rate": 7.62344827586207e-06,
"loss": 0.0462,
"step": 3950
},
{
"epoch": 3.377230246389125,
"grad_norm": 2.3013267517089844,
"learning_rate": 7.606206896551725e-06,
"loss": 0.0444,
"step": 3975
},
{
"epoch": 3.398470688190314,
"grad_norm": 1.637501835823059,
"learning_rate": 7.58896551724138e-06,
"loss": 0.0475,
"step": 4000
},
{
"epoch": 3.398470688190314,
"eval_loss": 0.12786675989627838,
"eval_runtime": 5959.6356,
"eval_samples_per_second": 0.791,
"eval_steps_per_second": 0.099,
"eval_wer": 35.16010688889896,
"step": 4000
},
{
"epoch": 3.4197111299915037,
"grad_norm": 1.6689115762710571,
"learning_rate": 7.5717241379310345e-06,
"loss": 0.0424,
"step": 4025
},
{
"epoch": 3.4409515717926933,
"grad_norm": 1.9614856243133545,
"learning_rate": 7.55448275862069e-06,
"loss": 0.0485,
"step": 4050
},
{
"epoch": 3.462192013593883,
"grad_norm": 1.556401014328003,
"learning_rate": 7.537241379310345e-06,
"loss": 0.0459,
"step": 4075
},
{
"epoch": 3.4834324553950724,
"grad_norm": 1.4535021781921387,
"learning_rate": 7.520000000000001e-06,
"loss": 0.0447,
"step": 4100
},
{
"epoch": 3.5046728971962615,
"grad_norm": 0.9936099648475647,
"learning_rate": 7.5027586206896566e-06,
"loss": 0.0465,
"step": 4125
},
{
"epoch": 3.525913338997451,
"grad_norm": 1.7872281074523926,
"learning_rate": 7.485517241379311e-06,
"loss": 0.0504,
"step": 4150
},
{
"epoch": 3.5471537807986406,
"grad_norm": 1.3985124826431274,
"learning_rate": 7.468275862068966e-06,
"loss": 0.0452,
"step": 4175
},
{
"epoch": 3.56839422259983,
"grad_norm": 1.2840551137924194,
"learning_rate": 7.451034482758621e-06,
"loss": 0.0451,
"step": 4200
},
{
"epoch": 3.5896346644010197,
"grad_norm": 1.9468519687652588,
"learning_rate": 7.433793103448277e-06,
"loss": 0.0485,
"step": 4225
},
{
"epoch": 3.610875106202209,
"grad_norm": 1.1277813911437988,
"learning_rate": 7.416551724137932e-06,
"loss": 0.0447,
"step": 4250
},
{
"epoch": 3.6321155480033984,
"grad_norm": 1.3325130939483643,
"learning_rate": 7.3993103448275875e-06,
"loss": 0.0459,
"step": 4275
},
{
"epoch": 3.653355989804588,
"grad_norm": 1.05362069606781,
"learning_rate": 7.3820689655172415e-06,
"loss": 0.0475,
"step": 4300
},
{
"epoch": 3.6745964316057775,
"grad_norm": 1.4702696800231934,
"learning_rate": 7.364827586206897e-06,
"loss": 0.0496,
"step": 4325
},
{
"epoch": 3.695836873406967,
"grad_norm": 1.3535746335983276,
"learning_rate": 7.347586206896552e-06,
"loss": 0.0454,
"step": 4350
},
{
"epoch": 3.717077315208156,
"grad_norm": 2.088671922683716,
"learning_rate": 7.330344827586208e-06,
"loss": 0.0513,
"step": 4375
},
{
"epoch": 3.7383177570093458,
"grad_norm": 1.3528611660003662,
"learning_rate": 7.313103448275863e-06,
"loss": 0.0458,
"step": 4400
},
{
"epoch": 3.7595581988105353,
"grad_norm": 2.404648542404175,
"learning_rate": 7.2958620689655175e-06,
"loss": 0.0448,
"step": 4425
},
{
"epoch": 3.780798640611725,
"grad_norm": 1.7826001644134521,
"learning_rate": 7.278620689655172e-06,
"loss": 0.0439,
"step": 4450
},
{
"epoch": 3.8020390824129144,
"grad_norm": 1.5115604400634766,
"learning_rate": 7.261379310344828e-06,
"loss": 0.0456,
"step": 4475
},
{
"epoch": 3.8232795242141036,
"grad_norm": 1.6182334423065186,
"learning_rate": 7.244137931034483e-06,
"loss": 0.0441,
"step": 4500
},
{
"epoch": 3.844519966015293,
"grad_norm": 1.242518663406372,
"learning_rate": 7.226896551724139e-06,
"loss": 0.0465,
"step": 4525
},
{
"epoch": 3.8657604078164827,
"grad_norm": 1.5941393375396729,
"learning_rate": 7.2096551724137944e-06,
"loss": 0.045,
"step": 4550
},
{
"epoch": 3.887000849617672,
"grad_norm": 1.697571873664856,
"learning_rate": 7.1924137931034485e-06,
"loss": 0.0487,
"step": 4575
},
{
"epoch": 3.908241291418862,
"grad_norm": 1.3660414218902588,
"learning_rate": 7.175172413793104e-06,
"loss": 0.046,
"step": 4600
},
{
"epoch": 3.929481733220051,
"grad_norm": 1.4809635877609253,
"learning_rate": 7.157931034482759e-06,
"loss": 0.0459,
"step": 4625
},
{
"epoch": 3.9507221750212405,
"grad_norm": 1.4990068674087524,
"learning_rate": 7.140689655172415e-06,
"loss": 0.0489,
"step": 4650
},
{
"epoch": 3.97196261682243,
"grad_norm": 1.7496356964111328,
"learning_rate": 7.12344827586207e-06,
"loss": 0.0448,
"step": 4675
},
{
"epoch": 3.993203058623619,
"grad_norm": 1.5272170305252075,
"learning_rate": 7.106206896551725e-06,
"loss": 0.0486,
"step": 4700
},
{
"epoch": 4.014443500424809,
"grad_norm": 1.0526211261749268,
"learning_rate": 7.088965517241379e-06,
"loss": 0.0352,
"step": 4725
},
{
"epoch": 4.035683942225998,
"grad_norm": 1.301439881324768,
"learning_rate": 7.071724137931035e-06,
"loss": 0.0289,
"step": 4750
},
{
"epoch": 4.056924384027187,
"grad_norm": 1.026371717453003,
"learning_rate": 7.05448275862069e-06,
"loss": 0.0273,
"step": 4775
},
{
"epoch": 4.078164825828377,
"grad_norm": 1.4210519790649414,
"learning_rate": 7.037241379310346e-06,
"loss": 0.0271,
"step": 4800
},
{
"epoch": 4.0994052676295665,
"grad_norm": 1.5456641912460327,
"learning_rate": 7.0200000000000006e-06,
"loss": 0.0282,
"step": 4825
},
{
"epoch": 4.1206457094307565,
"grad_norm": 1.5171129703521729,
"learning_rate": 7.002758620689655e-06,
"loss": 0.0294,
"step": 4850
},
{
"epoch": 4.141886151231946,
"grad_norm": 1.4718422889709473,
"learning_rate": 6.98551724137931e-06,
"loss": 0.0268,
"step": 4875
},
{
"epoch": 4.163126593033135,
"grad_norm": 1.2558562755584717,
"learning_rate": 6.968275862068966e-06,
"loss": 0.0291,
"step": 4900
},
{
"epoch": 4.184367034834325,
"grad_norm": 0.772474467754364,
"learning_rate": 6.951034482758622e-06,
"loss": 0.0279,
"step": 4925
},
{
"epoch": 4.205607476635514,
"grad_norm": 1.2242004871368408,
"learning_rate": 6.933793103448277e-06,
"loss": 0.0296,
"step": 4950
},
{
"epoch": 4.226847918436704,
"grad_norm": 1.9706392288208008,
"learning_rate": 6.916551724137932e-06,
"loss": 0.0293,
"step": 4975
},
{
"epoch": 4.248088360237893,
"grad_norm": 1.1294701099395752,
"learning_rate": 6.899310344827586e-06,
"loss": 0.0288,
"step": 5000
},
{
"epoch": 4.248088360237893,
"eval_loss": 0.14424686133861542,
"eval_runtime": 5968.4953,
"eval_samples_per_second": 0.79,
"eval_steps_per_second": 0.099,
"eval_wer": 35.7292751785256,
"step": 5000
}
],
"logging_steps": 25,
"max_steps": 15000,
"num_input_tokens_seen": 0,
"num_train_epochs": 13,
"save_steps": 1000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 8.161165082492928e+19,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}