End of training

- README.md +1 -1
- all_results.json +15 -0
- eval_results.json +9 -0
- runs/Oct04_07-16-17_a99ffc333f86/events.out.tfevents.1728085226.a99ffc333f86.35.1 +3 -0
- train_results.json +9 -0
- trainer_state.json +1487 -0
README.md
CHANGED
@@ -15,7 +15,7 @@ should probably proofread and complete it, then remove this comment. -->
 
 # whisper-large-v3-ft-btb-cv-cy
 
-This model is a fine-tuned version of [openai/whisper-large-v3](https://huggingface.co/openai/whisper-large-v3) on
+This model is a fine-tuned version of [openai/whisper-large-v3](https://huggingface.co/openai/whisper-large-v3) on the DewiBrynJones/banc-trawsgrifiadau-bangor-clean train main, DewiBrynJones/commonvoice_18_0_cy train+dev+test main dataset.
 It achieves the following results on the evaluation set:
 - Loss: 0.3838
 - Wer: 0.2732
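The updated card describes a Whisper large-v3 checkpoint fine-tuned on Welsh speech (Banc Trawsgrifiadau Bangor plus Common Voice 18 cy), reaching roughly 27.3% WER on the evaluation set. A minimal inference sketch follows; the hub id `DewiBrynJones/whisper-large-v3-ft-btb-cv-cy` and the audio path are assumptions, so adjust them to the actual repository and your own file.

```python
# Illustrative only: transcribe a Welsh recording with the fine-tuned checkpoint.
# The model id below is assumed from the file name in this commit.
import torch
from transformers import pipeline

model_id = "DewiBrynJones/whisper-large-v3-ft-btb-cv-cy"  # assumed hub id

asr = pipeline(
    "automatic-speech-recognition",
    model=model_id,
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    device="cuda:0" if torch.cuda.is_available() else "cpu",
    chunk_length_s=30,  # Whisper operates on 30 s windows; chunk longer recordings
)

# "cy" is Whisper's language code for Welsh.
result = asr("example_cy.wav", generate_kwargs={"language": "cy", "task": "transcribe"})
print(result["text"])
```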
all_results.json
ADDED
@@ -0,0 +1,15 @@
{
    "epoch": 2.8555111364934325,
    "eval_loss": 0.38383349776268005,
    "eval_runtime": 1673.49,
    "eval_samples": 3901,
    "eval_samples_per_second": 2.331,
    "eval_steps_per_second": 0.146,
    "eval_wer": 0.27318168646769053,
    "total_flos": 5.435589590699213e+20,
    "train_loss": 0.3002769865989685,
    "train_runtime": 59305.2217,
    "train_samples": 56026,
    "train_samples_per_second": 2.698,
    "train_steps_per_second": 0.084
}
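The summary figures are internally consistent: throughput times wall-clock time recovers the number of samples processed, which in turn matches epochs times dataset size. A small sketch of that cross-check, assuming the file sits in the working directory:

```python
# Consistency check over the numbers in all_results.json (illustrative only).
import json

with open("all_results.json") as f:
    r = json.load(f)

# Training samples seen, computed two ways: throughput x runtime vs. epochs x dataset size.
from_throughput = r["train_runtime"] * r["train_samples_per_second"]  # ~160,000
from_epochs = r["epoch"] * r["train_samples"]                         # ~160,000
print(f"train samples seen: {from_throughput:,.0f} vs {from_epochs:,.0f}")

# Evaluation throughput x runtime should roughly recover the eval set size (3901).
print(f"eval samples: {r['eval_runtime'] * r['eval_samples_per_second']:,.0f}")
```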
eval_results.json
ADDED
@@ -0,0 +1,9 @@
{
    "epoch": 2.8555111364934325,
    "eval_loss": 0.38383349776268005,
    "eval_runtime": 1673.49,
    "eval_samples": 3901,
    "eval_samples_per_second": 2.331,
    "eval_steps_per_second": 0.146,
    "eval_wer": 0.27318168646769053
}
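`eval_wer` is a corpus-level word error rate: word-level substitutions, insertions and deletions summed over the whole evaluation set and divided by the number of reference words. A minimal sketch of computing the same metric with the `evaluate` package (the example sentences are invented):

```python
# Sketch of how a WER such as eval_wer = 0.2732 is computed.
import evaluate

wer_metric = evaluate.load("wer")

predictions = ["mae'r tywydd yn braf heddiw", "dw i'n hoffi coffi"]
references  = ["mae'r tywydd yn braf iawn heddiw", "dw i'n hoffi coffi"]

# (substitutions + insertions + deletions) / number of reference words
print(wer_metric.compute(predictions=predictions, references=references))
```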
runs/Oct04_07-16-17_a99ffc333f86/events.out.tfevents.1728085226.a99ffc333f86.35.1
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:904261755ca0d94d881ed13df66b1550c5a8d171c4b4e9b0aa1bc71f4e005027
size 406
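These three lines are a Git LFS pointer, not the TensorBoard log itself; the oid and size identify a 406-byte events file stored in LFS, which has to be pulled (for example with `git lfs pull`) before it can be read. A sketch of inspecting the downloaded file with TensorBoard's event reader; the scalar tag names depend on what the Trainer logged, so the snippet lists them first:

```python
# Illustrative only: read scalars from the downloaded TensorBoard event file.
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

log_dir = "runs/Oct04_07-16-17_a99ffc333f86"  # directory containing events.out.tfevents.*
ea = EventAccumulator(log_dir)
ea.Reload()

print(ea.Tags()["scalars"])            # scalar tags actually present in this run
for tag in ea.Tags()["scalars"]:
    events = ea.Scalars(tag)
    print(tag, [(e.step, e.value) for e in events[:3]])  # first few (step, value) pairs
```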
train_results.json
ADDED
@@ -0,0 +1,9 @@
{
    "epoch": 2.8555111364934325,
    "total_flos": 5.435589590699213e+20,
    "train_loss": 0.3002769865989685,
    "train_runtime": 59305.2217,
    "train_samples": 56026,
    "train_samples_per_second": 2.698,
    "train_steps_per_second": 0.084
}
trainer_state.json
ADDED
@@ -0,0 +1,1487 @@
{
    "best_metric": null,
    "best_model_checkpoint": null,
    "epoch": 2.8555111364934325,
    "eval_steps": 1000,
    "global_step": 5000,
    "is_hyper_param_search": false,
    "is_local_process_zero": true,
    "is_world_process_zero": true,
    "log_history": [
        {"epoch": 0.014277555682467162, "grad_norm": 7.571424961090088, "learning_rate": 5.000000000000001e-07, "loss": 1.5088, "step": 25},
        {"epoch": 0.028555111364934323, "grad_norm": 5.992729187011719, "learning_rate": 1.0000000000000002e-06, "loss": 1.2038, "step": 50},
        {"epoch": 0.04283266704740148, "grad_norm": 5.949503421783447, "learning_rate": 1.5e-06, "loss": 0.8879, "step": 75},
        {"epoch": 0.05711022272986865, "grad_norm": 4.452832221984863, "learning_rate": 2.0000000000000003e-06, "loss": 0.7647, "step": 100},
        {"epoch": 0.0713877784123358, "grad_norm": 4.690545558929443, "learning_rate": 2.5e-06, "loss": 0.6792, "step": 125},
        {"epoch": 0.08566533409480297, "grad_norm": 4.969720840454102, "learning_rate": 3e-06, "loss": 0.6549, "step": 150},
        {"epoch": 0.09994288977727013, "grad_norm": 5.184281349182129, "learning_rate": 3.5e-06, "loss": 0.6376, "step": 175},
        {"epoch": 0.1142204454597373, "grad_norm": 5.00349235534668, "learning_rate": 4.000000000000001e-06, "loss": 0.5982, "step": 200},
        {"epoch": 0.12849800114220444, "grad_norm": 4.239490032196045, "learning_rate": 4.5e-06, "loss": 0.6084, "step": 225},
        {"epoch": 0.1427755568246716, "grad_norm": 4.2740068435668945, "learning_rate": 5e-06, "loss": 0.58, "step": 250},
        {"epoch": 0.15705311250713877, "grad_norm": 4.718848705291748, "learning_rate": 5.500000000000001e-06, "loss": 0.5759, "step": 275},
        {"epoch": 0.17133066818960593, "grad_norm": 4.2935638427734375, "learning_rate": 6e-06, "loss": 0.5625, "step": 300},
        {"epoch": 0.1856082238720731, "grad_norm": 4.917020797729492, "learning_rate": 6.5000000000000004e-06, "loss": 0.5621, "step": 325},
        {"epoch": 0.19988577955454026, "grad_norm": 3.9521942138671875, "learning_rate": 7e-06, "loss": 0.5644, "step": 350},
        {"epoch": 0.21416333523700742, "grad_norm": 4.506232738494873, "learning_rate": 7.500000000000001e-06, "loss": 0.5508, "step": 375},
        {"epoch": 0.2284408909194746, "grad_norm": 4.1483540534973145, "learning_rate": 8.000000000000001e-06, "loss": 0.5244, "step": 400},
        {"epoch": 0.24271844660194175, "grad_norm": 4.077396392822266, "learning_rate": 8.5e-06, "loss": 0.5051, "step": 425},
        {"epoch": 0.2569960022844089, "grad_norm": 4.375626087188721, "learning_rate": 9e-06, "loss": 0.5222, "step": 450},
        {"epoch": 0.2712735579668761, "grad_norm": 3.5698530673980713, "learning_rate": 9.5e-06, "loss": 0.5038, "step": 475},
        {"epoch": 0.2855511136493432, "grad_norm": 4.99509859085083, "learning_rate": 1e-05, "loss": 0.5196, "step": 500},
        {"epoch": 0.2998286693318104, "grad_norm": 3.666332721710205, "learning_rate": 9.944444444444445e-06, "loss": 0.5066, "step": 525},
        {"epoch": 0.31410622501427754, "grad_norm": 3.9203736782073975, "learning_rate": 9.88888888888889e-06, "loss": 0.4822, "step": 550},
        {"epoch": 0.32838378069674473, "grad_norm": 3.5677530765533447, "learning_rate": 9.833333333333333e-06, "loss": 0.519, "step": 575},
        {"epoch": 0.34266133637921187, "grad_norm": 3.3873414993286133, "learning_rate": 9.777777777777779e-06, "loss": 0.5205, "step": 600},
        {"epoch": 0.35693889206167906, "grad_norm": 3.9527816772460938, "learning_rate": 9.722222222222223e-06, "loss": 0.4769, "step": 625},
        {"epoch": 0.3712164477441462, "grad_norm": 3.3437490463256836, "learning_rate": 9.666666666666667e-06, "loss": 0.4629, "step": 650},
        {"epoch": 0.3854940034266134, "grad_norm": 3.7754790782928467, "learning_rate": 9.611111111111112e-06, "loss": 0.4812, "step": 675},
        {"epoch": 0.3997715591090805, "grad_norm": 3.744267225265503, "learning_rate": 9.555555555555556e-06, "loss": 0.467, "step": 700},
        {"epoch": 0.4140491147915477, "grad_norm": 3.5076072216033936, "learning_rate": 9.5e-06, "loss": 0.4454, "step": 725},
        {"epoch": 0.42832667047401485, "grad_norm": 3.556335687637329, "learning_rate": 9.444444444444445e-06, "loss": 0.4447, "step": 750},
        {"epoch": 0.442604226156482, "grad_norm": 4.256951332092285, "learning_rate": 9.38888888888889e-06, "loss": 0.4809, "step": 775},
        {"epoch": 0.4568817818389492, "grad_norm": 3.533447742462158, "learning_rate": 9.333333333333334e-06, "loss": 0.4425, "step": 800},
        {"epoch": 0.4711593375214163, "grad_norm": 4.324098587036133, "learning_rate": 9.277777777777778e-06, "loss": 0.424, "step": 825},
        {"epoch": 0.4854368932038835, "grad_norm": 2.913189649581909, "learning_rate": 9.222222222222224e-06, "loss": 0.4314, "step": 850},
        {"epoch": 0.49971444888635064, "grad_norm": 3.432490825653076, "learning_rate": 9.166666666666666e-06, "loss": 0.4355, "step": 875},
        {"epoch": 0.5139920045688178, "grad_norm": 3.645869255065918, "learning_rate": 9.111111111111112e-06, "loss": 0.4395, "step": 900},
        {"epoch": 0.528269560251285, "grad_norm": 3.2094240188598633, "learning_rate": 9.055555555555556e-06, "loss": 0.4144, "step": 925},
        {"epoch": 0.5425471159337522, "grad_norm": 3.4623546600341797, "learning_rate": 9e-06, "loss": 0.4277, "step": 950},
        {"epoch": 0.5568246716162193, "grad_norm": 3.640333414077759, "learning_rate": 8.944444444444446e-06, "loss": 0.4246, "step": 975},
        {"epoch": 0.5711022272986864, "grad_norm": 3.0283167362213135, "learning_rate": 8.888888888888888e-06, "loss": 0.4047, "step": 1000},
        {"epoch": 0.5711022272986864, "eval_loss": 0.4848648011684418, "eval_runtime": 1825.4203, "eval_samples_per_second": 2.137, "eval_steps_per_second": 0.134, "eval_wer": 0.35052641746353713, "step": 1000},
        {"epoch": 0.5853797829811537, "grad_norm": 3.7762739658355713, "learning_rate": 8.833333333333334e-06, "loss": 0.4218, "step": 1025},
        {"epoch": 0.5996573386636208, "grad_norm": 3.495347023010254, "learning_rate": 8.777777777777778e-06, "loss": 0.3968, "step": 1050},
        {"epoch": 0.613934894346088, "grad_norm": 3.5088939666748047, "learning_rate": 8.722222222222224e-06, "loss": 0.4108, "step": 1075},
        {"epoch": 0.6282124500285551, "grad_norm": 3.555328845977783, "learning_rate": 8.666666666666668e-06, "loss": 0.4063, "step": 1100},
        {"epoch": 0.6424900057110223, "grad_norm": 2.9576587677001953, "learning_rate": 8.611111111111112e-06, "loss": 0.4116, "step": 1125},
        {"epoch": 0.6567675613934895, "grad_norm": 3.280855178833008, "learning_rate": 8.555555555555556e-06, "loss": 0.4083, "step": 1150},
        {"epoch": 0.6710451170759566, "grad_norm": 3.903722047805786, "learning_rate": 8.5e-06, "loss": 0.411, "step": 1175},
        {"epoch": 0.6853226727584237, "grad_norm": 3.519038438796997, "learning_rate": 8.444444444444446e-06, "loss": 0.3964, "step": 1200},
        {"epoch": 0.6996002284408909, "grad_norm": 3.3553972244262695, "learning_rate": 8.38888888888889e-06, "loss": 0.4049, "step": 1225},
        {"epoch": 0.7138777841233581, "grad_norm": 3.3820197582244873, "learning_rate": 8.333333333333334e-06, "loss": 0.4159, "step": 1250},
        {"epoch": 0.7281553398058253, "grad_norm": 2.782127857208252, "learning_rate": 8.277777777777778e-06, "loss": 0.3859, "step": 1275},
        {"epoch": 0.7424328954882924, "grad_norm": 3.5839345455169678, "learning_rate": 8.222222222222222e-06, "loss": 0.392, "step": 1300},
        {"epoch": 0.7567104511707595, "grad_norm": 3.0308761596679688, "learning_rate": 8.166666666666668e-06, "loss": 0.3899, "step": 1325},
        {"epoch": 0.7709880068532268, "grad_norm": 3.136904001235962, "learning_rate": 8.111111111111112e-06, "loss": 0.3907, "step": 1350},
        {"epoch": 0.7852655625356939, "grad_norm": 3.3192756175994873, "learning_rate": 8.055555555555557e-06, "loss": 0.3941, "step": 1375},
        {"epoch": 0.799543118218161, "grad_norm": 4.766107082366943, "learning_rate": 8.000000000000001e-06, "loss": 0.3887, "step": 1400},
        {"epoch": 0.8138206739006282, "grad_norm": 4.241744041442871, "learning_rate": 7.944444444444445e-06, "loss": 0.4033, "step": 1425},
        {"epoch": 0.8280982295830954, "grad_norm": 3.1559460163116455, "learning_rate": 7.88888888888889e-06, "loss": 0.3567, "step": 1450},
        {"epoch": 0.8423757852655626, "grad_norm": 3.142645835876465, "learning_rate": 7.833333333333333e-06, "loss": 0.3731, "step": 1475},
        {"epoch": 0.8566533409480297, "grad_norm": 3.1183199882507324, "learning_rate": 7.77777777777778e-06, "loss": 0.3668, "step": 1500},
        {"epoch": 0.8709308966304968, "grad_norm": 2.7859325408935547, "learning_rate": 7.722222222222223e-06, "loss": 0.3965, "step": 1525},
        {"epoch": 0.885208452312964, "grad_norm": 3.191088914871216, "learning_rate": 7.666666666666667e-06, "loss": 0.3574, "step": 1550},
        {"epoch": 0.8994860079954312, "grad_norm": 3.0640053749084473, "learning_rate": 7.611111111111111e-06, "loss": 0.3811, "step": 1575},
        {"epoch": 0.9137635636778983, "grad_norm": 3.0769450664520264, "learning_rate": 7.555555555555556e-06, "loss": 0.3788, "step": 1600},
        {"epoch": 0.9280411193603655, "grad_norm": 3.1407933235168457, "learning_rate": 7.500000000000001e-06, "loss": 0.3698, "step": 1625},
        {"epoch": 0.9423186750428326, "grad_norm": 3.410187244415283, "learning_rate": 7.444444444444445e-06, "loss": 0.3907, "step": 1650},
        {"epoch": 0.9565962307252999, "grad_norm": 3.3382880687713623, "learning_rate": 7.38888888888889e-06, "loss": 0.3368, "step": 1675},
        {"epoch": 0.970873786407767, "grad_norm": 3.194368600845337, "learning_rate": 7.333333333333333e-06, "loss": 0.369, "step": 1700},
        {"epoch": 0.9851513420902341, "grad_norm": 3.089852809906006, "learning_rate": 7.277777777777778e-06, "loss": 0.3765, "step": 1725},
        {"epoch": 0.9994288977727013, "grad_norm": 3.0002810955047607, "learning_rate": 7.222222222222223e-06, "loss": 0.3705, "step": 1750},
        {"epoch": 1.0137064534551685, "grad_norm": 2.3977696895599365, "learning_rate": 7.166666666666667e-06, "loss": 0.2584, "step": 1775},
        {"epoch": 1.0279840091376355, "grad_norm": 2.3220465183258057, "learning_rate": 7.111111111111112e-06, "loss": 0.2538, "step": 1800},
        {"epoch": 1.0422615648201028, "grad_norm": 2.819687843322754, "learning_rate": 7.055555555555557e-06, "loss": 0.2571, "step": 1825},
        {"epoch": 1.05653912050257, "grad_norm": 2.514644145965576, "learning_rate": 7e-06, "loss": 0.2806, "step": 1850},
        {"epoch": 1.070816676185037, "grad_norm": 2.1887128353118896, "learning_rate": 6.944444444444445e-06, "loss": 0.2626, "step": 1875},
        {"epoch": 1.0850942318675043, "grad_norm": 2.592247486114502, "learning_rate": 6.88888888888889e-06, "loss": 0.2509, "step": 1900},
        {"epoch": 1.0993717875499716, "grad_norm": 2.371534824371338, "learning_rate": 6.833333333333334e-06, "loss": 0.2605, "step": 1925},
        {"epoch": 1.1136493432324386, "grad_norm": 3.1825778484344482, "learning_rate": 6.777777777777779e-06, "loss": 0.2495, "step": 1950},
        {"epoch": 1.1279268989149058, "grad_norm": 2.901749849319458, "learning_rate": 6.7222222222222235e-06, "loss": 0.261, "step": 1975},
        {"epoch": 1.1422044545973729, "grad_norm": 2.658766984939575, "learning_rate": 6.666666666666667e-06, "loss": 0.2476, "step": 2000},
        {"epoch": 1.1422044545973729, "eval_loss": 0.41870468854904175, "eval_runtime": 1722.2575, "eval_samples_per_second": 2.265, "eval_steps_per_second": 0.142, "eval_wer": 0.3136771950159374, "step": 2000},
        {"epoch": 1.15648201027984, "grad_norm": 2.711312770843506, "learning_rate": 6.6111111111111115e-06, "loss": 0.2414, "step": 2025},
        {"epoch": 1.1707595659623073, "grad_norm": 2.9044759273529053, "learning_rate": 6.555555555555556e-06, "loss": 0.2502, "step": 2050},
        {"epoch": 1.1850371216447744, "grad_norm": 2.549725294113159, "learning_rate": 6.5000000000000004e-06, "loss": 0.2511, "step": 2075},
        {"epoch": 1.1993146773272416, "grad_norm": 2.95792555809021, "learning_rate": 6.444444444444445e-06, "loss": 0.2427, "step": 2100},
        {"epoch": 1.2135922330097086, "grad_norm": 2.686870574951172, "learning_rate": 6.3888888888888885e-06, "loss": 0.2637, "step": 2125},
        {"epoch": 1.227869788692176, "grad_norm": 3.7834455966949463, "learning_rate": 6.333333333333333e-06, "loss": 0.2554, "step": 2150},
        {"epoch": 1.2421473443746431, "grad_norm": 3.0891430377960205, "learning_rate": 6.277777777777778e-06, "loss": 0.2467, "step": 2175},
        {"epoch": 1.2564249000571102, "grad_norm": 2.771472930908203, "learning_rate": 6.222222222222223e-06, "loss": 0.2467, "step": 2200},
        {"epoch": 1.2707024557395774, "grad_norm": 2.6807925701141357, "learning_rate": 6.166666666666667e-06, "loss": 0.2682, "step": 2225},
        {"epoch": 1.2849800114220447, "grad_norm": 2.2320196628570557, "learning_rate": 6.111111111111112e-06, "loss": 0.2408, "step": 2250},
        {"epoch": 1.2992575671045117, "grad_norm": 3.066009759902954, "learning_rate": 6.055555555555555e-06, "loss": 0.2363, "step": 2275},
        {"epoch": 1.313535122786979, "grad_norm": 2.6043167114257812, "learning_rate": 6e-06, "loss": 0.2483, "step": 2300},
        {"epoch": 1.327812678469446, "grad_norm": 2.6250624656677246, "learning_rate": 5.944444444444445e-06, "loss": 0.2563, "step": 2325},
        {"epoch": 1.3420902341519132, "grad_norm": 2.508998394012451, "learning_rate": 5.88888888888889e-06, "loss": 0.2581, "step": 2350},
        {"epoch": 1.3563677898343802, "grad_norm": 2.872715473175049, "learning_rate": 5.833333333333334e-06, "loss": 0.2371, "step": 2375},
        {"epoch": 1.3706453455168475, "grad_norm": 3.1910557746887207, "learning_rate": 5.777777777777778e-06, "loss": 0.2515, "step": 2400},
        {"epoch": 1.3849229011993147, "grad_norm": 2.7466485500335693, "learning_rate": 5.722222222222222e-06, "loss": 0.2578, "step": 2425},
        {"epoch": 1.3992004568817817, "grad_norm": 2.388066530227661, "learning_rate": 5.666666666666667e-06, "loss": 0.2541, "step": 2450},
        {"epoch": 1.413478012564249, "grad_norm": 2.688497304916382, "learning_rate": 5.611111111111112e-06, "loss": 0.2514, "step": 2475},
        {"epoch": 1.4277555682467162, "grad_norm": 2.710899591445923, "learning_rate": 5.555555555555557e-06, "loss": 0.2765, "step": 2500},
        {"epoch": 1.4420331239291833, "grad_norm": 2.296635389328003, "learning_rate": 5.500000000000001e-06, "loss": 0.2487, "step": 2525},
        {"epoch": 1.4563106796116505, "grad_norm": 2.7988133430480957, "learning_rate": 5.444444444444445e-06, "loss": 0.2499, "step": 2550},
        {"epoch": 1.4705882352941178, "grad_norm": 3.1988582611083984, "learning_rate": 5.388888888888889e-06, "loss": 0.2456, "step": 2575},
        {"epoch": 1.4848657909765848, "grad_norm": 2.657517910003662, "learning_rate": 5.333333333333334e-06, "loss": 0.2613, "step": 2600},
        {"epoch": 1.499143346659052, "grad_norm": 2.5517725944519043, "learning_rate": 5.2777777777777785e-06, "loss": 0.2528, "step": 2625},
        {"epoch": 1.5134209023415193, "grad_norm": 2.7166850566864014, "learning_rate": 5.2222222222222226e-06, "loss": 0.2476, "step": 2650},
        {"epoch": 1.5276984580239863, "grad_norm": 2.7338292598724365, "learning_rate": 5.1666666666666675e-06, "loss": 0.2489, "step": 2675},
        {"epoch": 1.5419760137064533, "grad_norm": 2.1498470306396484, "learning_rate": 5.1111111111111115e-06, "loss": 0.2388, "step": 2700},
        {"epoch": 1.5562535693889206, "grad_norm": 2.595247745513916, "learning_rate": 5.0555555555555555e-06, "loss": 0.2566, "step": 2725},
        {"epoch": 1.5705311250713878, "grad_norm": 2.652132987976074, "learning_rate": 5e-06, "loss": 0.239, "step": 2750},
        {"epoch": 1.5848086807538548, "grad_norm": 2.436605930328369, "learning_rate": 4.944444444444445e-06, "loss": 0.2419, "step": 2775},
        {"epoch": 1.599086236436322, "grad_norm": 2.618035316467285, "learning_rate": 4.888888888888889e-06, "loss": 0.2295, "step": 2800},
        {"epoch": 1.6133637921187893, "grad_norm": 2.2901298999786377, "learning_rate": 4.833333333333333e-06, "loss": 0.2446, "step": 2825},
        {"epoch": 1.6276413478012564, "grad_norm": 2.899315595626831, "learning_rate": 4.777777777777778e-06, "loss": 0.2628, "step": 2850},
        {"epoch": 1.6419189034837236, "grad_norm": 2.616224527359009, "learning_rate": 4.722222222222222e-06, "loss": 0.2273, "step": 2875},
        {"epoch": 1.6561964591661908, "grad_norm": 2.43113112449646, "learning_rate": 4.666666666666667e-06, "loss": 0.2362, "step": 2900},
        {"epoch": 1.6704740148486579, "grad_norm": 2.5203065872192383, "learning_rate": 4.611111111111112e-06, "loss": 0.2428, "step": 2925},
        {"epoch": 1.6847515705311251, "grad_norm": 2.3064985275268555, "learning_rate": 4.555555555555556e-06, "loss": 0.2441, "step": 2950},
        {"epoch": 1.6990291262135924, "grad_norm": 2.201695680618286, "learning_rate": 4.5e-06, "loss": 0.2324, "step": 2975},
        {"epoch": 1.7133066818960594, "grad_norm": 2.442471981048584, "learning_rate": 4.444444444444444e-06, "loss": 0.2527, "step": 3000},
        {"epoch": 1.7133066818960594, "eval_loss": 0.3882293701171875, "eval_runtime": 1749.1422, "eval_samples_per_second": 2.23, "eval_steps_per_second": 0.139, "eval_wer": 0.2901091471071187, "step": 3000},
        {"epoch": 1.7275842375785264, "grad_norm": 2.77786922454834, "learning_rate": 4.388888888888889e-06, "loss": 0.2492, "step": 3025},
        {"epoch": 1.7418617932609937, "grad_norm": 2.5009052753448486, "learning_rate": 4.333333333333334e-06, "loss": 0.2341, "step": 3050},
        {"epoch": 1.756139348943461, "grad_norm": 2.780186176300049, "learning_rate": 4.277777777777778e-06, "loss": 0.2407, "step": 3075},
        {"epoch": 1.770416904625928, "grad_norm": 1.9574618339538574, "learning_rate": 4.222222222222223e-06, "loss": 0.2437, "step": 3100},
        {"epoch": 1.7846944603083952, "grad_norm": 2.151125907897949, "learning_rate": 4.166666666666667e-06, "loss": 0.2341, "step": 3125},
        {"epoch": 1.7989720159908624, "grad_norm": 2.170015811920166, "learning_rate": 4.111111111111111e-06, "loss": 0.2373, "step": 3150},
        {"epoch": 1.8132495716733295, "grad_norm": 3.0467231273651123, "learning_rate": 4.055555555555556e-06, "loss": 0.2317, "step": 3175},
        {"epoch": 1.8275271273557967, "grad_norm": 3.0150015354156494, "learning_rate": 4.000000000000001e-06, "loss": 0.228, "step": 3200},
        {"epoch": 1.841804683038264, "grad_norm": 3.275949001312256, "learning_rate": 3.944444444444445e-06, "loss": 0.2438, "step": 3225},
        {"epoch": 1.856082238720731, "grad_norm": 3.0381839275360107, "learning_rate": 3.88888888888889e-06, "loss": 0.2478, "step": 3250},
        {"epoch": 1.8703597944031982, "grad_norm": 2.770716428756714, "learning_rate": 3.833333333333334e-06, "loss": 0.2312, "step": 3275},
        {"epoch": 1.8846373500856655, "grad_norm": 2.6976678371429443, "learning_rate": 3.777777777777778e-06, "loss": 0.2284, "step": 3300},
        {"epoch": 1.8989149057681325, "grad_norm": 2.8799102306365967, "learning_rate": 3.7222222222222225e-06, "loss": 0.2484, "step": 3325},
        {"epoch": 1.9131924614505995, "grad_norm": 2.574629545211792, "learning_rate": 3.6666666666666666e-06, "loss": 0.2295, "step": 3350},
        {"epoch": 1.927470017133067, "grad_norm": 2.4746835231781006, "learning_rate": 3.6111111111111115e-06, "loss": 0.2335, "step": 3375},
        {"epoch": 1.941747572815534, "grad_norm": 3.084383964538574, "learning_rate": 3.555555555555556e-06, "loss": 0.212, "step": 3400},
        {"epoch": 1.956025128498001, "grad_norm": 2.4441068172454834, "learning_rate": 3.5e-06, "loss": 0.221, "step": 3425},
        {"epoch": 1.9703026841804683, "grad_norm": 3.031568765640259, "learning_rate": 3.444444444444445e-06, "loss": 0.2341, "step": 3450},
        {"epoch": 1.9845802398629355, "grad_norm": 2.3584327697753906, "learning_rate": 3.3888888888888893e-06, "loss": 0.2431, "step": 3475},
        {"epoch": 1.9988577955454025, "grad_norm": 2.1590421199798584, "learning_rate": 3.3333333333333333e-06, "loss": 0.2357, "step": 3500},
        {"epoch": 2.0131353512278696, "grad_norm": 2.2845587730407715, "learning_rate": 3.277777777777778e-06, "loss": 0.1576, "step": 3525},
        {"epoch": 2.027412906910337, "grad_norm": 2.033133029937744, "learning_rate": 3.2222222222222227e-06, "loss": 0.1422, "step": 3550},
        {"epoch": 2.041690462592804, "grad_norm": 2.2549259662628174, "learning_rate": 3.1666666666666667e-06, "loss": 0.1473, "step": 3575},
        {"epoch": 2.055968018275271, "grad_norm": 1.5837754011154175, "learning_rate": 3.1111111111111116e-06, "loss": 0.143, "step": 3600},
        {"epoch": 2.0702455739577386, "grad_norm": 1.9988360404968262, "learning_rate": 3.055555555555556e-06, "loss": 0.1416, "step": 3625},
        {"epoch": 2.0845231296402056, "grad_norm": 2.148613929748535, "learning_rate": 3e-06, "loss": 0.1338, "step": 3650},
        {"epoch": 2.0988006853226726, "grad_norm": 1.8176393508911133, "learning_rate": 2.944444444444445e-06, "loss": 0.1514, "step": 3675},
        {"epoch": 2.11307824100514, "grad_norm": 2.60271954536438, "learning_rate": 2.888888888888889e-06, "loss": 0.1533, "step": 3700},
        {"epoch": 2.127355796687607, "grad_norm": 2.120281457901001, "learning_rate": 2.8333333333333335e-06, "loss": 0.1404, "step": 3725},
        {"epoch": 2.141633352370074, "grad_norm": 2.3522286415100098, "learning_rate": 2.7777777777777783e-06, "loss": 0.1511, "step": 3750},
        {"epoch": 2.1559109080525416, "grad_norm": 1.8738924264907837, "learning_rate": 2.7222222222222224e-06, "loss": 0.1417, "step": 3775},
        {"epoch": 2.1701884637350086, "grad_norm": 2.255291223526001, "learning_rate": 2.666666666666667e-06, "loss": 0.1437, "step": 3800},
        {"epoch": 2.1844660194174756, "grad_norm": 1.7046154737472534, "learning_rate": 2.6111111111111113e-06, "loss": 0.1446, "step": 3825},
        {"epoch": 2.198743575099943, "grad_norm": 2.0543861389160156, "learning_rate": 2.5555555555555557e-06, "loss": 0.1504, "step": 3850},
        {"epoch": 2.21302113078241, "grad_norm": 2.139716863632202, "learning_rate": 2.5e-06, "loss": 0.1345, "step": 3875},
        {"epoch": 2.227298686464877, "grad_norm": 1.7999951839447021, "learning_rate": 2.4444444444444447e-06, "loss": 0.1389, "step": 3900},
        {"epoch": 2.241576242147344, "grad_norm": 1.7282090187072754, "learning_rate": 2.388888888888889e-06, "loss": 0.1324, "step": 3925},
        {"epoch": 2.2558537978298117, "grad_norm": 2.6271605491638184, "learning_rate": 2.3333333333333336e-06, "loss": 0.1551, "step": 3950},
        {"epoch": 2.2701313535122787, "grad_norm": 2.170382022857666, "learning_rate": 2.277777777777778e-06, "loss": 0.144, "step": 3975},
        {"epoch": 2.2844089091947457, "grad_norm": 1.796635627746582, "learning_rate": 2.222222222222222e-06, "loss": 0.1568, "step": 4000},
        {"epoch": 2.2844089091947457, "eval_loss": 0.3901652991771698, "eval_runtime": 1765.3609, "eval_samples_per_second": 2.21, "eval_steps_per_second": 0.138, "eval_wer": 0.28160919540229884, "step": 4000},
        {"epoch": 2.298686464877213, "grad_norm": 2.0357980728149414, "learning_rate": 2.166666666666667e-06, "loss": 0.161, "step": 4025},
        {"epoch": 2.31296402055968, "grad_norm": 2.027215003967285, "learning_rate": 2.1111111111111114e-06, "loss": 0.1353, "step": 4050},
        {"epoch": 2.3272415762421472, "grad_norm": 2.8169405460357666, "learning_rate": 2.0555555555555555e-06, "loss": 0.1449, "step": 4075},
        {"epoch": 2.3415191319246147, "grad_norm": 1.9528751373291016, "learning_rate": 2.0000000000000003e-06, "loss": 0.1376, "step": 4100},
        {"epoch": 2.3557966876070817, "grad_norm": 2.5781335830688477, "learning_rate": 1.944444444444445e-06, "loss": 0.1383, "step": 4125},
        {"epoch": 2.3700742432895487, "grad_norm": 2.083077907562256, "learning_rate": 1.888888888888889e-06, "loss": 0.1362, "step": 4150},
        {"epoch": 2.384351798972016, "grad_norm": 2.431272029876709, "learning_rate": 1.8333333333333333e-06, "loss": 0.1329, "step": 4175},
        {"epoch": 2.3986293546544832, "grad_norm": 2.157139539718628, "learning_rate": 1.777777777777778e-06, "loss": 0.1377, "step": 4200},
        {"epoch": 2.4129069103369503, "grad_norm": 2.5328071117401123, "learning_rate": 1.7222222222222224e-06, "loss": 0.1361, "step": 4225},
        {"epoch": 2.4271844660194173, "grad_norm": 2.433239459991455, "learning_rate": 1.6666666666666667e-06, "loss": 0.157, "step": 4250},
        {"epoch": 2.4414620217018848, "grad_norm": 2.5167510509490967, "learning_rate": 1.6111111111111113e-06, "loss": 0.132, "step": 4275},
        {"epoch": 2.455739577384352, "grad_norm": 1.9507442712783813, "learning_rate": 1.5555555555555558e-06, "loss": 0.1625, "step": 4300},
        {"epoch": 2.470017133066819, "grad_norm": 2.2467007637023926, "learning_rate": 1.5e-06, "loss": 0.1333, "step": 4325},
        {"epoch": 2.4842946887492863, "grad_norm": 2.4816768169403076, "learning_rate": 1.4444444444444445e-06, "loss": 0.1499, "step": 4350},
        {"epoch": 2.4985722444317533, "grad_norm": 2.0616416931152344, "learning_rate": 1.3888888888888892e-06, "loss": 0.1508, "step": 4375},
        {"epoch": 2.5128498001142203, "grad_norm": 2.089355230331421, "learning_rate": 1.3333333333333334e-06, "loss": 0.1344, "step": 4400},
        {"epoch": 2.5271273557966873, "grad_norm": 2.2235498428344727, "learning_rate": 1.28e-06, "loss": 0.1717, "step": 4425},
        {"epoch": 2.541404911479155, "grad_norm": 1.9268138408660889, "learning_rate": 1.2244444444444445e-06, "loss": 0.143, "step": 4450},
        {"epoch": 2.555682467161622, "grad_norm": 1.8911551237106323, "learning_rate": 1.168888888888889e-06, "loss": 0.1439, "step": 4475},
        {"epoch": 2.5699600228440893, "grad_norm": 2.5078868865966797, "learning_rate": 1.1133333333333334e-06, "loss": 0.1341, "step": 4500},
        {"epoch": 2.5842375785265563, "grad_norm": 2.1232492923736572, "learning_rate": 1.0577777777777779e-06, "loss": 0.1415, "step": 4525},
        {"epoch": 2.5985151342090234, "grad_norm": 1.9214311838150024, "learning_rate": 1.0022222222222223e-06, "loss": 0.1301, "step": 4550},
        {"epoch": 2.6127926898914904, "grad_norm": 2.4226858615875244, "learning_rate": 9.466666666666667e-07, "loss": 0.1438, "step": 4575},
        {"epoch": 2.627070245573958, "grad_norm": 2.324777126312256, "learning_rate": 8.911111111111112e-07, "loss": 0.1306, "step": 4600},
        {"epoch": 2.641347801256425, "grad_norm": 2.427114486694336, "learning_rate": 8.355555555555556e-07, "loss": 0.1359, "step": 4625},
        {"epoch": 2.655625356938892, "grad_norm": 1.989882469177246, "learning_rate": 7.8e-07, "loss": 0.1386, "step": 4650},
        {"epoch": 2.6699029126213594, "grad_norm": 2.6079118251800537, "learning_rate": 7.244444444444446e-07, "loss": 0.135, "step": 4675},
        {"epoch": 2.6841804683038264, "grad_norm": 2.3429243564605713, "learning_rate": 6.68888888888889e-07, "loss": 0.1356, "step": 4700},
        {"epoch": 2.6984580239862934, "grad_norm": 2.3358540534973145, "learning_rate": 6.133333333333333e-07, "loss": 0.1304, "step": 4725},
        {"epoch": 2.7127355796687604, "grad_norm": 1.917809247970581, "learning_rate": 5.577777777777779e-07, "loss": 0.1395, "step": 4750},
        {"epoch": 2.727013135351228, "grad_norm": 2.0677952766418457, "learning_rate": 5.022222222222222e-07, "loss": 0.1309, "step": 4775},
        {"epoch": 2.741290691033695, "grad_norm": 2.135127305984497, "learning_rate": 4.466666666666667e-07, "loss": 0.1424, "step": 4800},
        {"epoch": 2.7555682467161624, "grad_norm": 2.3306682109832764, "learning_rate": 3.9111111111111115e-07, "loss": 0.1318, "step": 4825},
        {"epoch": 2.7698458023986294, "grad_norm": 2.0700454711914062, "learning_rate": 3.3555555555555556e-07, "loss": 0.1566, "step": 4850},
        {"epoch": 2.7841233580810965, "grad_norm": 1.8561683893203735, "learning_rate": 2.8e-07, "loss": 0.1453, "step": 4875},
        {"epoch": 2.7984009137635635, "grad_norm": 2.2682347297668457, "learning_rate": 2.2444444444444445e-07, "loss": 0.1415, "step": 4900},
        {"epoch": 2.812678469446031, "grad_norm": 2.2898778915405273, "learning_rate": 1.6888888888888888e-07, "loss": 0.1427, "step": 4925},
        {"epoch": 2.826956025128498, "grad_norm": 2.328401803970337, "learning_rate": 1.1333333333333336e-07, "loss": 0.1357, "step": 4950},
        {"epoch": 2.841233580810965, "grad_norm": 2.2169013023376465, "learning_rate": 5.777777777777778e-08, "loss": 0.1343, "step": 4975},
        {"epoch": 2.8555111364934325, "grad_norm": 2.42340350151062, "learning_rate": 2.2222222222222225e-09, "loss": 0.1313, "step": 5000},
        {"epoch": 2.8555111364934325, "eval_loss": 0.38383349776268005, "eval_runtime": 1820.062, "eval_samples_per_second": 2.143, "eval_steps_per_second": 0.134, "eval_wer": 0.27318168646769053, "step": 5000},
        {"epoch": 2.8555111364934325, "step": 5000, "total_flos": 5.435589590699213e+20, "train_loss": 0.3002769865989685, "train_runtime": 59305.2217, "train_samples_per_second": 2.698, "train_steps_per_second": 0.084}
    ],
    "logging_steps": 25,
    "max_steps": 5000,
    "num_input_tokens_seen": 0,
    "num_train_epochs": 3,
    "save_steps": 1000,
    "stateful_callbacks": {
        "TrainerControl": {
            "args": {
                "should_epoch_stop": false,
                "should_evaluate": false,
                "should_log": false,
                "should_save": true,
                "should_training_stop": true
            },
            "attributes": {}
        }
    },
    "total_flos": 5.435589590699213e+20,
    "train_batch_size": 16,
    "trial_name": null,
    "trial_params": null
}
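`log_history` above records a training-loss entry every 25 steps and an evaluation entry every 1000 steps; the learning rate warms up to 1e-05 over the first 500 steps and then decays roughly linearly towards zero at step 5000. A short sketch of reading the learning curve back out of the file (assuming trainer_state.json is in the working directory):

```python
# Separate the 25-step training logs from the 1000-step evaluation logs.
import json

with open("trainer_state.json") as f:
    state = json.load(f)

train_log = [e for e in state["log_history"] if "loss" in e]       # training entries
eval_log = [e for e in state["log_history"] if "eval_wer" in e]    # evaluation entries

for e in eval_log:
    print(f"step {e['step']:>5}: eval_loss {e['eval_loss']:.4f}  WER {e['eval_wer']:.4f}")

best = min(eval_log, key=lambda e: e["eval_wer"])
print(f"best WER {best['eval_wer']:.4f} at step {best['step']}")   # 0.2732 at step 5000
```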