End of training

Browse files

Files changed (6) hide show

README.md +15 -2
all_results.json +15 -0
eval_results.json +9 -0
runs/Oct05_19-39-47_d2d35f196850/events.out.tfevents.1728193404.d2d35f196850.118.1 +3 -0
train_results.json +9 -0
trainer_state.json +1487 -0

README.md CHANGED Viewed

@@ -3,11 +3,24 @@ license: apache-2.0
 base_model: openai/whisper-large-v2
 tags:
 - generated_from_trainer
 metrics:
 - wer
 model-index:
 - name: whisper-large-v2-ec
-  results: []
 ---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -15,7 +28,7 @@ should probably proofread and complete it, then remove this comment. -->
 # whisper-large-v2-ec
-This model is a fine-tuned version of [openai/whisper-large-v2](https://huggingface.co/openai/whisper-large-v2) on an unknown dataset.
 It achieves the following results on the evaluation set:
 - Loss: 0.5119
 - Wer: 0.2167

 base_model: openai/whisper-large-v2
 tags:
 - generated_from_trainer
+datasets:
+- wanasash/enwaucymraeg
 metrics:
 - wer
 model-index:
 - name: whisper-large-v2-ec
+  results:
+  - task:
+      name: Automatic Speech Recognition
+      type: automatic-speech-recognition
+    dataset:
+      name: wanasash/enwaucymraeg default
+      type: wanasash/enwaucymraeg
+      args: default
+    metrics:
+    - name: Wer
+      type: wer
+      value: 0.21671018276762402
 ---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 # whisper-large-v2-ec
+This model is a fine-tuned version of [openai/whisper-large-v2](https://huggingface.co/openai/whisper-large-v2) on the wanasash/enwaucymraeg default dataset.
 It achieves the following results on the evaluation set:
 - Loss: 0.5119
 - Wer: 0.2167

all_results.json ADDED Viewed

	@@ -0,0 +1,15 @@

+{
+    "epoch": 68.02721088435374,
+    "eval_loss": 0.5118595957756042,
+    "eval_runtime": 93.4089,
+    "eval_samples": 260,
+    "eval_samples_per_second": 2.783,
+    "eval_steps_per_second": 0.182,
+    "eval_wer": 0.21671018276762402,
+    "total_flos": 3.378304801456128e+20,
+    "train_loss": 0.03018118931162171,
+    "train_runtime": 39486.7724,
+    "train_samples": 2339,
+    "train_samples_per_second": 4.052,
+    "train_steps_per_second": 0.127
+}

eval_results.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+    "epoch": 68.02721088435374,
+    "eval_loss": 0.5118595957756042,
+    "eval_runtime": 93.4089,
+    "eval_samples": 260,
+    "eval_samples_per_second": 2.783,
+    "eval_steps_per_second": 0.182,
+    "eval_wer": 0.21671018276762402
+}

runs/Oct05_19-39-47_d2d35f196850/events.out.tfevents.1728193404.d2d35f196850.118.1 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d8a31ee42a2327a22e6ddf54eb99531cfde3a91b922c70d1567607fca6df0ee2
+size 406

train_results.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+    "epoch": 68.02721088435374,
+    "total_flos": 3.378304801456128e+20,
+    "train_loss": 0.03018118931162171,
+    "train_runtime": 39486.7724,
+    "train_samples": 2339,
+    "train_samples_per_second": 4.052,
+    "train_steps_per_second": 0.127
+}

trainer_state.json ADDED Viewed

	@@ -0,0 +1,1487 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 68.02721088435374,
+  "eval_steps": 1000,
+  "global_step": 5000,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.3401360544217687,
+      "grad_norm": 6.3686203956604,
+      "learning_rate": 5.000000000000001e-07,
+      "loss": 1.2352,
+      "step": 25
+    },
+    {
+      "epoch": 0.6802721088435374,
+      "grad_norm": 4.156219959259033,
+      "learning_rate": 1.0000000000000002e-06,
+      "loss": 0.7799,
+      "step": 50
+    },
+    {
+      "epoch": 1.0204081632653061,
+      "grad_norm": 4.332057952880859,
+      "learning_rate": 1.5e-06,
+      "loss": 0.5115,
+      "step": 75
+    },
+    {
+      "epoch": 1.3605442176870748,
+      "grad_norm": 3.7332186698913574,
+      "learning_rate": 2.0000000000000003e-06,
+      "loss": 0.4105,
+      "step": 100
+    },
+    {
+      "epoch": 1.7006802721088436,
+      "grad_norm": 3.6035523414611816,
+      "learning_rate": 2.5e-06,
+      "loss": 0.374,
+      "step": 125
+    },
+    {
+      "epoch": 2.0408163265306123,
+      "grad_norm": 2.3493571281433105,
+      "learning_rate": 3e-06,
+      "loss": 0.3344,
+      "step": 150
+    },
+    {
+      "epoch": 2.380952380952381,
+      "grad_norm": 3.0683202743530273,
+      "learning_rate": 3.48e-06,
+      "loss": 0.248,
+      "step": 175
+    },
+    {
+      "epoch": 2.7210884353741496,
+      "grad_norm": 3.189012289047241,
+      "learning_rate": 3.980000000000001e-06,
+      "loss": 0.2395,
+      "step": 200
+    },
+    {
+      "epoch": 3.061224489795918,
+      "grad_norm": 2.2101962566375732,
+      "learning_rate": 4.48e-06,
+      "loss": 0.2258,
+      "step": 225
+    },
+    {
+      "epoch": 3.4013605442176873,
+      "grad_norm": 2.2124788761138916,
+      "learning_rate": 4.980000000000001e-06,
+      "loss": 0.1494,
+      "step": 250
+    },
+    {
+      "epoch": 3.741496598639456,
+      "grad_norm": 3.446359395980835,
+      "learning_rate": 5.480000000000001e-06,
+      "loss": 0.1512,
+      "step": 275
+    },
+    {
+      "epoch": 4.081632653061225,
+      "grad_norm": 2.592689037322998,
+      "learning_rate": 5.98e-06,
+      "loss": 0.1359,
+      "step": 300
+    },
+    {
+      "epoch": 4.421768707482993,
+      "grad_norm": 3.1709280014038086,
+      "learning_rate": 6.480000000000001e-06,
+      "loss": 0.0859,
+      "step": 325
+    },
+    {
+      "epoch": 4.761904761904762,
+      "grad_norm": 2.485269546508789,
+      "learning_rate": 6.98e-06,
+      "loss": 0.0958,
+      "step": 350
+    },
+    {
+      "epoch": 5.1020408163265305,
+      "grad_norm": 1.868928074836731,
+      "learning_rate": 7.48e-06,
+      "loss": 0.0846,
+      "step": 375
+    },
+    {
+      "epoch": 5.442176870748299,
+      "grad_norm": 3.283317804336548,
+      "learning_rate": 7.980000000000002e-06,
+      "loss": 0.0575,
+      "step": 400
+    },
+    {
+      "epoch": 5.782312925170068,
+      "grad_norm": 2.18278431892395,
+      "learning_rate": 8.48e-06,
+      "loss": 0.075,
+      "step": 425
+    },
+    {
+      "epoch": 6.122448979591836,
+      "grad_norm": 1.7689893245697021,
+      "learning_rate": 8.98e-06,
+      "loss": 0.0637,
+      "step": 450
+    },
+    {
+      "epoch": 6.462585034013605,
+      "grad_norm": 2.60971736907959,
+      "learning_rate": 9.48e-06,
+      "loss": 0.0517,
+      "step": 475
+    },
+    {
+      "epoch": 6.802721088435375,
+      "grad_norm": 1.903644323348999,
+      "learning_rate": 9.980000000000001e-06,
+      "loss": 0.0578,
+      "step": 500
+    },
+    {
+      "epoch": 7.142857142857143,
+      "grad_norm": 1.4152497053146362,
+      "learning_rate": 9.946666666666667e-06,
+      "loss": 0.0565,
+      "step": 525
+    },
+    {
+      "epoch": 7.482993197278912,
+      "grad_norm": 2.119438409805298,
+      "learning_rate": 9.891111111111113e-06,
+      "loss": 0.0436,
+      "step": 550
+    },
+    {
+      "epoch": 7.8231292517006805,
+      "grad_norm": 1.8895118236541748,
+      "learning_rate": 9.835555555555556e-06,
+      "loss": 0.042,
+      "step": 575
+    },
+    {
+      "epoch": 8.16326530612245,
+      "grad_norm": 1.7676234245300293,
+      "learning_rate": 9.780000000000001e-06,
+      "loss": 0.0351,
+      "step": 600
+    },
+    {
+      "epoch": 8.503401360544217,
+      "grad_norm": 1.8845597505569458,
+      "learning_rate": 9.724444444444445e-06,
+      "loss": 0.0279,
+      "step": 625
+    },
+    {
+      "epoch": 8.843537414965986,
+      "grad_norm": 9.495149612426758,
+      "learning_rate": 9.66888888888889e-06,
+      "loss": 0.031,
+      "step": 650
+    },
+    {
+      "epoch": 9.183673469387756,
+      "grad_norm": 1.6925195455551147,
+      "learning_rate": 9.613333333333335e-06,
+      "loss": 0.0303,
+      "step": 675
+    },
+    {
+      "epoch": 9.523809523809524,
+      "grad_norm": 1.4979898929595947,
+      "learning_rate": 9.557777777777777e-06,
+      "loss": 0.023,
+      "step": 700
+    },
+    {
+      "epoch": 9.863945578231293,
+      "grad_norm": 2.2269773483276367,
+      "learning_rate": 9.502222222222223e-06,
+      "loss": 0.0261,
+      "step": 725
+    },
+    {
+      "epoch": 10.204081632653061,
+      "grad_norm": 1.0259639024734497,
+      "learning_rate": 9.446666666666667e-06,
+      "loss": 0.0226,
+      "step": 750
+    },
+    {
+      "epoch": 10.54421768707483,
+      "grad_norm": 1.9924999475479126,
+      "learning_rate": 9.391111111111111e-06,
+      "loss": 0.0205,
+      "step": 775
+    },
+    {
+      "epoch": 10.884353741496598,
+      "grad_norm": 1.616970419883728,
+      "learning_rate": 9.335555555555557e-06,
+      "loss": 0.0199,
+      "step": 800
+    },
+    {
+      "epoch": 11.224489795918368,
+      "grad_norm": 0.922492504119873,
+      "learning_rate": 9.280000000000001e-06,
+      "loss": 0.0161,
+      "step": 825
+    },
+    {
+      "epoch": 11.564625850340136,
+      "grad_norm": 2.508662223815918,
+      "learning_rate": 9.224444444444445e-06,
+      "loss": 0.0145,
+      "step": 850
+    },
+    {
+      "epoch": 11.904761904761905,
+      "grad_norm": 1.371565341949463,
+      "learning_rate": 9.168888888888889e-06,
+      "loss": 0.0179,
+      "step": 875
+    },
+    {
+      "epoch": 12.244897959183673,
+      "grad_norm": 1.303175687789917,
+      "learning_rate": 9.113333333333335e-06,
+      "loss": 0.0155,
+      "step": 900
+    },
+    {
+      "epoch": 12.585034013605442,
+      "grad_norm": 1.1102138757705688,
+      "learning_rate": 9.057777777777779e-06,
+      "loss": 0.012,
+      "step": 925
+    },
+    {
+      "epoch": 12.92517006802721,
+      "grad_norm": 0.8504889011383057,
+      "learning_rate": 9.002222222222223e-06,
+      "loss": 0.0121,
+      "step": 950
+    },
+    {
+      "epoch": 13.26530612244898,
+      "grad_norm": 0.8174204230308533,
+      "learning_rate": 8.946666666666669e-06,
+      "loss": 0.0106,
+      "step": 975
+    },
+    {
+      "epoch": 13.60544217687075,
+      "grad_norm": 1.821559190750122,
+      "learning_rate": 8.891111111111111e-06,
+      "loss": 0.0112,
+      "step": 1000
+    },
+    {
+      "epoch": 13.60544217687075,
+      "eval_loss": 0.39124733209609985,
+      "eval_runtime": 93.6528,
+      "eval_samples_per_second": 2.776,
+      "eval_steps_per_second": 0.182,
+      "eval_wer": 0.23946288698246923,
+      "step": 1000
+    },
+    {
+      "epoch": 13.945578231292517,
+      "grad_norm": 1.2810653448104858,
+      "learning_rate": 8.835555555555557e-06,
+      "loss": 0.0111,
+      "step": 1025
+    },
+    {
+      "epoch": 14.285714285714286,
+      "grad_norm": 1.2741467952728271,
+      "learning_rate": 8.78e-06,
+      "loss": 0.0097,
+      "step": 1050
+    },
+    {
+      "epoch": 14.625850340136054,
+      "grad_norm": 0.8524342179298401,
+      "learning_rate": 8.724444444444445e-06,
+      "loss": 0.0076,
+      "step": 1075
+    },
+    {
+      "epoch": 14.965986394557824,
+      "grad_norm": 1.643485426902771,
+      "learning_rate": 8.66888888888889e-06,
+      "loss": 0.0074,
+      "step": 1100
+    },
+    {
+      "epoch": 15.306122448979592,
+      "grad_norm": 0.40055137872695923,
+      "learning_rate": 8.613333333333333e-06,
+      "loss": 0.007,
+      "step": 1125
+    },
+    {
+      "epoch": 15.646258503401361,
+      "grad_norm": 1.1712241172790527,
+      "learning_rate": 8.557777777777778e-06,
+      "loss": 0.0072,
+      "step": 1150
+    },
+    {
+      "epoch": 15.986394557823129,
+      "grad_norm": 0.32212740182876587,
+      "learning_rate": 8.502222222222223e-06,
+      "loss": 0.007,
+      "step": 1175
+    },
+    {
+      "epoch": 16.3265306122449,
+      "grad_norm": 0.2166888266801834,
+      "learning_rate": 8.446666666666668e-06,
+      "loss": 0.0054,
+      "step": 1200
+    },
+    {
+      "epoch": 16.666666666666668,
+      "grad_norm": 0.12256942689418793,
+      "learning_rate": 8.391111111111112e-06,
+      "loss": 0.0039,
+      "step": 1225
+    },
+    {
+      "epoch": 17.006802721088434,
+      "grad_norm": 0.26391106843948364,
+      "learning_rate": 8.335555555555556e-06,
+      "loss": 0.0042,
+      "step": 1250
+    },
+    {
+      "epoch": 17.346938775510203,
+      "grad_norm": 0.24293136596679688,
+      "learning_rate": 8.28e-06,
+      "loss": 0.0036,
+      "step": 1275
+    },
+    {
+      "epoch": 17.687074829931973,
+      "grad_norm": 0.27556732296943665,
+      "learning_rate": 8.224444444444444e-06,
+      "loss": 0.0028,
+      "step": 1300
+    },
+    {
+      "epoch": 18.027210884353742,
+      "grad_norm": 0.9470342397689819,
+      "learning_rate": 8.16888888888889e-06,
+      "loss": 0.0042,
+      "step": 1325
+    },
+    {
+      "epoch": 18.367346938775512,
+      "grad_norm": 0.14824901521205902,
+      "learning_rate": 8.113333333333334e-06,
+      "loss": 0.0036,
+      "step": 1350
+    },
+    {
+      "epoch": 18.707482993197278,
+      "grad_norm": 1.2378164529800415,
+      "learning_rate": 8.057777777777778e-06,
+      "loss": 0.0046,
+      "step": 1375
+    },
+    {
+      "epoch": 19.047619047619047,
+      "grad_norm": 2.7857964038848877,
+      "learning_rate": 8.002222222222222e-06,
+      "loss": 0.004,
+      "step": 1400
+    },
+    {
+      "epoch": 19.387755102040817,
+      "grad_norm": 0.5624294281005859,
+      "learning_rate": 7.946666666666666e-06,
+      "loss": 0.0073,
+      "step": 1425
+    },
+    {
+      "epoch": 19.727891156462587,
+      "grad_norm": 0.18347227573394775,
+      "learning_rate": 7.891111111111112e-06,
+      "loss": 0.0058,
+      "step": 1450
+    },
+    {
+      "epoch": 20.068027210884352,
+      "grad_norm": 0.3734131455421448,
+      "learning_rate": 7.835555555555556e-06,
+      "loss": 0.0066,
+      "step": 1475
+    },
+    {
+      "epoch": 20.408163265306122,
+      "grad_norm": 0.6362162828445435,
+      "learning_rate": 7.78e-06,
+      "loss": 0.0075,
+      "step": 1500
+    },
+    {
+      "epoch": 20.74829931972789,
+      "grad_norm": 0.8834488391876221,
+      "learning_rate": 7.724444444444446e-06,
+      "loss": 0.0057,
+      "step": 1525
+    },
+    {
+      "epoch": 21.08843537414966,
+      "grad_norm": 0.06029968708753586,
+      "learning_rate": 7.66888888888889e-06,
+      "loss": 0.0038,
+      "step": 1550
+    },
+    {
+      "epoch": 21.428571428571427,
+      "grad_norm": 1.0105019807815552,
+      "learning_rate": 7.613333333333334e-06,
+      "loss": 0.0039,
+      "step": 1575
+    },
+    {
+      "epoch": 21.768707482993197,
+      "grad_norm": 0.5381556153297424,
+      "learning_rate": 7.557777777777779e-06,
+      "loss": 0.0036,
+      "step": 1600
+    },
+    {
+      "epoch": 22.108843537414966,
+      "grad_norm": 0.08822619915008545,
+      "learning_rate": 7.502222222222223e-06,
+      "loss": 0.004,
+      "step": 1625
+    },
+    {
+      "epoch": 22.448979591836736,
+      "grad_norm": 0.43402913212776184,
+      "learning_rate": 7.446666666666668e-06,
+      "loss": 0.0029,
+      "step": 1650
+    },
+    {
+      "epoch": 22.7891156462585,
+      "grad_norm": 0.9147214293479919,
+      "learning_rate": 7.3911111111111125e-06,
+      "loss": 0.0024,
+      "step": 1675
+    },
+    {
+      "epoch": 23.12925170068027,
+      "grad_norm": 0.48390820622444153,
+      "learning_rate": 7.335555555555556e-06,
+      "loss": 0.0036,
+      "step": 1700
+    },
+    {
+      "epoch": 23.46938775510204,
+      "grad_norm": 0.10725089907646179,
+      "learning_rate": 7.280000000000001e-06,
+      "loss": 0.0023,
+      "step": 1725
+    },
+    {
+      "epoch": 23.80952380952381,
+      "grad_norm": 0.09872180968523026,
+      "learning_rate": 7.224444444444445e-06,
+      "loss": 0.0018,
+      "step": 1750
+    },
+    {
+      "epoch": 24.14965986394558,
+      "grad_norm": 0.6679806113243103,
+      "learning_rate": 7.1688888888888895e-06,
+      "loss": 0.0017,
+      "step": 1775
+    },
+    {
+      "epoch": 24.489795918367346,
+      "grad_norm": 0.02364278770983219,
+      "learning_rate": 7.113333333333334e-06,
+      "loss": 0.001,
+      "step": 1800
+    },
+    {
+      "epoch": 24.829931972789115,
+      "grad_norm": 0.02158285863697529,
+      "learning_rate": 7.057777777777778e-06,
+      "loss": 0.0008,
+      "step": 1825
+    },
+    {
+      "epoch": 25.170068027210885,
+      "grad_norm": 0.014277754351496696,
+      "learning_rate": 7.0022222222222225e-06,
+      "loss": 0.0007,
+      "step": 1850
+    },
+    {
+      "epoch": 25.510204081632654,
+      "grad_norm": 0.012241716496646404,
+      "learning_rate": 6.946666666666667e-06,
+      "loss": 0.0005,
+      "step": 1875
+    },
+    {
+      "epoch": 25.85034013605442,
+      "grad_norm": 0.02822299115359783,
+      "learning_rate": 6.891111111111111e-06,
+      "loss": 0.0005,
+      "step": 1900
+    },
+    {
+      "epoch": 26.19047619047619,
+      "grad_norm": 0.009908878244459629,
+      "learning_rate": 6.835555555555556e-06,
+      "loss": 0.0004,
+      "step": 1925
+    },
+    {
+      "epoch": 26.53061224489796,
+      "grad_norm": 0.008494613692164421,
+      "learning_rate": 6.780000000000001e-06,
+      "loss": 0.0004,
+      "step": 1950
+    },
+    {
+      "epoch": 26.87074829931973,
+      "grad_norm": 0.007728059310466051,
+      "learning_rate": 6.724444444444444e-06,
+      "loss": 0.0004,
+      "step": 1975
+    },
+    {
+      "epoch": 27.2108843537415,
+      "grad_norm": 0.007557597942650318,
+      "learning_rate": 6.668888888888889e-06,
+      "loss": 0.0004,
+      "step": 2000
+    },
+    {
+      "epoch": 27.2108843537415,
+      "eval_loss": 0.45324987173080444,
+      "eval_runtime": 93.804,
+      "eval_samples_per_second": 2.772,
+      "eval_steps_per_second": 0.181,
+      "eval_wer": 0.2245430809399478,
+      "step": 2000
+    },
+    {
+      "epoch": 27.551020408163264,
+      "grad_norm": 0.009665679186582565,
+      "learning_rate": 6.613333333333334e-06,
+      "loss": 0.0004,
+      "step": 2025
+    },
+    {
+      "epoch": 27.891156462585034,
+      "grad_norm": 0.006815009750425816,
+      "learning_rate": 6.557777777777778e-06,
+      "loss": 0.0004,
+      "step": 2050
+    },
+    {
+      "epoch": 28.231292517006803,
+      "grad_norm": 0.007364605087786913,
+      "learning_rate": 6.502222222222223e-06,
+      "loss": 0.0003,
+      "step": 2075
+    },
+    {
+      "epoch": 28.571428571428573,
+      "grad_norm": 0.006635705474764109,
+      "learning_rate": 6.446666666666668e-06,
+      "loss": 0.0003,
+      "step": 2100
+    },
+    {
+      "epoch": 28.91156462585034,
+      "grad_norm": 0.008073186501860619,
+      "learning_rate": 6.391111111111111e-06,
+      "loss": 0.0003,
+      "step": 2125
+    },
+    {
+      "epoch": 29.25170068027211,
+      "grad_norm": 0.006342068314552307,
+      "learning_rate": 6.335555555555556e-06,
+      "loss": 0.0003,
+      "step": 2150
+    },
+    {
+      "epoch": 29.591836734693878,
+      "grad_norm": 0.006897253915667534,
+      "learning_rate": 6.280000000000001e-06,
+      "loss": 0.0003,
+      "step": 2175
+    },
+    {
+      "epoch": 29.931972789115648,
+      "grad_norm": 0.006329766474664211,
+      "learning_rate": 6.224444444444445e-06,
+      "loss": 0.0003,
+      "step": 2200
+    },
+    {
+      "epoch": 30.272108843537413,
+      "grad_norm": 0.006696599069982767,
+      "learning_rate": 6.16888888888889e-06,
+      "loss": 0.0003,
+      "step": 2225
+    },
+    {
+      "epoch": 30.612244897959183,
+      "grad_norm": 0.0058494312688708305,
+      "learning_rate": 6.113333333333333e-06,
+      "loss": 0.0003,
+      "step": 2250
+    },
+    {
+      "epoch": 30.952380952380953,
+      "grad_norm": 0.005851502064615488,
+      "learning_rate": 6.057777777777778e-06,
+      "loss": 0.0003,
+      "step": 2275
+    },
+    {
+      "epoch": 31.292517006802722,
+      "grad_norm": 0.0047736396081745625,
+      "learning_rate": 6.002222222222223e-06,
+      "loss": 0.0003,
+      "step": 2300
+    },
+    {
+      "epoch": 31.632653061224488,
+      "grad_norm": 0.006324047688394785,
+      "learning_rate": 5.946666666666668e-06,
+      "loss": 0.0003,
+      "step": 2325
+    },
+    {
+      "epoch": 31.972789115646258,
+      "grad_norm": 0.005418767221271992,
+      "learning_rate": 5.891111111111112e-06,
+      "loss": 0.0003,
+      "step": 2350
+    },
+    {
+      "epoch": 32.31292517006803,
+      "grad_norm": 0.005563849117606878,
+      "learning_rate": 5.8355555555555565e-06,
+      "loss": 0.0003,
+      "step": 2375
+    },
+    {
+      "epoch": 32.6530612244898,
+      "grad_norm": 0.005108444020152092,
+      "learning_rate": 5.78e-06,
+      "loss": 0.0002,
+      "step": 2400
+    },
+    {
+      "epoch": 32.993197278911566,
+      "grad_norm": 0.004787669517099857,
+      "learning_rate": 5.724444444444445e-06,
+      "loss": 0.0003,
+      "step": 2425
+    },
+    {
+      "epoch": 33.333333333333336,
+      "grad_norm": 0.004051292315125465,
+      "learning_rate": 5.6688888888888895e-06,
+      "loss": 0.0002,
+      "step": 2450
+    },
+    {
+      "epoch": 33.673469387755105,
+      "grad_norm": 0.005220952443778515,
+      "learning_rate": 5.613333333333334e-06,
+      "loss": 0.0002,
+      "step": 2475
+    },
+    {
+      "epoch": 34.01360544217687,
+      "grad_norm": 0.0054339151829481125,
+      "learning_rate": 5.557777777777778e-06,
+      "loss": 0.0002,
+      "step": 2500
+    },
+    {
+      "epoch": 34.35374149659864,
+      "grad_norm": 0.004454713314771652,
+      "learning_rate": 5.5022222222222224e-06,
+      "loss": 0.0002,
+      "step": 2525
+    },
+    {
+      "epoch": 34.69387755102041,
+      "grad_norm": 0.005186771042644978,
+      "learning_rate": 5.4466666666666665e-06,
+      "loss": 0.0002,
+      "step": 2550
+    },
+    {
+      "epoch": 35.034013605442176,
+      "grad_norm": 0.004502983298152685,
+      "learning_rate": 5.391111111111111e-06,
+      "loss": 0.0002,
+      "step": 2575
+    },
+    {
+      "epoch": 35.374149659863946,
+      "grad_norm": 0.004623442888259888,
+      "learning_rate": 5.335555555555556e-06,
+      "loss": 0.0002,
+      "step": 2600
+    },
+    {
+      "epoch": 35.714285714285715,
+      "grad_norm": 0.00428406847640872,
+      "learning_rate": 5.28e-06,
+      "loss": 0.0002,
+      "step": 2625
+    },
+    {
+      "epoch": 36.054421768707485,
+      "grad_norm": 0.004207184072583914,
+      "learning_rate": 5.224444444444445e-06,
+      "loss": 0.0002,
+      "step": 2650
+    },
+    {
+      "epoch": 36.394557823129254,
+      "grad_norm": 0.004264296032488346,
+      "learning_rate": 5.168888888888889e-06,
+      "loss": 0.0002,
+      "step": 2675
+    },
+    {
+      "epoch": 36.734693877551024,
+      "grad_norm": 0.0045384918339550495,
+      "learning_rate": 5.113333333333333e-06,
+      "loss": 0.0002,
+      "step": 2700
+    },
+    {
+      "epoch": 37.074829931972786,
+      "grad_norm": 0.0036523097660392523,
+      "learning_rate": 5.057777777777778e-06,
+      "loss": 0.0002,
+      "step": 2725
+    },
+    {
+      "epoch": 37.414965986394556,
+      "grad_norm": 0.003838042262941599,
+      "learning_rate": 5.002222222222223e-06,
+      "loss": 0.0002,
+      "step": 2750
+    },
+    {
+      "epoch": 37.755102040816325,
+      "grad_norm": 0.0043487842194736,
+      "learning_rate": 4.946666666666667e-06,
+      "loss": 0.0002,
+      "step": 2775
+    },
+    {
+      "epoch": 38.095238095238095,
+      "grad_norm": 0.004179787822067738,
+      "learning_rate": 4.891111111111111e-06,
+      "loss": 0.0002,
+      "step": 2800
+    },
+    {
+      "epoch": 38.435374149659864,
+      "grad_norm": 0.0036503339651972055,
+      "learning_rate": 4.835555555555556e-06,
+      "loss": 0.0002,
+      "step": 2825
+    },
+    {
+      "epoch": 38.775510204081634,
+      "grad_norm": 0.0033976498525589705,
+      "learning_rate": 4.78e-06,
+      "loss": 0.0002,
+      "step": 2850
+    },
+    {
+      "epoch": 39.1156462585034,
+      "grad_norm": 0.0038732371758669615,
+      "learning_rate": 4.724444444444445e-06,
+      "loss": 0.0002,
+      "step": 2875
+    },
+    {
+      "epoch": 39.45578231292517,
+      "grad_norm": 0.003690896322950721,
+      "learning_rate": 4.66888888888889e-06,
+      "loss": 0.0002,
+      "step": 2900
+    },
+    {
+      "epoch": 39.795918367346935,
+      "grad_norm": 0.005354354623705149,
+      "learning_rate": 4.613333333333334e-06,
+      "loss": 0.0002,
+      "step": 2925
+    },
+    {
+      "epoch": 40.136054421768705,
+      "grad_norm": 0.0036710058338940144,
+      "learning_rate": 4.557777777777778e-06,
+      "loss": 0.0002,
+      "step": 2950
+    },
+    {
+      "epoch": 40.476190476190474,
+      "grad_norm": 0.005290627479553223,
+      "learning_rate": 4.502222222222223e-06,
+      "loss": 0.0002,
+      "step": 2975
+    },
+    {
+      "epoch": 40.816326530612244,
+      "grad_norm": 0.003753775032237172,
+      "learning_rate": 4.446666666666667e-06,
+      "loss": 0.0002,
+      "step": 3000
+    },
+    {
+      "epoch": 40.816326530612244,
+      "eval_loss": 0.4882185459136963,
+      "eval_runtime": 93.7044,
+      "eval_samples_per_second": 2.775,
+      "eval_steps_per_second": 0.181,
+      "eval_wer": 0.2174561730697501,
+      "step": 3000
+    },
+    {
+      "epoch": 41.156462585034014,
+      "grad_norm": 0.004405771382153034,
+      "learning_rate": 4.391111111111112e-06,
+      "loss": 0.0002,
+      "step": 3025
+    },
+    {
+      "epoch": 41.49659863945578,
+      "grad_norm": 0.0036535647232085466,
+      "learning_rate": 4.3355555555555565e-06,
+      "loss": 0.0002,
+      "step": 3050
+    },
+    {
+      "epoch": 41.83673469387755,
+      "grad_norm": 0.0036972814705222845,
+      "learning_rate": 4.2800000000000005e-06,
+      "loss": 0.0002,
+      "step": 3075
+    },
+    {
+      "epoch": 42.17687074829932,
+      "grad_norm": 0.004110525827854872,
+      "learning_rate": 4.2244444444444446e-06,
+      "loss": 0.0002,
+      "step": 3100
+    },
+    {
+      "epoch": 42.51700680272109,
+      "grad_norm": 0.0035640313290059566,
+      "learning_rate": 4.168888888888889e-06,
+      "loss": 0.0002,
+      "step": 3125
+    },
+    {
+      "epoch": 42.857142857142854,
+      "grad_norm": 0.004424062091857195,
+      "learning_rate": 4.1133333333333335e-06,
+      "loss": 0.0002,
+      "step": 3150
+    },
+    {
+      "epoch": 43.197278911564624,
+      "grad_norm": 0.0032335869036614895,
+      "learning_rate": 4.057777777777778e-06,
+      "loss": 0.0002,
+      "step": 3175
+    },
+    {
+      "epoch": 43.53741496598639,
+      "grad_norm": 0.0037836297415196896,
+      "learning_rate": 4.002222222222222e-06,
+      "loss": 0.0002,
+      "step": 3200
+    },
+    {
+      "epoch": 43.87755102040816,
+      "grad_norm": 0.003560603130608797,
+      "learning_rate": 3.946666666666667e-06,
+      "loss": 0.0002,
+      "step": 3225
+    },
+    {
+      "epoch": 44.21768707482993,
+      "grad_norm": 0.003510043490678072,
+      "learning_rate": 3.891111111111111e-06,
+      "loss": 0.0002,
+      "step": 3250
+    },
+    {
+      "epoch": 44.5578231292517,
+      "grad_norm": 0.0028691268526017666,
+      "learning_rate": 3.835555555555555e-06,
+      "loss": 0.0002,
+      "step": 3275
+    },
+    {
+      "epoch": 44.89795918367347,
+      "grad_norm": 0.0031337698455899954,
+      "learning_rate": 3.7800000000000002e-06,
+      "loss": 0.0001,
+      "step": 3300
+    },
+    {
+      "epoch": 45.23809523809524,
+      "grad_norm": 0.00317736086435616,
+      "learning_rate": 3.724444444444445e-06,
+      "loss": 0.0001,
+      "step": 3325
+    },
+    {
+      "epoch": 45.578231292517,
+      "grad_norm": 0.0029643489979207516,
+      "learning_rate": 3.668888888888889e-06,
+      "loss": 0.0002,
+      "step": 3350
+    },
+    {
+      "epoch": 45.91836734693877,
+      "grad_norm": 0.003078688168898225,
+      "learning_rate": 3.6133333333333336e-06,
+      "loss": 0.0001,
+      "step": 3375
+    },
+    {
+      "epoch": 46.25850340136054,
+      "grad_norm": 0.003043568693101406,
+      "learning_rate": 3.5577777777777785e-06,
+      "loss": 0.0001,
+      "step": 3400
+    },
+    {
+      "epoch": 46.59863945578231,
+      "grad_norm": 0.003218689002096653,
+      "learning_rate": 3.5022222222222225e-06,
+      "loss": 0.0001,
+      "step": 3425
+    },
+    {
+      "epoch": 46.93877551020408,
+      "grad_norm": 0.003266324056312442,
+      "learning_rate": 3.446666666666667e-06,
+      "loss": 0.0001,
+      "step": 3450
+    },
+    {
+      "epoch": 47.27891156462585,
+      "grad_norm": 0.003477707039564848,
+      "learning_rate": 3.391111111111111e-06,
+      "loss": 0.0001,
+      "step": 3475
+    },
+    {
+      "epoch": 47.61904761904762,
+      "grad_norm": 0.0027373475022614002,
+      "learning_rate": 3.335555555555556e-06,
+      "loss": 0.0001,
+      "step": 3500
+    },
+    {
+      "epoch": 47.95918367346939,
+      "grad_norm": 0.002786448458209634,
+      "learning_rate": 3.2800000000000004e-06,
+      "loss": 0.0001,
+      "step": 3525
+    },
+    {
+      "epoch": 48.29931972789116,
+      "grad_norm": 0.002394324168562889,
+      "learning_rate": 3.2244444444444444e-06,
+      "loss": 0.0001,
+      "step": 3550
+    },
+    {
+      "epoch": 48.63945578231292,
+      "grad_norm": 0.003250208217650652,
+      "learning_rate": 3.1688888888888893e-06,
+      "loss": 0.0001,
+      "step": 3575
+    },
+    {
+      "epoch": 48.97959183673469,
+      "grad_norm": 0.0029996377415955067,
+      "learning_rate": 3.1133333333333337e-06,
+      "loss": 0.0001,
+      "step": 3600
+    },
+    {
+      "epoch": 49.31972789115646,
+      "grad_norm": 0.0026746434159576893,
+      "learning_rate": 3.0577777777777778e-06,
+      "loss": 0.0001,
+      "step": 3625
+    },
+    {
+      "epoch": 49.65986394557823,
+      "grad_norm": 0.00262379739433527,
+      "learning_rate": 3.0022222222222227e-06,
+      "loss": 0.0001,
+      "step": 3650
+    },
+    {
+      "epoch": 50.0,
+      "grad_norm": 0.0029098980594426394,
+      "learning_rate": 2.946666666666667e-06,
+      "loss": 0.0001,
+      "step": 3675
+    },
+    {
+      "epoch": 50.34013605442177,
+      "grad_norm": 0.002616139827296138,
+      "learning_rate": 2.891111111111111e-06,
+      "loss": 0.0001,
+      "step": 3700
+    },
+    {
+      "epoch": 50.68027210884354,
+      "grad_norm": 0.0029571950435638428,
+      "learning_rate": 2.835555555555556e-06,
+      "loss": 0.0001,
+      "step": 3725
+    },
+    {
+      "epoch": 51.02040816326531,
+      "grad_norm": 0.0027916007675230503,
+      "learning_rate": 2.7800000000000005e-06,
+      "loss": 0.0001,
+      "step": 3750
+    },
+    {
+      "epoch": 51.36054421768708,
+      "grad_norm": 0.002735557733103633,
+      "learning_rate": 2.7244444444444445e-06,
+      "loss": 0.0001,
+      "step": 3775
+    },
+    {
+      "epoch": 51.70068027210884,
+      "grad_norm": 0.0023191324435174465,
+      "learning_rate": 2.6688888888888894e-06,
+      "loss": 0.0001,
+      "step": 3800
+    },
+    {
+      "epoch": 52.04081632653061,
+      "grad_norm": 0.0034847650676965714,
+      "learning_rate": 2.6133333333333334e-06,
+      "loss": 0.0001,
+      "step": 3825
+    },
+    {
+      "epoch": 52.38095238095238,
+      "grad_norm": 0.002770556602627039,
+      "learning_rate": 2.557777777777778e-06,
+      "loss": 0.0001,
+      "step": 3850
+    },
+    {
+      "epoch": 52.72108843537415,
+      "grad_norm": 0.0030505817849189043,
+      "learning_rate": 2.5022222222222224e-06,
+      "loss": 0.0001,
+      "step": 3875
+    },
+    {
+      "epoch": 53.06122448979592,
+      "grad_norm": 0.003404865274205804,
+      "learning_rate": 2.446666666666667e-06,
+      "loss": 0.0001,
+      "step": 3900
+    },
+    {
+      "epoch": 53.40136054421769,
+      "grad_norm": 0.0026544102001935244,
+      "learning_rate": 2.3911111111111113e-06,
+      "loss": 0.0001,
+      "step": 3925
+    },
+    {
+      "epoch": 53.74149659863946,
+      "grad_norm": 0.00271439622156322,
+      "learning_rate": 2.3355555555555557e-06,
+      "loss": 0.0001,
+      "step": 3950
+    },
+    {
+      "epoch": 54.08163265306123,
+      "grad_norm": 0.0033124638721346855,
+      "learning_rate": 2.28e-06,
+      "loss": 0.0001,
+      "step": 3975
+    },
+    {
+      "epoch": 54.421768707483,
+      "grad_norm": 0.0025922644417732954,
+      "learning_rate": 2.2244444444444447e-06,
+      "loss": 0.0001,
+      "step": 4000
+    },
+    {
+      "epoch": 54.421768707483,
+      "eval_loss": 0.5051469206809998,
+      "eval_runtime": 95.0455,
+      "eval_samples_per_second": 2.736,
+      "eval_steps_per_second": 0.179,
+      "eval_wer": 0.21484520701230883,
+      "step": 4000
+    },
+    {
+      "epoch": 54.76190476190476,
+      "grad_norm": 0.0020597511902451515,
+      "learning_rate": 2.168888888888889e-06,
+      "loss": 0.0001,
+      "step": 4025
+    },
+    {
+      "epoch": 55.10204081632653,
+      "grad_norm": 0.002817349275574088,
+      "learning_rate": 2.1133333333333336e-06,
+      "loss": 0.0001,
+      "step": 4050
+    },
+    {
+      "epoch": 55.4421768707483,
+      "grad_norm": 0.003287636674940586,
+      "learning_rate": 2.057777777777778e-06,
+      "loss": 0.0001,
+      "step": 4075
+    },
+    {
+      "epoch": 55.78231292517007,
+      "grad_norm": 0.00247744913212955,
+      "learning_rate": 2.0022222222222225e-06,
+      "loss": 0.0001,
+      "step": 4100
+    },
+    {
+      "epoch": 56.12244897959184,
+      "grad_norm": 0.003431103890761733,
+      "learning_rate": 1.9466666666666665e-06,
+      "loss": 0.0001,
+      "step": 4125
+    },
+    {
+      "epoch": 56.46258503401361,
+      "grad_norm": 0.0024367747828364372,
+      "learning_rate": 1.8911111111111114e-06,
+      "loss": 0.0001,
+      "step": 4150
+    },
+    {
+      "epoch": 56.802721088435376,
+      "grad_norm": 0.0022823926992714405,
+      "learning_rate": 1.8355555555555557e-06,
+      "loss": 0.0001,
+      "step": 4175
+    },
+    {
+      "epoch": 57.142857142857146,
+      "grad_norm": 0.0022000963799655437,
+      "learning_rate": 1.7800000000000001e-06,
+      "loss": 0.0001,
+      "step": 4200
+    },
+    {
+      "epoch": 57.48299319727891,
+      "grad_norm": 0.0023311020340770483,
+      "learning_rate": 1.7244444444444448e-06,
+      "loss": 0.0001,
+      "step": 4225
+    },
+    {
+      "epoch": 57.82312925170068,
+      "grad_norm": 0.002466644160449505,
+      "learning_rate": 1.668888888888889e-06,
+      "loss": 0.0001,
+      "step": 4250
+    },
+    {
+      "epoch": 58.16326530612245,
+      "grad_norm": 0.0023317814338952303,
+      "learning_rate": 1.6133333333333335e-06,
+      "loss": 0.0001,
+      "step": 4275
+    },
+    {
+      "epoch": 58.50340136054422,
+      "grad_norm": 0.0034895280841737986,
+      "learning_rate": 1.5577777777777777e-06,
+      "loss": 0.0001,
+      "step": 4300
+    },
+    {
+      "epoch": 58.843537414965986,
+      "grad_norm": 0.002141441684216261,
+      "learning_rate": 1.5022222222222224e-06,
+      "loss": 0.0001,
+      "step": 4325
+    },
+    {
+      "epoch": 59.183673469387756,
+      "grad_norm": 0.0023929886519908905,
+      "learning_rate": 1.4466666666666669e-06,
+      "loss": 0.0001,
+      "step": 4350
+    },
+    {
+      "epoch": 59.523809523809526,
+      "grad_norm": 0.002914367476478219,
+      "learning_rate": 1.3911111111111111e-06,
+      "loss": 0.0001,
+      "step": 4375
+    },
+    {
+      "epoch": 59.863945578231295,
+      "grad_norm": 0.0023239688016474247,
+      "learning_rate": 1.3355555555555558e-06,
+      "loss": 0.0001,
+      "step": 4400
+    },
+    {
+      "epoch": 60.204081632653065,
+      "grad_norm": 0.00241728313267231,
+      "learning_rate": 1.28e-06,
+      "loss": 0.0001,
+      "step": 4425
+    },
+    {
+      "epoch": 60.54421768707483,
+      "grad_norm": 0.0032376388553529978,
+      "learning_rate": 1.2244444444444445e-06,
+      "loss": 0.0001,
+      "step": 4450
+    },
+    {
+      "epoch": 60.8843537414966,
+      "grad_norm": 0.003632117761299014,
+      "learning_rate": 1.168888888888889e-06,
+      "loss": 0.0001,
+      "step": 4475
+    },
+    {
+      "epoch": 61.224489795918366,
+      "grad_norm": 0.002522936789318919,
+      "learning_rate": 1.1133333333333334e-06,
+      "loss": 0.0001,
+      "step": 4500
+    },
+    {
+      "epoch": 61.564625850340136,
+      "grad_norm": 0.002181953750550747,
+      "learning_rate": 1.0577777777777779e-06,
+      "loss": 0.0001,
+      "step": 4525
+    },
+    {
+      "epoch": 61.904761904761905,
+      "grad_norm": 0.0020987866446375847,
+      "learning_rate": 1.0022222222222223e-06,
+      "loss": 0.0001,
+      "step": 4550
+    },
+    {
+      "epoch": 62.244897959183675,
+      "grad_norm": 0.002102503553032875,
+      "learning_rate": 9.466666666666667e-07,
+      "loss": 0.0001,
+      "step": 4575
+    },
+    {
+      "epoch": 62.585034013605444,
+      "grad_norm": 0.0019837727304548025,
+      "learning_rate": 8.911111111111112e-07,
+      "loss": 0.0001,
+      "step": 4600
+    },
+    {
+      "epoch": 62.925170068027214,
+      "grad_norm": 0.002303441520780325,
+      "learning_rate": 8.355555555555556e-07,
+      "loss": 0.0001,
+      "step": 4625
+    },
+    {
+      "epoch": 63.265306122448976,
+      "grad_norm": 0.007395027671009302,
+      "learning_rate": 7.8e-07,
+      "loss": 0.0001,
+      "step": 4650
+    },
+    {
+      "epoch": 63.605442176870746,
+      "grad_norm": 0.002733208704739809,
+      "learning_rate": 7.244444444444446e-07,
+      "loss": 0.0001,
+      "step": 4675
+    },
+    {
+      "epoch": 63.945578231292515,
+      "grad_norm": 0.0020845523104071617,
+      "learning_rate": 6.68888888888889e-07,
+      "loss": 0.0001,
+      "step": 4700
+    },
+    {
+      "epoch": 64.28571428571429,
+      "grad_norm": 0.0019409642554819584,
+      "learning_rate": 6.133333333333333e-07,
+      "loss": 0.0001,
+      "step": 4725
+    },
+    {
+      "epoch": 64.62585034013605,
+      "grad_norm": 0.00258248602040112,
+      "learning_rate": 5.577777777777779e-07,
+      "loss": 0.0001,
+      "step": 4750
+    },
+    {
+      "epoch": 64.96598639455782,
+      "grad_norm": 0.0025006316136568785,
+      "learning_rate": 5.022222222222222e-07,
+      "loss": 0.0001,
+      "step": 4775
+    },
+    {
+      "epoch": 65.3061224489796,
+      "grad_norm": 0.0022064538206905127,
+      "learning_rate": 4.466666666666667e-07,
+      "loss": 0.0001,
+      "step": 4800
+    },
+    {
+      "epoch": 65.64625850340136,
+      "grad_norm": 0.002108414890244603,
+      "learning_rate": 3.9111111111111115e-07,
+      "loss": 0.0001,
+      "step": 4825
+    },
+    {
+      "epoch": 65.98639455782313,
+      "grad_norm": 0.0021663971710950136,
+      "learning_rate": 3.3555555555555556e-07,
+      "loss": 0.0001,
+      "step": 4850
+    },
+    {
+      "epoch": 66.3265306122449,
+      "grad_norm": 0.00204038736410439,
+      "learning_rate": 2.8e-07,
+      "loss": 0.0001,
+      "step": 4875
+    },
+    {
+      "epoch": 66.66666666666667,
+      "grad_norm": 0.0022622975520789623,
+      "learning_rate": 2.2444444444444445e-07,
+      "loss": 0.0001,
+      "step": 4900
+    },
+    {
+      "epoch": 67.00680272108843,
+      "grad_norm": 0.0033368293661624193,
+      "learning_rate": 1.6888888888888888e-07,
+      "loss": 0.0001,
+      "step": 4925
+    },
+    {
+      "epoch": 67.34693877551021,
+      "grad_norm": 0.0019737225957214832,
+      "learning_rate": 1.1333333333333336e-07,
+      "loss": 0.0001,
+      "step": 4950
+    },
+    {
+      "epoch": 67.68707482993197,
+      "grad_norm": 0.0019130747532472014,
+      "learning_rate": 5.777777777777778e-08,
+      "loss": 0.0001,
+      "step": 4975
+    },
+    {
+      "epoch": 68.02721088435374,
+      "grad_norm": 0.002000050852075219,
+      "learning_rate": 2.2222222222222225e-09,
+      "loss": 0.0001,
+      "step": 5000
+    },
+    {
+      "epoch": 68.02721088435374,
+      "eval_loss": 0.5118595957756042,
+      "eval_runtime": 95.0278,
+      "eval_samples_per_second": 2.736,
+      "eval_steps_per_second": 0.179,
+      "eval_wer": 0.21671018276762402,
+      "step": 5000
+    },
+    {
+      "epoch": 68.02721088435374,
+      "step": 5000,
+      "total_flos": 3.378304801456128e+20,
+      "train_loss": 0.03018118931162171,
+      "train_runtime": 39486.7724,
+      "train_samples_per_second": 4.052,
+      "train_steps_per_second": 0.127
+    }
+  ],
+  "logging_steps": 25,
+  "max_steps": 5000,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 69,
+  "save_steps": 1000,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 3.378304801456128e+20,
+  "train_batch_size": 16,
+  "trial_name": null,
+  "trial_params": null
+}