|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 39.95203400121433, |
|
"eval_steps": 100.0, |
|
"global_step": 32920, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.607164541590771, |
|
"grad_norm": 57.35422134399414, |
|
"learning_rate": 1.188e-06, |
|
"loss": 28.44, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_cer": 1.1284080132764343, |
|
"eval_loss": 10.8607177734375, |
|
"eval_runtime": 145.137, |
|
"eval_samples_per_second": 47.128, |
|
"eval_steps_per_second": 5.891, |
|
"eval_wer": 1.0, |
|
"step": 824 |
|
}, |
|
{ |
|
"epoch": 1.2137219186399515, |
|
"grad_norm": 44.843685150146484, |
|
"learning_rate": 2.3880000000000003e-06, |
|
"loss": 11.3, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.8208864602307226, |
|
"grad_norm": 36.54290008544922, |
|
"learning_rate": 3.588e-06, |
|
"loss": 9.2184, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_cer": 1.1284376481744902, |
|
"eval_loss": 7.510649681091309, |
|
"eval_runtime": 131.1044, |
|
"eval_samples_per_second": 52.172, |
|
"eval_steps_per_second": 6.522, |
|
"eval_wer": 1.0, |
|
"step": 1648 |
|
}, |
|
{ |
|
"epoch": 2.427443837279903, |
|
"grad_norm": 22.59714126586914, |
|
"learning_rate": 4.788e-06, |
|
"loss": 7.0297, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_cer": 1.1283783783783783, |
|
"eval_loss": 4.190179347991943, |
|
"eval_runtime": 131.4162, |
|
"eval_samples_per_second": 52.048, |
|
"eval_steps_per_second": 6.506, |
|
"eval_wer": 1.0, |
|
"step": 2472 |
|
}, |
|
{ |
|
"epoch": 3.0340012143290833, |
|
"grad_norm": 10.585729598999023, |
|
"learning_rate": 5.988e-06, |
|
"loss": 4.8903, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 3.6411657559198543, |
|
"grad_norm": 2.5189406871795654, |
|
"learning_rate": 7.1880000000000005e-06, |
|
"loss": 3.6874, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_cer": 1.1283783783783783, |
|
"eval_loss": 3.1195626258850098, |
|
"eval_runtime": 129.8504, |
|
"eval_samples_per_second": 52.676, |
|
"eval_steps_per_second": 6.584, |
|
"eval_wer": 1.0, |
|
"step": 3296 |
|
}, |
|
{ |
|
"epoch": 4.247723132969035, |
|
"grad_norm": 1.423240303993225, |
|
"learning_rate": 8.388e-06, |
|
"loss": 3.1803, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 4.854887674559806, |
|
"grad_norm": 1.8786451816558838, |
|
"learning_rate": 9.588e-06, |
|
"loss": 2.7259, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_cer": 1.1283783783783783, |
|
"eval_loss": 2.2997305393218994, |
|
"eval_runtime": 128.0868, |
|
"eval_samples_per_second": 53.401, |
|
"eval_steps_per_second": 6.675, |
|
"eval_wer": 1.0, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 5.461445051608986, |
|
"grad_norm": 3.4784176349639893, |
|
"learning_rate": 1.0787999999999999e-05, |
|
"loss": 2.1431, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_cer": 1.0313537221431959, |
|
"eval_loss": 1.0496456623077393, |
|
"eval_runtime": 138.552, |
|
"eval_samples_per_second": 49.368, |
|
"eval_steps_per_second": 6.171, |
|
"eval_wer": 0.9997076023391813, |
|
"step": 4944 |
|
}, |
|
{ |
|
"epoch": 6.068002428658167, |
|
"grad_norm": 2.7191321849823, |
|
"learning_rate": 1.1988000000000001e-05, |
|
"loss": 1.3807, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 6.675166970248937, |
|
"grad_norm": 2.5248982906341553, |
|
"learning_rate": 1.3188e-05, |
|
"loss": 0.8891, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_cer": 1.017425320056899, |
|
"eval_loss": 0.6379755735397339, |
|
"eval_runtime": 135.841, |
|
"eval_samples_per_second": 50.353, |
|
"eval_steps_per_second": 6.294, |
|
"eval_wer": 0.9998538011695907, |
|
"step": 5768 |
|
}, |
|
{ |
|
"epoch": 7.281724347298118, |
|
"grad_norm": 7.416316509246826, |
|
"learning_rate": 1.4388000000000002e-05, |
|
"loss": 0.6223, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 7.888888888888889, |
|
"grad_norm": 6.211233615875244, |
|
"learning_rate": 1.5588e-05, |
|
"loss": 0.4891, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_cer": 1.0305239449976291, |
|
"eval_loss": 0.25890296697616577, |
|
"eval_runtime": 76.2932, |
|
"eval_samples_per_second": 89.654, |
|
"eval_steps_per_second": 11.207, |
|
"eval_wer": 0.9991228070175439, |
|
"step": 6592 |
|
}, |
|
{ |
|
"epoch": 8.49544626593807, |
|
"grad_norm": 2.76125431060791, |
|
"learning_rate": 1.6788e-05, |
|
"loss": 0.3675, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_cer": 1.0932313892840209, |
|
"eval_loss": 0.6099491119384766, |
|
"eval_runtime": 74.5927, |
|
"eval_samples_per_second": 91.698, |
|
"eval_steps_per_second": 11.462, |
|
"eval_wer": 0.9994152046783625, |
|
"step": 7416 |
|
}, |
|
{ |
|
"epoch": 9.102003642987249, |
|
"grad_norm": 8.747713088989258, |
|
"learning_rate": 1.7988e-05, |
|
"loss": 0.3164, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 9.70916818457802, |
|
"grad_norm": 7.222991943359375, |
|
"learning_rate": 1.9188e-05, |
|
"loss": 0.2744, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_cer": 1.023944997629208, |
|
"eval_loss": 0.1262398213148117, |
|
"eval_runtime": 62.8915, |
|
"eval_samples_per_second": 108.759, |
|
"eval_steps_per_second": 13.595, |
|
"eval_wer": 0.9989766081871345, |
|
"step": 8240 |
|
}, |
|
{ |
|
"epoch": 10.3157255616272, |
|
"grad_norm": 7.39332389831543, |
|
"learning_rate": 2.0388e-05, |
|
"loss": 0.2525, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 10.922890103217972, |
|
"grad_norm": 3.6922662258148193, |
|
"learning_rate": 2.1588e-05, |
|
"loss": 0.2278, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_cer": 1.023411569464201, |
|
"eval_loss": 0.11068873107433319, |
|
"eval_runtime": 67.9091, |
|
"eval_samples_per_second": 100.723, |
|
"eval_steps_per_second": 12.59, |
|
"eval_wer": 0.9989766081871345, |
|
"step": 9064 |
|
}, |
|
{ |
|
"epoch": 11.529447480267152, |
|
"grad_norm": 5.623218536376953, |
|
"learning_rate": 2.2788000000000003e-05, |
|
"loss": 0.2148, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_cer": 1.0260787102892366, |
|
"eval_loss": 0.06716426461935043, |
|
"eval_runtime": 89.5293, |
|
"eval_samples_per_second": 76.4, |
|
"eval_steps_per_second": 9.55, |
|
"eval_wer": 0.9989766081871345, |
|
"step": 9888 |
|
}, |
|
{ |
|
"epoch": 12.136004857316333, |
|
"grad_norm": 4.379143714904785, |
|
"learning_rate": 2.3988e-05, |
|
"loss": 0.2055, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 12.743169398907105, |
|
"grad_norm": 5.908865451812744, |
|
"learning_rate": 2.5188e-05, |
|
"loss": 0.1927, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_cer": 1.0219298245614035, |
|
"eval_loss": 0.059190813452005386, |
|
"eval_runtime": 80.5762, |
|
"eval_samples_per_second": 84.889, |
|
"eval_steps_per_second": 10.611, |
|
"eval_wer": 0.9989766081871345, |
|
"step": 10712 |
|
}, |
|
{ |
|
"epoch": 13.349726775956285, |
|
"grad_norm": 3.240206003189087, |
|
"learning_rate": 2.63856e-05, |
|
"loss": 0.1926, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 13.956891317547056, |
|
"grad_norm": 5.876375675201416, |
|
"learning_rate": 2.7585600000000002e-05, |
|
"loss": 0.1723, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_cer": 1.022848506401138, |
|
"eval_loss": 0.08003176748752594, |
|
"eval_runtime": 62.2149, |
|
"eval_samples_per_second": 109.941, |
|
"eval_steps_per_second": 13.743, |
|
"eval_wer": 0.9988304093567252, |
|
"step": 11536 |
|
}, |
|
{ |
|
"epoch": 14.563448694596236, |
|
"grad_norm": 3.714435577392578, |
|
"learning_rate": 2.87856e-05, |
|
"loss": 0.1725, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_cer": 1.021100047415837, |
|
"eval_loss": 0.051837269216775894, |
|
"eval_runtime": 62.5224, |
|
"eval_samples_per_second": 109.401, |
|
"eval_steps_per_second": 13.675, |
|
"eval_wer": 0.9989766081871345, |
|
"step": 12360 |
|
}, |
|
{ |
|
"epoch": 15.170006071645416, |
|
"grad_norm": 9.124956130981445, |
|
"learning_rate": 2.99856e-05, |
|
"loss": 0.1695, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 15.777170613236187, |
|
"grad_norm": 3.6504106521606445, |
|
"learning_rate": 2.9956874399074467e-05, |
|
"loss": 0.1628, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_cer": 1.0142247510668563, |
|
"eval_loss": 0.13594070076942444, |
|
"eval_runtime": 75.4469, |
|
"eval_samples_per_second": 90.66, |
|
"eval_steps_per_second": 11.332, |
|
"eval_wer": 0.9988304093567252, |
|
"step": 13184 |
|
}, |
|
{ |
|
"epoch": 16.38372799028537, |
|
"grad_norm": 3.7022886276245117, |
|
"learning_rate": 2.9825295862461663e-05, |
|
"loss": 0.1626, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 16.99089253187614, |
|
"grad_norm": 1.3335272073745728, |
|
"learning_rate": 2.9606033905859603e-05, |
|
"loss": 0.1567, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_cer": 1.0195886676149835, |
|
"eval_loss": 0.04436279088258743, |
|
"eval_runtime": 64.8228, |
|
"eval_samples_per_second": 105.518, |
|
"eval_steps_per_second": 13.19, |
|
"eval_wer": 0.9989766081871345, |
|
"step": 14008 |
|
}, |
|
{ |
|
"epoch": 17.59744990892532, |
|
"grad_norm": 2.4595658779144287, |
|
"learning_rate": 2.9300385342391396e-05, |
|
"loss": 0.1436, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_cer": 1.0192626837363679, |
|
"eval_loss": 0.04214438423514366, |
|
"eval_runtime": 63.292, |
|
"eval_samples_per_second": 108.07, |
|
"eval_steps_per_second": 13.509, |
|
"eval_wer": 0.9988304093567252, |
|
"step": 14832 |
|
}, |
|
{ |
|
"epoch": 18.204007285974498, |
|
"grad_norm": 0.9890690445899963, |
|
"learning_rate": 2.891015791414923e-05, |
|
"loss": 0.1495, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 18.81117182756527, |
|
"grad_norm": 8.133474349975586, |
|
"learning_rate": 2.843765960040039e-05, |
|
"loss": 0.1351, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_cer": 1.0173067804646752, |
|
"eval_loss": 0.03748102858662605, |
|
"eval_runtime": 63.0874, |
|
"eval_samples_per_second": 108.421, |
|
"eval_steps_per_second": 13.553, |
|
"eval_wer": 0.9988304093567252, |
|
"step": 15656 |
|
}, |
|
{ |
|
"epoch": 19.41772920461445, |
|
"grad_norm": 5.602595806121826, |
|
"learning_rate": 2.7885684967167233e-05, |
|
"loss": 0.1454, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_cer": 1.018640350877193, |
|
"eval_loss": 0.03039967454969883, |
|
"eval_runtime": 77.538, |
|
"eval_samples_per_second": 88.215, |
|
"eval_steps_per_second": 11.027, |
|
"eval_wer": 0.9988304093567252, |
|
"step": 16480 |
|
}, |
|
{ |
|
"epoch": 20.02428658166363, |
|
"grad_norm": 2.98942494392395, |
|
"learning_rate": 2.7257498638915816e-05, |
|
"loss": 0.1353, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 20.6314511232544, |
|
"grad_norm": 6.270744800567627, |
|
"learning_rate": 2.6558287021276313e-05, |
|
"loss": 0.1252, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_cer": 1.0237079184447606, |
|
"eval_loss": 0.05677889287471771, |
|
"eval_runtime": 62.9546, |
|
"eval_samples_per_second": 108.65, |
|
"eval_steps_per_second": 13.581, |
|
"eval_wer": 0.9988304093567252, |
|
"step": 17304 |
|
}, |
|
{ |
|
"epoch": 21.238008500303582, |
|
"grad_norm": 3.5701606273651123, |
|
"learning_rate": 2.578938449744228e-05, |
|
"loss": 0.1249, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 21.845173041894352, |
|
"grad_norm": 6.276644706726074, |
|
"learning_rate": 2.4956668735674143e-05, |
|
"loss": 0.1233, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_cer": 1.0176031294452348, |
|
"eval_loss": 0.029143376275897026, |
|
"eval_runtime": 63.804, |
|
"eval_samples_per_second": 107.203, |
|
"eval_steps_per_second": 13.4, |
|
"eval_wer": 0.9988304093567252, |
|
"step": 18128 |
|
}, |
|
{ |
|
"epoch": 22.451730418943534, |
|
"grad_norm": 3.3094053268432617, |
|
"learning_rate": 2.40650647888375e-05, |
|
"loss": 0.1179, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_cer": 1.016743717401612, |
|
"eval_loss": 0.02712642401456833, |
|
"eval_runtime": 63.365, |
|
"eval_samples_per_second": 107.946, |
|
"eval_steps_per_second": 13.493, |
|
"eval_wer": 0.9988304093567252, |
|
"step": 18952 |
|
}, |
|
{ |
|
"epoch": 23.058287795992715, |
|
"grad_norm": 4.888893127441406, |
|
"learning_rate": 2.3123726366487132e-05, |
|
"loss": 0.1141, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 23.665452337583485, |
|
"grad_norm": 1.941120982170105, |
|
"learning_rate": 2.2130663756909194e-05, |
|
"loss": 0.1108, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_cer": 1.0178402086296823, |
|
"eval_loss": 0.027879294008016586, |
|
"eval_runtime": 65.3857, |
|
"eval_samples_per_second": 104.61, |
|
"eval_steps_per_second": 13.076, |
|
"eval_wer": 0.9988304093567252, |
|
"step": 19776 |
|
}, |
|
{ |
|
"epoch": 24.272009714632667, |
|
"grad_norm": 1.846130132675171, |
|
"learning_rate": 2.1095427217664034e-05, |
|
"loss": 0.1089, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 24.879174256223436, |
|
"grad_norm": 3.097496271133423, |
|
"learning_rate": 2.002413959993121e-05, |
|
"loss": 0.1031, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_cer": 1.0182847321005215, |
|
"eval_loss": 0.03511003032326698, |
|
"eval_runtime": 65.1106, |
|
"eval_samples_per_second": 105.052, |
|
"eval_steps_per_second": 13.132, |
|
"eval_wer": 0.9989766081871345, |
|
"step": 20600 |
|
}, |
|
{ |
|
"epoch": 25.485731633272618, |
|
"grad_norm": 5.548646926879883, |
|
"learning_rate": 1.8923136977067138e-05, |
|
"loss": 0.1006, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_cer": 1.0173364153627311, |
|
"eval_loss": 0.04413418844342232, |
|
"eval_runtime": 62.3139, |
|
"eval_samples_per_second": 109.767, |
|
"eval_steps_per_second": 13.721, |
|
"eval_wer": 0.9989766081871345, |
|
"step": 21424 |
|
}, |
|
{ |
|
"epoch": 26.092289010321796, |
|
"grad_norm": 1.0187814235687256, |
|
"learning_rate": 1.779893117023784e-05, |
|
"loss": 0.1032, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 26.69945355191257, |
|
"grad_norm": 1.9439737796783447, |
|
"learning_rate": 1.665817123460074e-05, |
|
"loss": 0.0946, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_cer": 1.0169511616880038, |
|
"eval_loss": 0.03058658167719841, |
|
"eval_runtime": 64.6664, |
|
"eval_samples_per_second": 105.774, |
|
"eval_steps_per_second": 13.222, |
|
"eval_wer": 0.9988304093567252, |
|
"step": 22248 |
|
}, |
|
{ |
|
"epoch": 27.306010928961747, |
|
"grad_norm": 1.2627675533294678, |
|
"learning_rate": 1.55076041338233e-05, |
|
"loss": 0.0936, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 27.91317547055252, |
|
"grad_norm": 0.8128781318664551, |
|
"learning_rate": 1.4354034835527018e-05, |
|
"loss": 0.09, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_cer": 1.0176920341394025, |
|
"eval_loss": 0.030218515545129776, |
|
"eval_runtime": 64.0242, |
|
"eval_samples_per_second": 106.835, |
|
"eval_steps_per_second": 13.354, |
|
"eval_wer": 0.9988304093567252, |
|
"step": 23072 |
|
}, |
|
{ |
|
"epoch": 28.5197328476017, |
|
"grad_norm": 4.884032249450684, |
|
"learning_rate": 1.3206577220714804e-05, |
|
"loss": 0.0813, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_cer": 1.0178402086296823, |
|
"eval_loss": 0.03485483676195145, |
|
"eval_runtime": 62.0604, |
|
"eval_samples_per_second": 110.215, |
|
"eval_steps_per_second": 13.777, |
|
"eval_wer": 0.9988304093567252, |
|
"step": 23896 |
|
}, |
|
{ |
|
"epoch": 29.12629022465088, |
|
"grad_norm": 0.34174931049346924, |
|
"learning_rate": 1.2067421110204709e-05, |
|
"loss": 0.0844, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 29.73345476624165, |
|
"grad_norm": 0.053012751042842865, |
|
"learning_rate": 1.0945609580796467e-05, |
|
"loss": 0.0806, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_cer": 1.0178698435277382, |
|
"eval_loss": 0.03685862198472023, |
|
"eval_runtime": 66.7781, |
|
"eval_samples_per_second": 102.429, |
|
"eval_steps_per_second": 12.804, |
|
"eval_wer": 0.9988304093567252, |
|
"step": 24720 |
|
}, |
|
{ |
|
"epoch": 30.34001214329083, |
|
"grad_norm": 3.6521289348602295, |
|
"learning_rate": 9.847777526821669e-06, |
|
"loss": 0.0758, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 30.947176684881605, |
|
"grad_norm": 2.7911853790283203, |
|
"learning_rate": 8.780418017286117e-06, |
|
"loss": 0.0763, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_cer": 1.0164770033191086, |
|
"eval_loss": 0.04343733936548233, |
|
"eval_runtime": 63.0385, |
|
"eval_samples_per_second": 108.505, |
|
"eval_steps_per_second": 13.563, |
|
"eval_wer": 0.9989766081871345, |
|
"step": 25544 |
|
}, |
|
{ |
|
"epoch": 31.553734061930783, |
|
"grad_norm": 1.7392168045043945, |
|
"learning_rate": 7.749843892960228e-06, |
|
"loss": 0.075, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_cer": 1.0160028449502134, |
|
"eval_loss": 0.030797116458415985, |
|
"eval_runtime": 63.3897, |
|
"eval_samples_per_second": 107.904, |
|
"eval_steps_per_second": 13.488, |
|
"eval_wer": 0.9988304093567252, |
|
"step": 26368 |
|
}, |
|
{ |
|
"epoch": 32.16029143897996, |
|
"grad_norm": 3.0110056400299072, |
|
"learning_rate": 6.764079092952775e-06, |
|
"loss": 0.0703, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 32.76745598057074, |
|
"grad_norm": 1.4404278993606567, |
|
"learning_rate": 5.8250048617236015e-06, |
|
"loss": 0.0708, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_cer": 1.0166844476055001, |
|
"eval_loss": 0.030816324055194855, |
|
"eval_runtime": 62.6517, |
|
"eval_samples_per_second": 109.175, |
|
"eval_steps_per_second": 13.647, |
|
"eval_wer": 0.9988304093567252, |
|
"step": 27192 |
|
}, |
|
{ |
|
"epoch": 33.374013357619916, |
|
"grad_norm": 0.1344936639070511, |
|
"learning_rate": 4.940195648850366e-06, |
|
"loss": 0.0684, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 33.981177899210685, |
|
"grad_norm": 4.214531421661377, |
|
"learning_rate": 4.114884611130932e-06, |
|
"loss": 0.0668, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_cer": 1.0165659080132765, |
|
"eval_loss": 0.02984553575515747, |
|
"eval_runtime": 63.6993, |
|
"eval_samples_per_second": 107.38, |
|
"eval_steps_per_second": 13.422, |
|
"eval_wer": 0.9988304093567252, |
|
"step": 28016 |
|
}, |
|
{ |
|
"epoch": 34.58773527625986, |
|
"grad_norm": 4.261690139770508, |
|
"learning_rate": 3.353953006586277e-06, |
|
"loss": 0.0639, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_cer": 1.0163880986249407, |
|
"eval_loss": 0.02722448669373989, |
|
"eval_runtime": 64.9453, |
|
"eval_samples_per_second": 105.319, |
|
"eval_steps_per_second": 13.165, |
|
"eval_wer": 0.9988304093567252, |
|
"step": 28840 |
|
}, |
|
{ |
|
"epoch": 35.19429265330905, |
|
"grad_norm": 0.7786476016044617, |
|
"learning_rate": 2.6619013245208524e-06, |
|
"loss": 0.0622, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 35.80145719489982, |
|
"grad_norm": 0.006692953407764435, |
|
"learning_rate": 2.0439854900570527e-06, |
|
"loss": 0.0628, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_cer": 1.0160621147463254, |
|
"eval_loss": 0.02650887332856655, |
|
"eval_runtime": 64.4273, |
|
"eval_samples_per_second": 106.166, |
|
"eval_steps_per_second": 13.271, |
|
"eval_wer": 0.9988304093567252, |
|
"step": 29664 |
|
}, |
|
{ |
|
"epoch": 36.408014571948996, |
|
"grad_norm": 2.9724912643432617, |
|
"learning_rate": 1.501384740615621e-06, |
|
"loss": 0.0628, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_cer": 1.0163288288288288, |
|
"eval_loss": 0.026661457493901253, |
|
"eval_runtime": 63.7838, |
|
"eval_samples_per_second": 107.237, |
|
"eval_steps_per_second": 13.405, |
|
"eval_wer": 0.9988304093567252, |
|
"step": 30488 |
|
}, |
|
{ |
|
"epoch": 37.01457194899818, |
|
"grad_norm": 5.568892478942871, |
|
"learning_rate": 1.0386208296455812e-06, |
|
"loss": 0.0618, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 37.62173649058895, |
|
"grad_norm": 2.4032373428344727, |
|
"learning_rate": 6.584307495643449e-07, |
|
"loss": 0.0586, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_cer": 1.016151019440493, |
|
"eval_loss": 0.02632048726081848, |
|
"eval_runtime": 63.4705, |
|
"eval_samples_per_second": 107.767, |
|
"eval_steps_per_second": 13.471, |
|
"eval_wer": 0.9988304093567252, |
|
"step": 31312 |
|
}, |
|
{ |
|
"epoch": 38.22829386763813, |
|
"grad_norm": 0.8156293630599976, |
|
"learning_rate": 3.6306311427998064e-07, |
|
"loss": 0.0599, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 38.8354584092289, |
|
"grad_norm": 6.638393878936768, |
|
"learning_rate": 1.5426485988442763e-07, |
|
"loss": 0.058, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_cer": 1.0164473684210527, |
|
"eval_loss": 0.028048371896147728, |
|
"eval_runtime": 62.7293, |
|
"eval_samples_per_second": 109.04, |
|
"eval_steps_per_second": 13.63, |
|
"eval_wer": 0.9988304093567252, |
|
"step": 32136 |
|
}, |
|
{ |
|
"epoch": 39.442015786278084, |
|
"grad_norm": 0.12612353265285492, |
|
"learning_rate": 3.327091249336667e-08, |
|
"loss": 0.0588, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 39.95203400121433, |
|
"eval_cer": 1.0163584637268848, |
|
"eval_loss": 0.02687981352210045, |
|
"eval_runtime": 63.6993, |
|
"eval_samples_per_second": 107.38, |
|
"eval_steps_per_second": 13.422, |
|
"eval_wer": 0.9988304093567252, |
|
"step": 32920 |
|
}, |
|
{ |
|
"epoch": 39.95203400121433, |
|
"step": 32920, |
|
"total_flos": 1.7147211678918107e+19, |
|
"train_loss": 1.2561371190290116, |
|
"train_runtime": 30821.1471, |
|
"train_samples_per_second": 68.369, |
|
"train_steps_per_second": 1.068 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 32920, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 40, |
|
"save_steps": 400, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.7147211678918107e+19, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|