Iskaj committed on
Commit
fcf3a29
·
1 Parent(s): 8b9b8d0

added non-lm model

Browse files
config.json ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "facebook/wav2vec2-xls-r-300m",
3
+ "activation_dropout": 0.0,
4
+ "adapter_kernel_size": 3,
5
+ "adapter_stride": 2,
6
+ "add_adapter": false,
7
+ "apply_spec_augment": true,
8
+ "architectures": [
9
+ "Wav2Vec2ForCTC"
10
+ ],
11
+ "attention_dropout": 0.1,
12
+ "bos_token_id": 1,
13
+ "classifier_proj_size": 256,
14
+ "codevector_dim": 768,
15
+ "contrastive_logits_temperature": 0.1,
16
+ "conv_bias": true,
17
+ "conv_dim": [
18
+ 512,
19
+ 512,
20
+ 512,
21
+ 512,
22
+ 512,
23
+ 512,
24
+ 512
25
+ ],
26
+ "conv_kernel": [
27
+ 10,
28
+ 3,
29
+ 3,
30
+ 3,
31
+ 3,
32
+ 2,
33
+ 2
34
+ ],
35
+ "conv_stride": [
36
+ 5,
37
+ 2,
38
+ 2,
39
+ 2,
40
+ 2,
41
+ 2,
42
+ 2
43
+ ],
44
+ "ctc_loss_reduction": "mean",
45
+ "ctc_zero_infinity": false,
46
+ "diversity_loss_weight": 0.1,
47
+ "do_stable_layer_norm": true,
48
+ "eos_token_id": 2,
49
+ "feat_extract_activation": "gelu",
50
+ "feat_extract_dropout": 0.0,
51
+ "feat_extract_norm": "layer",
52
+ "feat_proj_dropout": 0.0,
53
+ "feat_quantizer_dropout": 0.0,
54
+ "final_dropout": 0.0,
55
+ "gradient_checkpointing": false,
56
+ "hidden_act": "gelu",
57
+ "hidden_dropout": 0.1,
58
+ "hidden_size": 1024,
59
+ "initializer_range": 0.02,
60
+ "intermediate_size": 4096,
61
+ "layer_norm_eps": 1e-05,
62
+ "layerdrop": 0.1,
63
+ "mask_feature_length": 10,
64
+ "mask_feature_min_masks": 0,
65
+ "mask_feature_prob": 0.0,
66
+ "mask_time_length": 10,
67
+ "mask_time_min_masks": 2,
68
+ "mask_time_prob": 0.05,
69
+ "model_type": "wav2vec2",
70
+ "num_adapter_layers": 3,
71
+ "num_attention_heads": 16,
72
+ "num_codevector_groups": 2,
73
+ "num_codevectors_per_group": 320,
74
+ "num_conv_pos_embedding_groups": 16,
75
+ "num_conv_pos_embeddings": 128,
76
+ "num_feat_extract_layers": 7,
77
+ "num_hidden_layers": 24,
78
+ "num_negatives": 100,
79
+ "output_hidden_size": 1024,
80
+ "pad_token_id": 47,
81
+ "proj_codevector_dim": 768,
82
+ "tdnn_dilation": [
83
+ 1,
84
+ 2,
85
+ 3,
86
+ 1,
87
+ 1
88
+ ],
89
+ "tdnn_dim": [
90
+ 512,
91
+ 512,
92
+ 512,
93
+ 512,
94
+ 1500
95
+ ],
96
+ "tdnn_kernel": [
97
+ 5,
98
+ 3,
99
+ 3,
100
+ 1,
101
+ 1
102
+ ],
103
+ "torch_dtype": "float32",
104
+ "transformers_version": "4.15.0",
105
+ "use_weighted_layer_sum": false,
106
+ "vocab_size": 48,
107
+ "xvector_output_dim": 512
108
+ }
eval.py ADDED
@@ -0,0 +1,128 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ import argparse
3
+ import re
4
+ from typing import Dict
5
+
6
+ from datasets import Audio, Dataset, load_dataset, load_metric
7
+
8
+ from transformers import AutoFeatureExtractor, pipeline
9
+
10
+
11
def log_results(result: Dataset, args: argparse.Namespace):
    """DO NOT CHANGE. This function computes and logs the result metrics.

    Computes the word error rate (WER) and character error rate (CER) between
    ``result["target"]`` and ``result["prediction"]``, prints both and writes
    them to ``<dataset_id>_eval_results.txt`` in the current directory.

    Args:
        result: Dataset with a ``"prediction"`` and a ``"target"`` column.
        args: Parsed command line arguments. Reads ``dataset``, ``config`` and
            ``split`` (to build the output file names) and ``log_outputs``.

    When ``args.log_outputs`` is truthy, every prediction and target is
    additionally written, preceded by its example index, to
    ``log_<dataset_id>_predictions.txt`` and ``log_<dataset_id>_targets.txt``.
    """

    log_outputs = args.log_outputs
    dataset_id = "_".join(args.dataset.split("/") + [args.config, args.split])

    # load metric
    wer = load_metric("wer")
    cer = load_metric("cer")

    # compute metrics
    wer_result = wer.compute(references=result["target"], predictions=result["prediction"])
    cer_result = cer.compute(references=result["target"], predictions=result["prediction"])

    # print & log results
    result_str = f"WER: {wer_result}\n" f"CER: {cer_result}"
    print(result_str)

    # explicit encoding: transcripts may contain non-ASCII characters
    with open(f"{dataset_id}_eval_results.txt", "w", encoding="utf-8") as f:
        f.write(result_str)

    # log all results in text file. Possibly interesting for analysis
    # `--log_outputs` is a `store_true` flag, i.e. it is False (never None)
    # when omitted, so test its truthiness instead of comparing against None.
    if log_outputs:
        pred_file = f"log_{dataset_id}_predictions.txt"
        target_file = f"log_{dataset_id}_targets.txt"

        with open(pred_file, "w", encoding="utf-8") as p, open(target_file, "w", encoding="utf-8") as t:

            # mapping function to write output
            def write_to_file(batch, i):
                p.write(f"{i}" + "\n")
                p.write(batch["prediction"] + "\n")
                t.write(f"{i}" + "\n")
                t.write(batch["target"] + "\n")

            result.map(write_to_file, with_indices=True)
47
+
48
+
49
def normalize_text(text: str) -> str:
    """DO ADAPT FOR YOUR USE CASE. This function normalizes the target text.

    Lower-cases ``text``, strips the ignored punctuation characters and then
    replaces new lines and runs of two or three spaces by a single space.

    Args:
        text: The reference transcription.

    Returns:
        The normalized transcription.
    """

    # IMPORTANT: this should correspond to the chars that were ignored during training.
    # Raw string so that `\-`, `\;` and `\:` reach the regex engine unchanged
    # instead of being (invalid) string-literal escape sequences.
    chars_to_ignore_regex = r'[,?.!\-\;\:"“%‘”�—’…–]'

    text = re.sub(chars_to_ignore_regex, "", text.lower())

    # In addition, we can normalize the target text, e.g. removing new lines characters etc...
    # note that order is important here: longer sequences have to be replaced
    # before the shorter sequences they contain.
    token_sequences_to_ignore = ["\n\n", "\n", "   ", "  "]

    for sequence in token_sequences_to_ignore:
        text = text.replace(sequence, " ")

    return text
64
+
65
+
66
def main(args):
    """Transcribe one dataset split with ``args.model_id`` and log WER/CER.

    Reads ``dataset``, ``config``, ``split``, ``model_id``, ``chunk_length_s``
    and ``stride_length_s`` from ``args``; the whole namespace is forwarded to
    ``log_results``.
    """
    eval_split = load_dataset(args.dataset, args.config, split=args.split, use_auth_token=True)

    # for a quick smoke test, uncomment to only process the first ten examples
    # eval_split = eval_split.select(range(10))

    # the audio column has to be decoded at the rate the feature extractor expects
    target_rate = AutoFeatureExtractor.from_pretrained(args.model_id).sampling_rate
    eval_split = eval_split.cast_column("audio", Audio(sampling_rate=target_rate))

    recognizer = pipeline("automatic-speech-recognition", model=args.model_id)

    def map_to_pred(example):
        # transcribe one example and attach the normalized reference text
        waveform = example["audio"]["array"]
        output = recognizer(
            waveform,
            chunk_length_s=args.chunk_length_s,
            stride_length_s=args.stride_length_s,
        )
        example["prediction"] = output["text"]
        example["target"] = normalize_text(example["sentence"])
        return example

    # only the freshly added "prediction" and "target" columns are kept
    columns_to_drop = eval_split.column_names
    predictions = eval_split.map(map_to_pred, remove_columns=columns_to_drop)

    # metric computation and logging live in `log_results` (do not change it)
    log_results(predictions, args)
99
+
100
+
101
if __name__ == "__main__":
    # Command line entry point: parse the evaluation options and run `main`.
    parser = argparse.ArgumentParser()

    parser.add_argument(
        "--model_id", type=str, required=True, help="Model identifier. Should be loadable with 🤗 Transformers"
    )
    parser.add_argument(
        "--dataset",
        type=str,
        required=True,
        help="Dataset name to evaluate the `model_id`. Should be loadable with 🤗 Datasets",
    )
    parser.add_argument(
        "--config", type=str, required=True, help="Config of the dataset. *E.g.* `'en'` for Common Voice"
    )
    parser.add_argument("--split", type=str, required=True, help="Split of the dataset. *E.g.* `'test'`")
    # The two help texts below used to advertise defaults of 5 s / 1 s although
    # the actual default is None, which is handed to the pipeline unchanged.
    parser.add_argument(
        "--chunk_length_s",
        type=float,
        default=None,
        help="Chunk length in seconds. If not given, None is passed to the pipeline.",
    )
    parser.add_argument(
        "--stride_length_s",
        type=float,
        default=None,
        help="Stride of the audio chunks in seconds. If not given, None is passed to the pipeline.",
    )
    parser.add_argument(
        "--log_outputs", action="store_true", help="If defined, write outputs to log file for analysis."
    )
    args = parser.parse_args()

    main(args)
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:13864c200105334bc74b14c839995534074b95509bf8d083786727872bd6b244
3
+ size 2490452561
preprocessor_config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "feature_extractor_type": "Wav2Vec2FeatureExtractor",
4
+ "feature_size": 1,
5
+ "padding_side": "right",
6
+ "padding_value": 0.0,
7
+ "return_attention_mask": true,
8
+ "sampling_rate": 16000
9
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:953b1e8b3f3e89ad3eeea3584fb73612d35e6a7a66a2856d857e4f3457345993
3
+ size 1262120497
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:14af191b274fbff12408b4f1c107029e13ddf71394ee229655ff49c2db675fb5
3
+ size 14567
run_speech_recognition_ctc.py ADDED
@@ -0,0 +1,737 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+ # coding=utf-8
3
+ # Copyright 2021 The HuggingFace Inc. team. All rights reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+ """ Fine-tuning a 🤗 Transformers CTC model for automatic speech recognition"""
17
+
18
+ import functools
19
+ import json
20
+ import logging
21
+ import os
22
+ import re
23
+ import sys
24
+ import warnings
25
+ from dataclasses import dataclass, field
26
+ from typing import Dict, List, Optional, Union
27
+
28
+ import datasets
29
+ import numpy as np
30
+ import torch
31
+ from datasets import DatasetDict, load_dataset, load_metric
32
+
33
+ import transformers
34
+ from transformers import (
35
+ AutoConfig,
36
+ AutoFeatureExtractor,
37
+ AutoModelForCTC,
38
+ AutoProcessor,
39
+ AutoTokenizer,
40
+ HfArgumentParser,
41
+ Trainer,
42
+ TrainingArguments,
43
+ Wav2Vec2Processor,
44
+ set_seed,
45
+ )
46
+ from transformers.trainer_utils import get_last_checkpoint, is_main_process
47
+ from transformers.utils import check_min_version
48
+ from transformers.utils.versions import require_version
49
+
50
+
51
# Will error if the minimal version of Transformers is not installed. Remove at your own risk.
check_min_version("4.16.0.dev0")

# The hint has to point at the requirements of this (speech recognition) example.
require_version("datasets>=1.13.3", "To fix: pip install -r examples/pytorch/speech-recognition/requirements.txt")


logger = logging.getLogger(__name__)
58
+
59
+
60
def list_field(default=None, metadata=None):
    """Return a dataclass ``field`` whose default is a list (or ``None``).

    Args:
        default: Default value of the field, either ``None`` or a list.
        metadata: Passed through to ``dataclasses.field``.

    Returns:
        A ``dataclasses.field`` with a ``default_factory``.
    """

    def _fresh_default():
        # hand out a copy so that instances never share (and mutate) one list
        return None if default is None else list(default)

    return field(default_factory=_fresh_default, metadata=metadata)
62
+
63
+
64
@dataclass
class ModelArguments:
    """
    Arguments pertaining to which model/config/tokenizer we are going to fine-tune from.

    Every field carries its command line help text in ``metadata["help"]``.
    """

    model_name_or_path: str = field(
        metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"}
    )
    tokenizer_name_or_path: Optional[str] = field(
        default=None,
        metadata={"help": "Path to pretrained tokenizer or tokenizer identifier from huggingface.co/models"},
    )
    cache_dir: Optional[str] = field(
        default=None,
        metadata={"help": "Where do you want to store the pretrained models downloaded from huggingface.co"},
    )
    freeze_feature_encoder: bool = field(
        default=True, metadata={"help": "Whether to freeze the feature encoder layers of the model."}
    )
    attention_dropout: float = field(
        default=0.0, metadata={"help": "The dropout ratio for the attention probabilities."}
    )
    activation_dropout: float = field(
        default=0.0, metadata={"help": "The dropout ratio for activations inside the fully connected layer."}
    )
    feat_proj_dropout: float = field(default=0.0, metadata={"help": "The dropout ratio for the projected features."})
    hidden_dropout: float = field(
        default=0.0,
        metadata={
            "help": "The dropout probability for all fully connected layers in the embeddings, encoder, and pooler."
        },
    )
    final_dropout: float = field(
        default=0.0,
        metadata={"help": "The dropout probability for the final projection layer."},
    )
    # NOTE: the adjacent string literals below are concatenated, so each piece
    # needs its own trailing space ("vector span", "feature vectors").
    mask_time_prob: float = field(
        default=0.05,
        metadata={
            "help": "Probability of each feature vector along the time axis to be chosen as the start of the vector "
            "span to be masked. Approximately ``mask_time_prob * sequence_length // mask_time_length`` feature "
            "vectors will be masked along the time axis."
        },
    )
    mask_time_length: int = field(
        default=10,
        metadata={"help": "Length of vector span to mask along the time axis."},
    )
    mask_feature_prob: float = field(
        default=0.0,
        metadata={
            "help": "Probability of each feature vector along the feature axis to be chosen as the start of the vector "
            "span to be masked. Approximately ``mask_feature_prob * sequence_length // mask_feature_length`` feature bins will be masked along the time axis."
        },
    )
    mask_feature_length: int = field(
        default=10,
        metadata={"help": "Length of vector span to mask along the feature axis."},
    )
    layerdrop: float = field(default=0.0, metadata={"help": "The LayerDrop probability."})
    ctc_loss_reduction: Optional[str] = field(
        default="mean", metadata={"help": "The way the ctc loss should be reduced. Should be one of 'mean' or 'sum'."}
    )
128
+
129
+
130
@dataclass
class DataTrainingArguments:
    """
    Arguments pertaining to what data we are going to input our model for training and eval.

    Using `HfArgumentParser` we can turn this class
    into argparse arguments to be able to specify them on
    the command line.

    Every field carries its command line help text in ``metadata["help"]``.
    """

    dataset_name: str = field(metadata={"help": "The name of the dataset to use (via the datasets library)."})
    # defaults to None, hence Optional
    dataset_config_name: Optional[str] = field(
        default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."}
    )
    train_split_name: str = field(
        default="train+validation",
        metadata={
            "help": "The name of the training data set split to use (via the datasets library). "
            "Defaults to 'train+validation'"
        },
    )
    eval_split_name: str = field(
        default="test",
        metadata={
            "help": "The name of the evaluation data set split to use (via the datasets library). Defaults to 'test'"
        },
    )
    audio_column_name: str = field(
        default="audio",
        metadata={"help": "The name of the dataset column containing the audio data. Defaults to 'audio'"},
    )
    text_column_name: str = field(
        default="text",
        metadata={"help": "The name of the dataset column containing the text data. Defaults to 'text'"},
    )
    overwrite_cache: bool = field(
        default=False, metadata={"help": "Overwrite the cached preprocessed datasets or not."}
    )
    preprocessing_num_workers: Optional[int] = field(
        default=None,
        metadata={"help": "The number of processes to use for the preprocessing."},
    )
    max_train_samples: Optional[int] = field(
        default=None,
        metadata={
            "help": "For debugging purposes or quicker training, truncate the number of training examples to this "
            "value if set."
        },
    )
    max_eval_samples: Optional[int] = field(
        default=None,
        metadata={
            "help": "For debugging purposes or quicker training, truncate the number of validation examples to this "
            "value if set."
        },
    )
    chars_to_ignore: Optional[List[str]] = list_field(
        default=None,
        metadata={"help": "A list of characters to remove from the transcripts."},
    )
    eval_metrics: List[str] = list_field(
        default=["wer"],
        metadata={"help": "A list of metrics the model should be evaluated on. E.g. `'wer cer'`"},
    )
    max_duration_in_seconds: float = field(
        default=20.0,
        metadata={"help": "Filter audio files that are longer than `max_duration_in_seconds` seconds"},
    )
    min_duration_in_seconds: float = field(
        default=0.0, metadata={"help": "Filter audio files that are shorter than `min_duration_in_seconds` seconds"}
    )
    preprocessing_only: bool = field(
        default=False,
        metadata={
            "help": "Whether to only do data preprocessing and skip training. "
            "This is especially useful when data preprocessing errors out in distributed training due to timeout. "
            "In this case, one should run the preprocessing in a non-distributed setup with `preprocessing_only=True` "
            "so that the cached datasets can consequently be loaded in distributed training"
        },
    )
    use_auth_token: bool = field(
        default=False,
        metadata={
            "help": "If :obj:`True`, will use the token generated when running "
            ":obj:`transformers-cli login` as HTTP bearer authorization for remote files."
        },
    )
    unk_token: str = field(
        default="[UNK]",
        metadata={"help": "The unk token for the tokenizer"},
    )
    pad_token: str = field(
        default="[PAD]",
        metadata={"help": "The padding token for the tokenizer"},
    )
    word_delimiter_token: str = field(
        default="|",
        metadata={"help": "The word delimiter token for the tokenizer"},
    )
    phoneme_language: Optional[str] = field(
        default=None,
        metadata={
            "help": "The target language that should be"
            " passed to the tokenizer for tokenization. Note that"
            " this is only relevant if the model classifies the"
            " input audio to a sequence of phoneme sequences."
        },
    )
241
+
242
+
243
@dataclass
class DataCollatorCTCWithPadding:
    """
    Collator that pads audio inputs and CTC labels of a batch on the fly.

    Args:
        processor (:class:`~transformers.AutoProcessor`)
            The processor used for processing the data; its ``pad`` method is used for inputs and labels.
        padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`"longest"`):
            Padding strategy forwarded to ``processor.pad`` for both inputs and labels:

            * :obj:`True` or :obj:`'longest'`: Pad to the longest sequence in the batch (or no padding if only a single
              sequence is provided).
            * :obj:`'max_length'`: Pad to a maximum length.
            * :obj:`False` or :obj:`'do_not_pad'`: No padding (i.e., can output a batch with sequences of
              different lengths).
        pad_to_multiple_of (:obj:`int`, `optional`):
            If set, ``input_values`` are padded to a multiple of the provided value.
        pad_to_multiple_of_labels (:obj:`int`, `optional`):
            If set, the labels are padded to a multiple of the provided value.
    """

    processor: AutoProcessor
    padding: Union[bool, str] = "longest"
    pad_to_multiple_of: Optional[int] = None
    pad_to_multiple_of_labels: Optional[int] = None

    def __call__(self, features: List[Dict[str, Union[List[int], torch.Tensor]]]) -> Dict[str, torch.Tensor]:
        # Audio inputs and label ids differ in length and are padded by
        # different methods, so they are collected separately.
        audio_inputs = []
        label_inputs = []
        for feature in features:
            audio_inputs.append({"input_values": feature["input_values"]})
            label_inputs.append({"input_ids": feature["labels"]})

        batch = self.processor.pad(
            audio_inputs,
            padding=self.padding,
            pad_to_multiple_of=self.pad_to_multiple_of,
            return_tensors="pt",
        )

        with self.processor.as_target_processor():
            padded_labels = self.processor.pad(
                label_inputs,
                padding=self.padding,
                pad_to_multiple_of=self.pad_to_multiple_of_labels,
                return_tensors="pt",
            )

        # padded label positions are set to -100 so that the loss ignores them
        is_padding = padded_labels.attention_mask.ne(1)
        batch["labels"] = padded_labels["input_ids"].masked_fill(is_padding, -100)

        return batch
301
+
302
+
303
def create_vocabulary_from_data(
    datasets: "DatasetDict",
    word_delimiter_token: Optional[str] = None,
    unk_token: Optional[str] = None,
    pad_token: Optional[str] = None,
):
    """Build a character-level vocabulary from the ``"target_text"`` column of all splits.

    Args:
        datasets: Splits to collect characters from; every split needs a
            ``"target_text"`` column. Works for any number of splits.
        word_delimiter_token: If given, takes over the id of the space character.
        unk_token: If given, appended to the vocabulary.
        pad_token: If given, appended to the vocabulary (after ``unk_token``).

    Returns:
        A dict mapping each token to its integer id. Characters are numbered
        in sorted order, followed by the unk and pad token.
    """

    # Given training and test labels create vocabulary
    def extract_all_chars(batch):
        all_text = " ".join(batch["target_text"])
        vocab = list(set(all_text))
        return {"vocab": [vocab], "all_text": [all_text]}

    # "train" is missing for evaluation-only runs; fall back to the first split
    reference_split = datasets["train"] if "train" in datasets else next(iter(datasets.values()))

    vocabs = datasets.map(
        extract_all_chars,
        batched=True,
        batch_size=-1,
        keep_in_memory=True,
        remove_columns=reference_split.column_names,
    )

    # take union of all unique characters in each dataset
    # (an explicit loop, so that one split or more than two splits work too)
    vocab_set = set()
    for split_vocab in vocabs.values():
        vocab_set.update(split_vocab["vocab"][0])

    vocab_dict = {char: index for index, char in enumerate(sorted(vocab_set))}

    # replace white space with delimiter token
    if word_delimiter_token is not None and " " in vocab_dict:
        vocab_dict[word_delimiter_token] = vocab_dict.pop(" ")

    # add unk and pad token
    if unk_token is not None:
        vocab_dict[unk_token] = len(vocab_dict)

    if pad_token is not None:
        vocab_dict[pad_token] = len(vocab_dict)

    return vocab_dict
343
+
344
+
345
+ def main():
346
+ # See all possible arguments in src/transformers/training_args.py
347
+ # or by passing the --help flag to this script.
348
+ # We now keep distinct sets of args, for a cleaner separation of concerns.
349
+
350
+ parser = HfArgumentParser((ModelArguments, DataTrainingArguments, TrainingArguments))
351
+ if len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
352
+ # If we pass only one argument to the script and it's the path to a json file,
353
+ # let's parse it to get our arguments.
354
+ model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1]))
355
+ else:
356
+ model_args, data_args, training_args = parser.parse_args_into_dataclasses()
357
+
358
+ # Detecting last checkpoint.
359
+ last_checkpoint = None
360
+ if os.path.isdir(training_args.output_dir) and training_args.do_train and not training_args.overwrite_output_dir:
361
+ last_checkpoint = get_last_checkpoint(training_args.output_dir)
362
+ if last_checkpoint is None and len(os.listdir(training_args.output_dir)) > 0:
363
+ raise ValueError(
364
+ f"Output directory ({training_args.output_dir}) already exists and is not empty. "
365
+ "Use --overwrite_output_dir to overcome."
366
+ )
367
+ elif last_checkpoint is not None:
368
+ logger.info(
369
+ f"Checkpoint detected, resuming training at {last_checkpoint}. To avoid this behavior, change "
370
+ "the `--output_dir` or add `--overwrite_output_dir` to train from scratch."
371
+ )
372
+
373
+ # Setup logging
374
+ logging.basicConfig(
375
+ format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
376
+ datefmt="%m/%d/%Y %H:%M:%S",
377
+ handlers=[logging.StreamHandler(sys.stdout)],
378
+ )
379
+ logger.setLevel(logging.INFO if is_main_process(training_args.local_rank) else logging.WARN)
380
+
381
+ # Log on each process the small summary:
382
+ logger.warning(
383
+ f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}"
384
+ f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}"
385
+ )
386
+ # Set the verbosity to info of the Transformers logger (on main process only):
387
+ if is_main_process(training_args.local_rank):
388
+ transformers.utils.logging.set_verbosity_info()
389
+ logger.info("Training/evaluation parameters %s", training_args)
390
+
391
+ # Set seed before initializing model.
392
+ set_seed(training_args.seed)
393
+
394
+ # 1. First, let's load the dataset
395
+ raw_datasets = DatasetDict()
396
+
397
+ if training_args.do_train:
398
+ raw_datasets["train"] = load_dataset(
399
+ data_args.dataset_name,
400
+ data_args.dataset_config_name,
401
+ split=data_args.train_split_name,
402
+ use_auth_token=data_args.use_auth_token,
403
+ )
404
+
405
+ if data_args.audio_column_name not in raw_datasets["train"].column_names:
406
+ raise ValueError(
407
+ f"--audio_column_name '{data_args.audio_column_name}' not found in dataset '{data_args.dataset_name}'. "
408
+ "Make sure to set `--audio_column_name` to the correct audio column - one of "
409
+ f"{', '.join(raw_datasets['train'].column_names)}."
410
+ )
411
+
412
+ if data_args.text_column_name not in raw_datasets["train"].column_names:
413
+ raise ValueError(
414
+ f"--text_column_name {data_args.text_column_name} not found in dataset '{data_args.dataset_name}'. "
415
+ "Make sure to set `--text_column_name` to the correct text column - one of "
416
+ f"{', '.join(raw_datasets['train'].column_names)}."
417
+ )
418
+
419
+ if data_args.max_train_samples is not None:
420
+ raw_datasets["train"] = raw_datasets["train"].select(range(data_args.max_train_samples))
421
+
422
+ if training_args.do_eval:
423
+ raw_datasets["eval"] = load_dataset(
424
+ data_args.dataset_name,
425
+ data_args.dataset_config_name,
426
+ split=data_args.eval_split_name,
427
+ use_auth_token=data_args.use_auth_token,
428
+ )
429
+
430
+ if data_args.max_eval_samples is not None:
431
+ raw_datasets["eval"] = raw_datasets["eval"].select(range(data_args.max_eval_samples))
432
+
433
+ # 2. We remove some special characters from the datasets
434
+ # that make training complicated and do not help in transcribing the speech
435
+ # E.g. characters, such as `,` and `.` do not really have an acoustic characteristic
436
+ # that could be easily picked up by the model
437
+ chars_to_ignore_regex = (
438
+ f'[{"".join(data_args.chars_to_ignore)}]' if data_args.chars_to_ignore is not None else None
439
+ )
440
+ text_column_name = data_args.text_column_name
441
+
442
+ def remove_special_characters(batch):
443
+ if chars_to_ignore_regex is not None:
444
+ batch["target_text"] = re.sub(chars_to_ignore_regex, "", batch[text_column_name]).lower() + " "
445
+ else:
446
+ batch["target_text"] = batch[text_column_name].lower() + " "
447
+ return batch
448
+
449
+ with training_args.main_process_first(desc="dataset map special characters removal"):
450
+ raw_datasets = raw_datasets.map(
451
+ remove_special_characters,
452
+ remove_columns=[text_column_name],
453
+ desc="remove special characters from datasets",
454
+ )
455
+
456
+ # save special tokens for tokenizer
457
+ word_delimiter_token = data_args.word_delimiter_token
458
+ unk_token = data_args.unk_token
459
+ pad_token = data_args.pad_token
460
+
461
+ # 3. Next, let's load the config as we might need it to create
462
+ # the tokenizer
463
+ # load config
464
+ config = AutoConfig.from_pretrained(
465
+ model_args.model_name_or_path, cache_dir=model_args.cache_dir, use_auth_token=data_args.use_auth_token
466
+ )
467
+
468
+ # 4. Next, if no tokenizer file is defined,
469
+ # we create the vocabulary of the model by extracting all unique characters from
470
+ # the training and evaluation datasets
471
+ # We need to make sure that only first rank saves vocabulary
472
+ # make sure all processes wait until vocab is created
473
+ tokenizer_name_or_path = model_args.tokenizer_name_or_path
474
+ tokenizer_kwargs = {}
475
+ if tokenizer_name_or_path is None:
476
+ # save vocab in training output dir
477
+ tokenizer_name_or_path = training_args.output_dir
478
+
479
+ vocab_file = os.path.join(tokenizer_name_or_path, "vocab.json")
480
+
481
+ with training_args.main_process_first():
482
+ if training_args.overwrite_output_dir and os.path.isfile(vocab_file):
483
+ os.remove(vocab_file)
484
+
485
+ with training_args.main_process_first(desc="dataset map vocabulary creation"):
486
+ if not os.path.isfile(vocab_file):
487
+ os.makedirs(tokenizer_name_or_path, exist_ok=True)
488
+ vocab_dict = create_vocabulary_from_data(
489
+ raw_datasets,
490
+ word_delimiter_token=word_delimiter_token,
491
+ unk_token=unk_token,
492
+ pad_token=pad_token,
493
+ )
494
+
495
+ # save vocab dict to be loaded into tokenizer
496
+ with open(vocab_file, "w") as file:
497
+ json.dump(vocab_dict, file)
498
+
499
+ # if tokenizer has just been created
500
+ # it is defined by `tokenizer_class` if present in config else by `model_type`
501
+ tokenizer_kwargs = {
502
+ "config": config if config.tokenizer_class is not None else None,
503
+ "tokenizer_type": config.model_type if config.tokenizer_class is None else None,
504
+ "unk_token": unk_token,
505
+ "pad_token": pad_token,
506
+ "word_delimiter_token": word_delimiter_token,
507
+ }
508
+
509
+ # 5. Now we can instantiate the feature extractor, tokenizer and model
510
+ # Note for distributed training, the .from_pretrained methods guarantee that only
511
+ # one local process can concurrently download model & vocab.
512
+
513
+ # load feature_extractor and tokenizer
514
+ tokenizer = AutoTokenizer.from_pretrained(
515
+ tokenizer_name_or_path,
516
+ use_auth_token=data_args.use_auth_token,
517
+ **tokenizer_kwargs,
518
+ )
519
+ feature_extractor = AutoFeatureExtractor.from_pretrained(
520
+ model_args.model_name_or_path, cache_dir=model_args.cache_dir, use_auth_token=data_args.use_auth_token
521
+ )
522
+
523
+ # adapt config
524
+ config.update(
525
+ {
526
+ "feat_proj_dropout": model_args.feat_proj_dropout,
527
+ "attention_dropout": model_args.attention_dropout,
528
+ "hidden_dropout": model_args.hidden_dropout,
529
+ "final_dropout": model_args.final_dropout,
530
+ "mask_time_prob": model_args.mask_time_prob,
531
+ "mask_time_length": model_args.mask_time_length,
532
+ "mask_feature_prob": model_args.mask_feature_prob,
533
+ "mask_feature_length": model_args.mask_feature_length,
534
+ "gradient_checkpointing": training_args.gradient_checkpointing,
535
+ "layerdrop": model_args.layerdrop,
536
+ "ctc_loss_reduction": model_args.ctc_loss_reduction,
537
+ "pad_token_id": tokenizer.pad_token_id,
538
+ "vocab_size": len(tokenizer),
539
+ "activation_dropout": model_args.activation_dropout,
540
+ }
541
+ )
542
+
543
+ # create model
544
+ model = AutoModelForCTC.from_pretrained(
545
+ model_args.model_name_or_path,
546
+ cache_dir=model_args.cache_dir,
547
+ config=config,
548
+ use_auth_token=data_args.use_auth_token,
549
+ )
550
+
551
+ # freeze the feature encoder (only when `freeze_feature_encoder` is set)
552
+ if model_args.freeze_feature_encoder:
553
+ model.freeze_feature_encoder()
554
+
555
+ # 6. Now we preprocess the datasets including loading the audio, resampling and normalization
556
+ # Thankfully, `datasets` takes care of automatically loading and resampling the audio,
557
+ # so that we just need to set the correct target sampling rate and normalize the input
558
+ # via the `feature_extractor`
559
+
560
+ # make sure that dataset decodes audio with correct sampling rate
561
+ dataset_sampling_rate = next(iter(raw_datasets.values())).features[data_args.audio_column_name].sampling_rate
562
+ if dataset_sampling_rate != feature_extractor.sampling_rate:
563
+ raw_datasets = raw_datasets.cast_column(
564
+ data_args.audio_column_name, datasets.features.Audio(sampling_rate=feature_extractor.sampling_rate)
565
+ )
566
+
567
+ # derive max & min input length for sample rate & max duration
568
+ max_input_length = data_args.max_duration_in_seconds * feature_extractor.sampling_rate
569
+ min_input_length = data_args.min_duration_in_seconds * feature_extractor.sampling_rate
570
+ audio_column_name = data_args.audio_column_name
571
+ num_workers = data_args.preprocessing_num_workers
572
+
573
+ # `phoneme_language` is only relevant if the model is fine-tuned on phoneme classification
574
+ phoneme_language = data_args.phoneme_language
575
+
576
+ # Preprocessing the datasets.
577
+ # We need to read the audio files as arrays and tokenize the targets.
578
+ def prepare_dataset(batch):
579
+ # load audio
580
+ sample = batch[audio_column_name]
581
+
582
+ inputs = feature_extractor(sample["array"], sampling_rate=sample["sampling_rate"])
583
+ batch["input_values"] = inputs.input_values[0]
584
+ batch["input_length"] = len(batch["input_values"])
585
+
586
+ # encode targets
587
+ additional_kwargs = {}
588
+ if phoneme_language is not None:
589
+ additional_kwargs["phonemizer_lang"] = phoneme_language
590
+
591
+ batch["labels"] = tokenizer(batch["target_text"], **additional_kwargs).input_ids
592
+ return batch
593
+
594
+ with training_args.main_process_first(desc="dataset map preprocessing"):
595
+ vectorized_datasets = raw_datasets.map(
596
+ prepare_dataset,
597
+ remove_columns=next(iter(raw_datasets.values())).column_names,
598
+ num_proc=num_workers,
599
+ desc="preprocess datasets",
600
+ )
601
+
602
+ def is_audio_in_length_range(length):
603
+ return length > min_input_length and length < max_input_length
604
+
605
+ # filter out data that is not strictly between min_input_length and max_input_length
606
+ vectorized_datasets = vectorized_datasets.filter(
607
+ is_audio_in_length_range,
608
+ num_proc=num_workers,
609
+ input_columns=["input_length"],
610
+ )
611
+
612
+ # 7. Next, we can prepare the training.
613
+ # Let's use word error rate (WER) as our evaluation metric,
614
+ # instantiate a data collator and the trainer
615
+
616
+ # Define evaluation metrics during training, *i.e.* word error rate, character error rate
617
+ eval_metrics = {metric: load_metric(metric) for metric in data_args.eval_metrics}
618
+
619
+ # for large datasets it is advised to run the preprocessing on a
620
+ # single machine first with ``args.preprocessing_only`` since there will most likely
621
+ # be a timeout when running the script in distributed mode.
622
+ # In a second step ``args.preprocessing_only`` can then be set to `False` to load the
623
+ # cached dataset
624
+ if data_args.preprocessing_only:
625
+ logger.info(f"Data preprocessing finished. Files cached at {vectorized_datasets.cache_files}")
626
+ return
627
+
628
+ def compute_metrics(pred):
629
+ pred_logits = pred.predictions
630
+ pred_ids = np.argmax(pred_logits, axis=-1)
631
+
632
+ pred.label_ids[pred.label_ids == -100] = tokenizer.pad_token_id
633
+
634
+ pred_str = tokenizer.batch_decode(pred_ids)
635
+ # we do not want to group tokens when computing the metrics
636
+ label_str = tokenizer.batch_decode(pred.label_ids, group_tokens=False)
637
+
638
+ metrics = {k: v.compute(predictions=pred_str, references=label_str) for k, v in eval_metrics.items()}
639
+
640
+ return metrics
641
+
642
+ # Now save everything to be able to create a single processor later
643
+ if is_main_process(training_args.local_rank):
644
+ # save feature extractor, tokenizer and config
645
+ feature_extractor.save_pretrained(training_args.output_dir)
646
+ tokenizer.save_pretrained(training_args.output_dir)
647
+ config.save_pretrained(training_args.output_dir)
648
+
649
+ try:
650
+ processor = AutoProcessor.from_pretrained(training_args.output_dir)
651
+ except (OSError, KeyError):
652
+ warnings.warn(
653
+ "Loading a processor from a feature extractor config that does not"
654
+ " include a `processor_class` attribute is deprecated and will be removed in v5. Please add the following "
655
+ " attribute to your `preprocessor_config.json` file to suppress this warning: "
656
+ " `'processor_class': 'Wav2Vec2Processor'`",
657
+ FutureWarning,
658
+ )
659
+ processor = Wav2Vec2Processor.from_pretrained(training_args.output_dir)
660
+
661
+ # Instantiate custom data collator
662
+ data_collator = DataCollatorCTCWithPadding(processor=processor)
663
+
664
+ # Initialize Trainer
665
+ trainer = Trainer(
666
+ model=model,
667
+ data_collator=data_collator,
668
+ args=training_args,
669
+ compute_metrics=compute_metrics,
670
+ train_dataset=vectorized_datasets["train"] if training_args.do_train else None,
671
+ eval_dataset=vectorized_datasets["eval"] if training_args.do_eval else None,
672
+ tokenizer=feature_extractor,
673
+ )
674
+
675
+ # 8. Finally, we can start training
676
+
677
+ # Training
678
+ if training_args.do_train:
679
+
680
+ # use the last checkpoint if it exists
681
+ if last_checkpoint is not None:
682
+ checkpoint = last_checkpoint
683
+ elif os.path.isdir(model_args.model_name_or_path):
684
+ checkpoint = model_args.model_name_or_path
685
+ else:
686
+ checkpoint = None
687
+
688
+ train_result = trainer.train(resume_from_checkpoint=checkpoint)
689
+ trainer.save_model()
690
+
691
+ metrics = train_result.metrics
692
+ max_train_samples = (
693
+ data_args.max_train_samples
694
+ if data_args.max_train_samples is not None
695
+ else len(vectorized_datasets["train"])
696
+ )
697
+ metrics["train_samples"] = min(max_train_samples, len(vectorized_datasets["train"]))
698
+
699
+ trainer.log_metrics("train", metrics)
700
+ trainer.save_metrics("train", metrics)
701
+ trainer.save_state()
702
+
703
+ # Evaluation
704
+ results = {}
705
+ if training_args.do_eval:
706
+ logger.info("*** Evaluate ***")
707
+ metrics = trainer.evaluate()
708
+ max_eval_samples = (
709
+ data_args.max_eval_samples if data_args.max_eval_samples is not None else len(vectorized_datasets["eval"])
710
+ )
711
+ metrics["eval_samples"] = min(max_eval_samples, len(vectorized_datasets["eval"]))
712
+
713
+ trainer.log_metrics("eval", metrics)
714
+ trainer.save_metrics("eval", metrics)
715
+
716
+ # Write model card and (optionally) push to hub
717
+ config_name = data_args.dataset_config_name if data_args.dataset_config_name is not None else "na"
718
+ kwargs = {
719
+ "finetuned_from": model_args.model_name_or_path,
720
+ "tasks": "speech-recognition",
721
+ "tags": ["automatic-speech-recognition", data_args.dataset_name],
722
+ "dataset_args": f"Config: {config_name}, Training split: {data_args.train_split_name}, Eval split: {data_args.eval_split_name}",
723
+ "dataset": f"{data_args.dataset_name.upper()} - {config_name.upper()}",
724
+ }
725
+ if "common_voice" in data_args.dataset_name:
726
+ kwargs["language"] = config_name
727
+
728
+ if training_args.push_to_hub:
729
+ trainer.push_to_hub(**kwargs)
730
+ else:
731
+ trainer.create_model_card(**kwargs)
732
+
733
+ return results
734
+
735
+
736
+ if __name__ == "__main__":
737
+ main()
scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d1d84b098e761dbe1a7bf47d61c4d85eee1327725e6c25489fcbd7fe8775627f
3
+ size 559
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:10ab802c012038781dab6e6f375004396edc5d8092873b9f9fd25f37a37d2782
3
+ size 623
trainer_state.json ADDED
@@ -0,0 +1,3835 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.4362342655658722,
3
+ "best_model_checkpoint": "/local_disk0/X-LSR_CV_60_EP/checkpoint-2400",
4
+ "epoch": 59.55555555555556,
5
+ "global_step": 26800,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.11,
12
+ "learning_rate": 2.9999999999999997e-05,
13
+ "loss": 11.3191,
14
+ "step": 50
15
+ },
16
+ {
17
+ "epoch": 0.22,
18
+ "learning_rate": 5.9999999999999995e-05,
19
+ "loss": 5.2444,
20
+ "step": 100
21
+ },
22
+ {
23
+ "epoch": 0.33,
24
+ "learning_rate": 8.999999999999999e-05,
25
+ "loss": 3.4938,
26
+ "step": 150
27
+ },
28
+ {
29
+ "epoch": 0.44,
30
+ "learning_rate": 0.00011999999999999999,
31
+ "loss": 3.0165,
32
+ "step": 200
33
+ },
34
+ {
35
+ "epoch": 0.56,
36
+ "learning_rate": 0.00015,
37
+ "loss": 2.9377,
38
+ "step": 250
39
+ },
40
+ {
41
+ "epoch": 0.67,
42
+ "learning_rate": 0.00017999999999999998,
43
+ "loss": 2.9175,
44
+ "step": 300
45
+ },
46
+ {
47
+ "epoch": 0.78,
48
+ "learning_rate": 0.00020999999999999998,
49
+ "loss": 2.8908,
50
+ "step": 350
51
+ },
52
+ {
53
+ "epoch": 0.89,
54
+ "learning_rate": 0.00023999999999999998,
55
+ "loss": 2.8684,
56
+ "step": 400
57
+ },
58
+ {
59
+ "epoch": 0.89,
60
+ "eval_loss": 2.944436550140381,
61
+ "eval_runtime": 232.2253,
62
+ "eval_samples_per_second": 24.58,
63
+ "eval_steps_per_second": 3.075,
64
+ "eval_wer": 0.9999796528781004,
65
+ "step": 400
66
+ },
67
+ {
68
+ "epoch": 1.0,
69
+ "learning_rate": 0.00027,
70
+ "loss": 2.8525,
71
+ "step": 450
72
+ },
73
+ {
74
+ "epoch": 1.11,
75
+ "learning_rate": 0.0003,
76
+ "loss": 2.6961,
77
+ "step": 500
78
+ },
79
+ {
80
+ "epoch": 1.22,
81
+ "learning_rate": 0.0002994339622641509,
82
+ "loss": 1.6145,
83
+ "step": 550
84
+ },
85
+ {
86
+ "epoch": 1.33,
87
+ "learning_rate": 0.00029886792452830187,
88
+ "loss": 1.0359,
89
+ "step": 600
90
+ },
91
+ {
92
+ "epoch": 1.44,
93
+ "learning_rate": 0.0002983018867924528,
94
+ "loss": 0.8086,
95
+ "step": 650
96
+ },
97
+ {
98
+ "epoch": 1.56,
99
+ "learning_rate": 0.00029773584905660376,
100
+ "loss": 0.7171,
101
+ "step": 700
102
+ },
103
+ {
104
+ "epoch": 1.67,
105
+ "learning_rate": 0.0002971698113207547,
106
+ "loss": 0.6152,
107
+ "step": 750
108
+ },
109
+ {
110
+ "epoch": 1.78,
111
+ "learning_rate": 0.00029660377358490565,
112
+ "loss": 0.5631,
113
+ "step": 800
114
+ },
115
+ {
116
+ "epoch": 1.78,
117
+ "eval_loss": 0.641059398651123,
118
+ "eval_runtime": 232.9944,
119
+ "eval_samples_per_second": 24.498,
120
+ "eval_steps_per_second": 3.064,
121
+ "eval_wer": 0.5468289010519461,
122
+ "step": 800
123
+ },
124
+ {
125
+ "epoch": 1.89,
126
+ "learning_rate": 0.00029603773584905657,
127
+ "loss": 0.521,
128
+ "step": 850
129
+ },
130
+ {
131
+ "epoch": 2.0,
132
+ "learning_rate": 0.00029547169811320755,
133
+ "loss": 0.4953,
134
+ "step": 900
135
+ },
136
+ {
137
+ "epoch": 2.11,
138
+ "learning_rate": 0.00029490566037735847,
139
+ "loss": 0.4331,
140
+ "step": 950
141
+ },
142
+ {
143
+ "epoch": 2.22,
144
+ "learning_rate": 0.0002943396226415094,
145
+ "loss": 0.396,
146
+ "step": 1000
147
+ },
148
+ {
149
+ "epoch": 2.33,
150
+ "learning_rate": 0.00029377358490566036,
151
+ "loss": 0.4066,
152
+ "step": 1050
153
+ },
154
+ {
155
+ "epoch": 2.44,
156
+ "learning_rate": 0.0002932075471698113,
157
+ "loss": 0.3839,
158
+ "step": 1100
159
+ },
160
+ {
161
+ "epoch": 2.56,
162
+ "learning_rate": 0.00029264150943396225,
163
+ "loss": 0.3705,
164
+ "step": 1150
165
+ },
166
+ {
167
+ "epoch": 2.67,
168
+ "learning_rate": 0.00029207547169811317,
169
+ "loss": 0.3707,
170
+ "step": 1200
171
+ },
172
+ {
173
+ "epoch": 2.67,
174
+ "eval_loss": 0.5500322580337524,
175
+ "eval_runtime": 232.2885,
176
+ "eval_samples_per_second": 24.573,
177
+ "eval_steps_per_second": 3.074,
178
+ "eval_wer": 0.4608012696604065,
179
+ "step": 1200
180
+ },
181
+ {
182
+ "epoch": 2.78,
183
+ "learning_rate": 0.00029150943396226414,
184
+ "loss": 0.3813,
185
+ "step": 1250
186
+ },
187
+ {
188
+ "epoch": 2.89,
189
+ "learning_rate": 0.00029094339622641506,
190
+ "loss": 0.3572,
191
+ "step": 1300
192
+ },
193
+ {
194
+ "epoch": 3.0,
195
+ "learning_rate": 0.00029037735849056604,
196
+ "loss": 0.3483,
197
+ "step": 1350
198
+ },
199
+ {
200
+ "epoch": 3.11,
201
+ "learning_rate": 0.00028981132075471696,
202
+ "loss": 0.2772,
203
+ "step": 1400
204
+ },
205
+ {
206
+ "epoch": 3.22,
207
+ "learning_rate": 0.0002892452830188679,
208
+ "loss": 0.2977,
209
+ "step": 1450
210
+ },
211
+ {
212
+ "epoch": 3.33,
213
+ "learning_rate": 0.00028867924528301885,
214
+ "loss": 0.2802,
215
+ "step": 1500
216
+ },
217
+ {
218
+ "epoch": 3.44,
219
+ "learning_rate": 0.00028811320754716977,
220
+ "loss": 0.2913,
221
+ "step": 1550
222
+ },
223
+ {
224
+ "epoch": 3.56,
225
+ "learning_rate": 0.00028754716981132074,
226
+ "loss": 0.2899,
227
+ "step": 1600
228
+ },
229
+ {
230
+ "epoch": 3.56,
231
+ "eval_loss": 0.5040135383605957,
232
+ "eval_runtime": 232.8201,
233
+ "eval_samples_per_second": 24.517,
234
+ "eval_steps_per_second": 3.067,
235
+ "eval_wer": 0.420412232689686,
236
+ "step": 1600
237
+ },
238
+ {
239
+ "epoch": 3.67,
240
+ "learning_rate": 0.00028698113207547166,
241
+ "loss": 0.2802,
242
+ "step": 1650
243
+ },
244
+ {
245
+ "epoch": 3.78,
246
+ "learning_rate": 0.00028641509433962264,
247
+ "loss": 0.2769,
248
+ "step": 1700
249
+ },
250
+ {
251
+ "epoch": 3.89,
252
+ "learning_rate": 0.00028584905660377356,
253
+ "loss": 0.2632,
254
+ "step": 1750
255
+ },
256
+ {
257
+ "epoch": 4.0,
258
+ "learning_rate": 0.00028528301886792453,
259
+ "loss": 0.2784,
260
+ "step": 1800
261
+ },
262
+ {
263
+ "epoch": 4.11,
264
+ "learning_rate": 0.00028471698113207545,
265
+ "loss": 0.2341,
266
+ "step": 1850
267
+ },
268
+ {
269
+ "epoch": 4.22,
270
+ "learning_rate": 0.00028415094339622637,
271
+ "loss": 0.2346,
272
+ "step": 1900
273
+ },
274
+ {
275
+ "epoch": 4.33,
276
+ "learning_rate": 0.00028358490566037734,
277
+ "loss": 0.2301,
278
+ "step": 1950
279
+ },
280
+ {
281
+ "epoch": 4.44,
282
+ "learning_rate": 0.00028301886792452826,
283
+ "loss": 0.2376,
284
+ "step": 2000
285
+ },
286
+ {
287
+ "epoch": 4.44,
288
+ "eval_loss": 0.4637417495250702,
289
+ "eval_runtime": 233.0278,
290
+ "eval_samples_per_second": 24.495,
291
+ "eval_steps_per_second": 3.064,
292
+ "eval_wer": 0.3989663662074999,
293
+ "step": 2000
294
+ },
295
+ {
296
+ "epoch": 4.56,
297
+ "learning_rate": 0.00028245283018867923,
298
+ "loss": 0.2474,
299
+ "step": 2050
300
+ },
301
+ {
302
+ "epoch": 4.67,
303
+ "learning_rate": 0.00028188679245283015,
304
+ "loss": 0.2325,
305
+ "step": 2100
306
+ },
307
+ {
308
+ "epoch": 4.78,
309
+ "learning_rate": 0.00028132075471698113,
310
+ "loss": 0.2477,
311
+ "step": 2150
312
+ },
313
+ {
314
+ "epoch": 4.89,
315
+ "learning_rate": 0.00028075471698113205,
316
+ "loss": 0.2319,
317
+ "step": 2200
318
+ },
319
+ {
320
+ "epoch": 5.0,
321
+ "learning_rate": 0.000280188679245283,
322
+ "loss": 0.2342,
323
+ "step": 2250
324
+ },
325
+ {
326
+ "epoch": 5.11,
327
+ "learning_rate": 0.00027962264150943394,
328
+ "loss": 0.2176,
329
+ "step": 2300
330
+ },
331
+ {
332
+ "epoch": 5.22,
333
+ "learning_rate": 0.00027905660377358486,
334
+ "loss": 0.2015,
335
+ "step": 2350
336
+ },
337
+ {
338
+ "epoch": 5.33,
339
+ "learning_rate": 0.00027849056603773583,
340
+ "loss": 0.2063,
341
+ "step": 2400
342
+ },
343
+ {
344
+ "epoch": 5.33,
345
+ "eval_loss": 0.4362342655658722,
346
+ "eval_runtime": 234.4496,
347
+ "eval_samples_per_second": 24.346,
348
+ "eval_steps_per_second": 3.045,
349
+ "eval_wer": 0.3932488249537103,
350
+ "step": 2400
351
+ },
352
+ {
353
+ "epoch": 5.44,
354
+ "learning_rate": 0.00027792452830188675,
355
+ "loss": 0.2116,
356
+ "step": 2450
357
+ },
358
+ {
359
+ "epoch": 5.56,
360
+ "learning_rate": 0.0002773584905660377,
361
+ "loss": 0.2084,
362
+ "step": 2500
363
+ },
364
+ {
365
+ "epoch": 5.67,
366
+ "learning_rate": 0.00027679245283018865,
367
+ "loss": 0.2122,
368
+ "step": 2550
369
+ },
370
+ {
371
+ "epoch": 5.78,
372
+ "learning_rate": 0.0002762264150943396,
373
+ "loss": 0.2063,
374
+ "step": 2600
375
+ },
376
+ {
377
+ "epoch": 5.89,
378
+ "learning_rate": 0.00027566037735849054,
379
+ "loss": 0.1902,
380
+ "step": 2650
381
+ },
382
+ {
383
+ "epoch": 6.0,
384
+ "learning_rate": 0.0002750943396226415,
385
+ "loss": 0.2067,
386
+ "step": 2700
387
+ },
388
+ {
389
+ "epoch": 6.11,
390
+ "learning_rate": 0.00027452830188679243,
391
+ "loss": 0.1845,
392
+ "step": 2750
393
+ },
394
+ {
395
+ "epoch": 6.22,
396
+ "learning_rate": 0.0002739622641509434,
397
+ "loss": 0.1773,
398
+ "step": 2800
399
+ },
400
+ {
401
+ "epoch": 6.22,
402
+ "eval_loss": 0.4410901367664337,
403
+ "eval_runtime": 235.6532,
404
+ "eval_samples_per_second": 24.222,
405
+ "eval_steps_per_second": 3.03,
406
+ "eval_wer": 0.3876737135532179,
407
+ "step": 2800
408
+ },
409
+ {
410
+ "epoch": 6.33,
411
+ "learning_rate": 0.0002733962264150943,
412
+ "loss": 0.1793,
413
+ "step": 2850
414
+ },
415
+ {
416
+ "epoch": 6.44,
417
+ "learning_rate": 0.00027283018867924524,
418
+ "loss": 0.1764,
419
+ "step": 2900
420
+ },
421
+ {
422
+ "epoch": 6.56,
423
+ "learning_rate": 0.0002722641509433962,
424
+ "loss": 0.1882,
425
+ "step": 2950
426
+ },
427
+ {
428
+ "epoch": 6.67,
429
+ "learning_rate": 0.00027169811320754714,
430
+ "loss": 0.1892,
431
+ "step": 3000
432
+ },
433
+ {
434
+ "epoch": 6.78,
435
+ "learning_rate": 0.0002711320754716981,
436
+ "loss": 0.1941,
437
+ "step": 3050
438
+ },
439
+ {
440
+ "epoch": 6.89,
441
+ "learning_rate": 0.00027056603773584903,
442
+ "loss": 0.1858,
443
+ "step": 3100
444
+ },
445
+ {
446
+ "epoch": 7.0,
447
+ "learning_rate": 0.00027,
448
+ "loss": 0.1817,
449
+ "step": 3150
450
+ },
451
+ {
452
+ "epoch": 7.11,
453
+ "learning_rate": 0.0002694339622641509,
454
+ "loss": 0.1676,
455
+ "step": 3200
456
+ },
457
+ {
458
+ "epoch": 7.11,
459
+ "eval_loss": 0.479378342628479,
460
+ "eval_runtime": 234.1851,
461
+ "eval_samples_per_second": 24.374,
462
+ "eval_steps_per_second": 3.049,
463
+ "eval_wer": 0.38449956253687917,
464
+ "step": 3200
465
+ },
466
+ {
467
+ "epoch": 7.22,
468
+ "learning_rate": 0.0002688679245283019,
469
+ "loss": 0.1728,
470
+ "step": 3250
471
+ },
472
+ {
473
+ "epoch": 7.33,
474
+ "learning_rate": 0.0002683018867924528,
475
+ "loss": 0.1597,
476
+ "step": 3300
477
+ },
478
+ {
479
+ "epoch": 7.44,
480
+ "learning_rate": 0.00026773584905660374,
481
+ "loss": 0.1695,
482
+ "step": 3350
483
+ },
484
+ {
485
+ "epoch": 7.56,
486
+ "learning_rate": 0.0002671698113207547,
487
+ "loss": 0.1662,
488
+ "step": 3400
489
+ },
490
+ {
491
+ "epoch": 7.67,
492
+ "learning_rate": 0.00026660377358490563,
493
+ "loss": 0.1612,
494
+ "step": 3450
495
+ },
496
+ {
497
+ "epoch": 7.78,
498
+ "learning_rate": 0.0002660377358490566,
499
+ "loss": 0.1611,
500
+ "step": 3500
501
+ },
502
+ {
503
+ "epoch": 7.89,
504
+ "learning_rate": 0.0002654716981132075,
505
+ "loss": 0.1669,
506
+ "step": 3550
507
+ },
508
+ {
509
+ "epoch": 8.0,
510
+ "learning_rate": 0.0002649056603773585,
511
+ "loss": 0.1728,
512
+ "step": 3600
513
+ },
514
+ {
515
+ "epoch": 8.0,
516
+ "eval_loss": 0.4429037272930145,
517
+ "eval_runtime": 233.3956,
518
+ "eval_samples_per_second": 24.456,
519
+ "eval_steps_per_second": 3.059,
520
+ "eval_wer": 0.37750015260341424,
521
+ "step": 3600
522
+ },
523
+ {
524
+ "epoch": 8.11,
525
+ "learning_rate": 0.0002643396226415094,
526
+ "loss": 0.1543,
527
+ "step": 3650
528
+ },
529
+ {
530
+ "epoch": 8.22,
531
+ "learning_rate": 0.0002637735849056604,
532
+ "loss": 0.152,
533
+ "step": 3700
534
+ },
535
+ {
536
+ "epoch": 8.33,
537
+ "learning_rate": 0.0002632075471698113,
538
+ "loss": 0.1552,
539
+ "step": 3750
540
+ },
541
+ {
542
+ "epoch": 8.44,
543
+ "learning_rate": 0.0002626415094339622,
544
+ "loss": 0.16,
545
+ "step": 3800
546
+ },
547
+ {
548
+ "epoch": 8.56,
549
+ "learning_rate": 0.0002620754716981132,
550
+ "loss": 0.1645,
551
+ "step": 3850
552
+ },
553
+ {
554
+ "epoch": 8.67,
555
+ "learning_rate": 0.0002615094339622641,
556
+ "loss": 0.158,
557
+ "step": 3900
558
+ },
559
+ {
560
+ "epoch": 8.78,
561
+ "learning_rate": 0.0002609433962264151,
562
+ "loss": 0.1654,
563
+ "step": 3950
564
+ },
565
+ {
566
+ "epoch": 8.89,
567
+ "learning_rate": 0.000260377358490566,
568
+ "loss": 0.1556,
569
+ "step": 4000
570
+ },
571
+ {
572
+ "epoch": 8.89,
573
+ "eval_loss": 0.4799572825431824,
574
+ "eval_runtime": 233.7994,
575
+ "eval_samples_per_second": 24.414,
576
+ "eval_steps_per_second": 3.054,
577
+ "eval_wer": 0.38354324780759763,
578
+ "step": 4000
579
+ },
580
+ {
581
+ "epoch": 9.0,
582
+ "learning_rate": 0.000259811320754717,
583
+ "loss": 0.1627,
584
+ "step": 4050
585
+ },
586
+ {
587
+ "epoch": 9.11,
588
+ "learning_rate": 0.0002592452830188679,
589
+ "loss": 0.1443,
590
+ "step": 4100
591
+ },
592
+ {
593
+ "epoch": 9.22,
594
+ "learning_rate": 0.0002586792452830189,
595
+ "loss": 0.1527,
596
+ "step": 4150
597
+ },
598
+ {
599
+ "epoch": 9.33,
600
+ "learning_rate": 0.0002581132075471698,
601
+ "loss": 0.1516,
602
+ "step": 4200
603
+ },
604
+ {
605
+ "epoch": 9.44,
606
+ "learning_rate": 0.0002575471698113207,
607
+ "loss": 0.1554,
608
+ "step": 4250
609
+ },
610
+ {
611
+ "epoch": 9.56,
612
+ "learning_rate": 0.0002569811320754717,
613
+ "loss": 0.1502,
614
+ "step": 4300
615
+ },
616
+ {
617
+ "epoch": 9.67,
618
+ "learning_rate": 0.0002564150943396226,
619
+ "loss": 0.1496,
620
+ "step": 4350
621
+ },
622
+ {
623
+ "epoch": 9.78,
624
+ "learning_rate": 0.0002558490566037736,
625
+ "loss": 0.1514,
626
+ "step": 4400
627
+ },
628
+ {
629
+ "epoch": 9.78,
630
+ "eval_loss": 0.46985962986946106,
631
+ "eval_runtime": 235.3127,
632
+ "eval_samples_per_second": 24.257,
633
+ "eval_steps_per_second": 3.034,
634
+ "eval_wer": 0.3755264817791523,
635
+ "step": 4400
636
+ },
637
+ {
638
+ "epoch": 9.89,
639
+ "learning_rate": 0.0002552830188679245,
640
+ "loss": 0.1501,
641
+ "step": 4450
642
+ },
643
+ {
644
+ "epoch": 10.0,
645
+ "learning_rate": 0.0002547169811320755,
646
+ "loss": 0.1464,
647
+ "step": 4500
648
+ },
649
+ {
650
+ "epoch": 10.11,
651
+ "learning_rate": 0.0002541509433962264,
652
+ "loss": 0.1354,
653
+ "step": 4550
654
+ },
655
+ {
656
+ "epoch": 10.22,
657
+ "learning_rate": 0.00025358490566037737,
658
+ "loss": 0.1315,
659
+ "step": 4600
660
+ },
661
+ {
662
+ "epoch": 10.33,
663
+ "learning_rate": 0.0002530188679245283,
664
+ "loss": 0.1365,
665
+ "step": 4650
666
+ },
667
+ {
668
+ "epoch": 10.44,
669
+ "learning_rate": 0.0002524528301886792,
670
+ "loss": 0.1399,
671
+ "step": 4700
672
+ },
673
+ {
674
+ "epoch": 10.56,
675
+ "learning_rate": 0.0002518867924528302,
676
+ "loss": 0.1351,
677
+ "step": 4750
678
+ },
679
+ {
680
+ "epoch": 10.67,
681
+ "learning_rate": 0.0002513207547169811,
682
+ "loss": 0.1405,
683
+ "step": 4800
684
+ },
685
+ {
686
+ "epoch": 10.67,
687
+ "eval_loss": 0.47201868891716003,
688
+ "eval_runtime": 234.4469,
689
+ "eval_samples_per_second": 24.347,
690
+ "eval_steps_per_second": 3.045,
691
+ "eval_wer": 0.3793517406962785,
692
+ "step": 4800
693
+ },
694
+ {
695
+ "epoch": 10.78,
696
+ "learning_rate": 0.0002507547169811321,
697
+ "loss": 0.1446,
698
+ "step": 4850
699
+ },
700
+ {
701
+ "epoch": 10.89,
702
+ "learning_rate": 0.000250188679245283,
703
+ "loss": 0.1402,
704
+ "step": 4900
705
+ },
706
+ {
707
+ "epoch": 11.0,
708
+ "learning_rate": 0.00024962264150943397,
709
+ "loss": 0.1417,
710
+ "step": 4950
711
+ },
712
+ {
713
+ "epoch": 11.11,
714
+ "learning_rate": 0.0002490566037735849,
715
+ "loss": 0.1326,
716
+ "step": 5000
717
+ },
718
+ {
719
+ "epoch": 11.22,
720
+ "learning_rate": 0.00024849056603773586,
721
+ "loss": 0.1409,
722
+ "step": 5050
723
+ },
724
+ {
725
+ "epoch": 11.33,
726
+ "learning_rate": 0.0002479245283018868,
727
+ "loss": 0.1279,
728
+ "step": 5100
729
+ },
730
+ {
731
+ "epoch": 11.44,
732
+ "learning_rate": 0.0002473584905660377,
733
+ "loss": 0.1333,
734
+ "step": 5150
735
+ },
736
+ {
737
+ "epoch": 11.56,
738
+ "learning_rate": 0.0002467924528301887,
739
+ "loss": 0.1317,
740
+ "step": 5200
741
+ },
742
+ {
743
+ "epoch": 11.56,
744
+ "eval_loss": 0.5062007904052734,
745
+ "eval_runtime": 233.2872,
746
+ "eval_samples_per_second": 24.468,
747
+ "eval_steps_per_second": 3.061,
748
+ "eval_wer": 0.3787006327954911,
749
+ "step": 5200
750
+ },
751
+ {
752
+ "epoch": 11.67,
753
+ "learning_rate": 0.0002462264150943396,
754
+ "loss": 0.1309,
755
+ "step": 5250
756
+ },
757
+ {
758
+ "epoch": 11.78,
759
+ "learning_rate": 0.00024566037735849057,
760
+ "loss": 0.1344,
761
+ "step": 5300
762
+ },
763
+ {
764
+ "epoch": 11.89,
765
+ "learning_rate": 0.0002450943396226415,
766
+ "loss": 0.1339,
767
+ "step": 5350
768
+ },
769
+ {
770
+ "epoch": 12.0,
771
+ "learning_rate": 0.00024452830188679246,
772
+ "loss": 0.1403,
773
+ "step": 5400
774
+ },
775
+ {
776
+ "epoch": 12.11,
777
+ "learning_rate": 0.00024396226415094338,
778
+ "loss": 0.1208,
779
+ "step": 5450
780
+ },
781
+ {
782
+ "epoch": 12.22,
783
+ "learning_rate": 0.00024339622641509433,
784
+ "loss": 0.1278,
785
+ "step": 5500
786
+ },
787
+ {
788
+ "epoch": 12.33,
789
+ "learning_rate": 0.00024283018867924527,
790
+ "loss": 0.1148,
791
+ "step": 5550
792
+ },
793
+ {
794
+ "epoch": 12.44,
795
+ "learning_rate": 0.00024226415094339622,
796
+ "loss": 0.1204,
797
+ "step": 5600
798
+ },
799
+ {
800
+ "epoch": 12.44,
801
+ "eval_loss": 0.4777355492115021,
802
+ "eval_runtime": 233.4682,
803
+ "eval_samples_per_second": 24.449,
804
+ "eval_steps_per_second": 3.058,
805
+ "eval_wer": 0.36860846033328587,
806
+ "step": 5600
807
+ },
808
+ {
809
+ "epoch": 12.56,
810
+ "learning_rate": 0.00024169811320754717,
811
+ "loss": 0.1237,
812
+ "step": 5650
813
+ },
814
+ {
815
+ "epoch": 12.67,
816
+ "learning_rate": 0.0002411320754716981,
817
+ "loss": 0.124,
818
+ "step": 5700
819
+ },
820
+ {
821
+ "epoch": 12.78,
822
+ "learning_rate": 0.00024056603773584906,
823
+ "loss": 0.1358,
824
+ "step": 5750
825
+ },
826
+ {
827
+ "epoch": 12.89,
828
+ "learning_rate": 0.00023999999999999998,
829
+ "loss": 0.1243,
830
+ "step": 5800
831
+ },
832
+ {
833
+ "epoch": 13.0,
834
+ "learning_rate": 0.00023943396226415093,
835
+ "loss": 0.1255,
836
+ "step": 5850
837
+ },
838
+ {
839
+ "epoch": 13.11,
840
+ "learning_rate": 0.00023886792452830187,
841
+ "loss": 0.1158,
842
+ "step": 5900
843
+ },
844
+ {
845
+ "epoch": 13.22,
846
+ "learning_rate": 0.00023830188679245282,
847
+ "loss": 0.1135,
848
+ "step": 5950
849
+ },
850
+ {
851
+ "epoch": 13.33,
852
+ "learning_rate": 0.00023773584905660377,
853
+ "loss": 0.12,
854
+ "step": 6000
855
+ },
856
+ {
857
+ "epoch": 13.33,
858
+ "eval_loss": 0.5171375870704651,
859
+ "eval_runtime": 233.056,
860
+ "eval_samples_per_second": 24.492,
861
+ "eval_steps_per_second": 3.064,
862
+ "eval_wer": 0.3718233055934238,
863
+ "step": 6000
864
+ },
865
+ {
866
+ "epoch": 13.44,
867
+ "learning_rate": 0.0002371698113207547,
868
+ "loss": 0.1218,
869
+ "step": 6050
870
+ },
871
+ {
872
+ "epoch": 13.56,
873
+ "learning_rate": 0.00023660377358490566,
874
+ "loss": 0.1244,
875
+ "step": 6100
876
+ },
877
+ {
878
+ "epoch": 13.67,
879
+ "learning_rate": 0.0002360377358490566,
880
+ "loss": 0.1238,
881
+ "step": 6150
882
+ },
883
+ {
884
+ "epoch": 13.78,
885
+ "learning_rate": 0.00023547169811320755,
886
+ "loss": 0.1188,
887
+ "step": 6200
888
+ },
889
+ {
890
+ "epoch": 13.89,
891
+ "learning_rate": 0.0002349056603773585,
892
+ "loss": 0.1167,
893
+ "step": 6250
894
+ },
895
+ {
896
+ "epoch": 14.0,
897
+ "learning_rate": 0.00023433962264150942,
898
+ "loss": 0.1212,
899
+ "step": 6300
900
+ },
901
+ {
902
+ "epoch": 14.11,
903
+ "learning_rate": 0.00023377358490566036,
904
+ "loss": 0.1209,
905
+ "step": 6350
906
+ },
907
+ {
908
+ "epoch": 14.22,
909
+ "learning_rate": 0.0002332075471698113,
910
+ "loss": 0.1176,
911
+ "step": 6400
912
+ },
913
+ {
914
+ "epoch": 14.22,
915
+ "eval_loss": 0.5209046602249146,
916
+ "eval_runtime": 232.7218,
917
+ "eval_samples_per_second": 24.527,
918
+ "eval_steps_per_second": 3.068,
919
+ "eval_wer": 0.37357315807679004,
920
+ "step": 6400
921
+ },
922
+ {
923
+ "epoch": 14.33,
924
+ "learning_rate": 0.00023264150943396226,
925
+ "loss": 0.1108,
926
+ "step": 6450
927
+ },
928
+ {
929
+ "epoch": 14.44,
930
+ "learning_rate": 0.0002320754716981132,
931
+ "loss": 0.1201,
932
+ "step": 6500
933
+ },
934
+ {
935
+ "epoch": 14.56,
936
+ "learning_rate": 0.00023150943396226415,
937
+ "loss": 0.1135,
938
+ "step": 6550
939
+ },
940
+ {
941
+ "epoch": 14.67,
942
+ "learning_rate": 0.0002309433962264151,
943
+ "loss": 0.111,
944
+ "step": 6600
945
+ },
946
+ {
947
+ "epoch": 14.78,
948
+ "learning_rate": 0.00023037735849056604,
949
+ "loss": 0.1169,
950
+ "step": 6650
951
+ },
952
+ {
953
+ "epoch": 14.89,
954
+ "learning_rate": 0.000229811320754717,
955
+ "loss": 0.12,
956
+ "step": 6700
957
+ },
958
+ {
959
+ "epoch": 15.0,
960
+ "learning_rate": 0.0002292452830188679,
961
+ "loss": 0.1145,
962
+ "step": 6750
963
+ },
964
+ {
965
+ "epoch": 15.11,
966
+ "learning_rate": 0.00022867924528301886,
967
+ "loss": 0.1102,
968
+ "step": 6800
969
+ },
970
+ {
971
+ "epoch": 15.11,
972
+ "eval_loss": 0.5242590308189392,
973
+ "eval_runtime": 233.3294,
974
+ "eval_samples_per_second": 24.463,
975
+ "eval_steps_per_second": 3.06,
976
+ "eval_wer": 0.37780535943190835,
977
+ "step": 6800
978
+ },
979
+ {
980
+ "epoch": 15.22,
981
+ "learning_rate": 0.0002281132075471698,
982
+ "loss": 0.0994,
983
+ "step": 6850
984
+ },
985
+ {
986
+ "epoch": 15.33,
987
+ "learning_rate": 0.00022754716981132075,
988
+ "loss": 0.1196,
989
+ "step": 6900
990
+ },
991
+ {
992
+ "epoch": 15.44,
993
+ "learning_rate": 0.0002269811320754717,
994
+ "loss": 0.1125,
995
+ "step": 6950
996
+ },
997
+ {
998
+ "epoch": 15.56,
999
+ "learning_rate": 0.00022641509433962264,
1000
+ "loss": 0.1122,
1001
+ "step": 7000
1002
+ },
1003
+ {
1004
+ "epoch": 15.67,
1005
+ "learning_rate": 0.0002258490566037736,
1006
+ "loss": 0.1235,
1007
+ "step": 7050
1008
+ },
1009
+ {
1010
+ "epoch": 15.78,
1011
+ "learning_rate": 0.00022528301886792453,
1012
+ "loss": 0.1088,
1013
+ "step": 7100
1014
+ },
1015
+ {
1016
+ "epoch": 15.89,
1017
+ "learning_rate": 0.00022471698113207543,
1018
+ "loss": 0.1089,
1019
+ "step": 7150
1020
+ },
1021
+ {
1022
+ "epoch": 16.0,
1023
+ "learning_rate": 0.00022415094339622637,
1024
+ "loss": 0.1097,
1025
+ "step": 7200
1026
+ },
1027
+ {
1028
+ "epoch": 16.0,
1029
+ "eval_loss": 0.4982779622077942,
1030
+ "eval_runtime": 233.2014,
1031
+ "eval_samples_per_second": 24.477,
1032
+ "eval_steps_per_second": 3.062,
1033
+ "eval_wer": 0.3620159928378131,
1034
+ "step": 7200
1035
+ },
1036
+ {
1037
+ "epoch": 16.11,
1038
+ "learning_rate": 0.00022358490566037732,
1039
+ "loss": 0.1023,
1040
+ "step": 7250
1041
+ },
1042
+ {
1043
+ "epoch": 16.22,
1044
+ "learning_rate": 0.00022301886792452827,
1045
+ "loss": 0.097,
1046
+ "step": 7300
1047
+ },
1048
+ {
1049
+ "epoch": 16.33,
1050
+ "learning_rate": 0.0002224528301886792,
1051
+ "loss": 0.1002,
1052
+ "step": 7350
1053
+ },
1054
+ {
1055
+ "epoch": 16.44,
1056
+ "learning_rate": 0.00022188679245283016,
1057
+ "loss": 0.1051,
1058
+ "step": 7400
1059
+ },
1060
+ {
1061
+ "epoch": 16.56,
1062
+ "learning_rate": 0.0002213207547169811,
1063
+ "loss": 0.1114,
1064
+ "step": 7450
1065
+ },
1066
+ {
1067
+ "epoch": 16.67,
1068
+ "learning_rate": 0.00022075471698113205,
1069
+ "loss": 0.0984,
1070
+ "step": 7500
1071
+ },
1072
+ {
1073
+ "epoch": 16.78,
1074
+ "learning_rate": 0.00022018867924528297,
1075
+ "loss": 0.1121,
1076
+ "step": 7550
1077
+ },
1078
+ {
1079
+ "epoch": 16.89,
1080
+ "learning_rate": 0.00021962264150943392,
1081
+ "loss": 0.1091,
1082
+ "step": 7600
1083
+ },
1084
+ {
1085
+ "epoch": 16.89,
1086
+ "eval_loss": 0.4976480007171631,
1087
+ "eval_runtime": 232.5964,
1088
+ "eval_samples_per_second": 24.54,
1089
+ "eval_steps_per_second": 3.07,
1090
+ "eval_wer": 0.366166805705333,
1091
+ "step": 7600
1092
+ },
1093
+ {
1094
+ "epoch": 17.0,
1095
+ "learning_rate": 0.00021905660377358486,
1096
+ "loss": 0.1061,
1097
+ "step": 7650
1098
+ },
1099
+ {
1100
+ "epoch": 17.11,
1101
+ "learning_rate": 0.0002184905660377358,
1102
+ "loss": 0.0962,
1103
+ "step": 7700
1104
+ },
1105
+ {
1106
+ "epoch": 17.22,
1107
+ "learning_rate": 0.00021792452830188676,
1108
+ "loss": 0.101,
1109
+ "step": 7750
1110
+ },
1111
+ {
1112
+ "epoch": 17.33,
1113
+ "learning_rate": 0.0002173584905660377,
1114
+ "loss": 0.1109,
1115
+ "step": 7800
1116
+ },
1117
+ {
1118
+ "epoch": 17.44,
1119
+ "learning_rate": 0.00021679245283018865,
1120
+ "loss": 0.1021,
1121
+ "step": 7850
1122
+ },
1123
+ {
1124
+ "epoch": 17.56,
1125
+ "learning_rate": 0.0002162264150943396,
1126
+ "loss": 0.1011,
1127
+ "step": 7900
1128
+ },
1129
+ {
1130
+ "epoch": 17.67,
1131
+ "learning_rate": 0.00021566037735849054,
1132
+ "loss": 0.0992,
1133
+ "step": 7950
1134
+ },
1135
+ {
1136
+ "epoch": 17.78,
1137
+ "learning_rate": 0.00021509433962264146,
1138
+ "loss": 0.104,
1139
+ "step": 8000
1140
+ },
1141
+ {
1142
+ "epoch": 17.78,
1143
+ "eval_loss": 0.5483397245407104,
1144
+ "eval_runtime": 232.4366,
1145
+ "eval_samples_per_second": 24.557,
1146
+ "eval_steps_per_second": 3.072,
1147
+ "eval_wer": 0.3651901438541518,
1148
+ "step": 8000
1149
+ },
1150
+ {
1151
+ "epoch": 17.89,
1152
+ "learning_rate": 0.0002145283018867924,
1153
+ "loss": 0.1047,
1154
+ "step": 8050
1155
+ },
1156
+ {
1157
+ "epoch": 18.0,
1158
+ "learning_rate": 0.00021396226415094336,
1159
+ "loss": 0.1026,
1160
+ "step": 8100
1161
+ },
1162
+ {
1163
+ "epoch": 18.11,
1164
+ "learning_rate": 0.0002133962264150943,
1165
+ "loss": 0.0929,
1166
+ "step": 8150
1167
+ },
1168
+ {
1169
+ "epoch": 18.22,
1170
+ "learning_rate": 0.00021283018867924525,
1171
+ "loss": 0.0933,
1172
+ "step": 8200
1173
+ },
1174
+ {
1175
+ "epoch": 18.33,
1176
+ "learning_rate": 0.0002122641509433962,
1177
+ "loss": 0.0971,
1178
+ "step": 8250
1179
+ },
1180
+ {
1181
+ "epoch": 18.44,
1182
+ "learning_rate": 0.00021169811320754714,
1183
+ "loss": 0.097,
1184
+ "step": 8300
1185
+ },
1186
+ {
1187
+ "epoch": 18.56,
1188
+ "learning_rate": 0.0002111320754716981,
1189
+ "loss": 0.097,
1190
+ "step": 8350
1191
+ },
1192
+ {
1193
+ "epoch": 18.67,
1194
+ "learning_rate": 0.00021056603773584904,
1195
+ "loss": 0.1014,
1196
+ "step": 8400
1197
+ },
1198
+ {
1199
+ "epoch": 18.67,
1200
+ "eval_loss": 0.5109674334526062,
1201
+ "eval_runtime": 233.1396,
1202
+ "eval_samples_per_second": 24.483,
1203
+ "eval_steps_per_second": 3.063,
1204
+ "eval_wer": 0.3620159928378131,
1205
+ "step": 8400
1206
+ },
1207
+ {
1208
+ "epoch": 18.78,
1209
+ "learning_rate": 0.00020999999999999998,
1210
+ "loss": 0.0936,
1211
+ "step": 8450
1212
+ },
1213
+ {
1214
+ "epoch": 18.89,
1215
+ "learning_rate": 0.0002094339622641509,
1216
+ "loss": 0.0948,
1217
+ "step": 8500
1218
+ },
1219
+ {
1220
+ "epoch": 19.0,
1221
+ "learning_rate": 0.00020886792452830185,
1222
+ "loss": 0.0928,
1223
+ "step": 8550
1224
+ },
1225
+ {
1226
+ "epoch": 19.11,
1227
+ "learning_rate": 0.0002083018867924528,
1228
+ "loss": 0.0912,
1229
+ "step": 8600
1230
+ },
1231
+ {
1232
+ "epoch": 19.22,
1233
+ "learning_rate": 0.00020773584905660374,
1234
+ "loss": 0.0918,
1235
+ "step": 8650
1236
+ },
1237
+ {
1238
+ "epoch": 19.33,
1239
+ "learning_rate": 0.0002071698113207547,
1240
+ "loss": 0.0902,
1241
+ "step": 8700
1242
+ },
1243
+ {
1244
+ "epoch": 19.44,
1245
+ "learning_rate": 0.00020660377358490563,
1246
+ "loss": 0.0961,
1247
+ "step": 8750
1248
+ },
1249
+ {
1250
+ "epoch": 19.56,
1251
+ "learning_rate": 0.00020603773584905658,
1252
+ "loss": 0.0921,
1253
+ "step": 8800
1254
+ },
1255
+ {
1256
+ "epoch": 19.56,
1257
+ "eval_loss": 0.4945477545261383,
1258
+ "eval_runtime": 232.6384,
1259
+ "eval_samples_per_second": 24.536,
1260
+ "eval_steps_per_second": 3.069,
1261
+ "eval_wer": 0.3609375953771339,
1262
+ "step": 8800
1263
+ },
1264
+ {
1265
+ "epoch": 19.67,
1266
+ "learning_rate": 0.00020547169811320753,
1267
+ "loss": 0.0957,
1268
+ "step": 8850
1269
+ },
1270
+ {
1271
+ "epoch": 19.78,
1272
+ "learning_rate": 0.00020491698113207546,
1273
+ "loss": 0.0958,
1274
+ "step": 8900
1275
+ },
1276
+ {
1277
+ "epoch": 19.89,
1278
+ "learning_rate": 0.0002043509433962264,
1279
+ "loss": 0.0952,
1280
+ "step": 8950
1281
+ },
1282
+ {
1283
+ "epoch": 20.0,
1284
+ "learning_rate": 0.00020378490566037735,
1285
+ "loss": 0.0847,
1286
+ "step": 9000
1287
+ },
1288
+ {
1289
+ "epoch": 20.11,
1290
+ "learning_rate": 0.00020321886792452827,
1291
+ "loss": 0.0921,
1292
+ "step": 9050
1293
+ },
1294
+ {
1295
+ "epoch": 20.22,
1296
+ "learning_rate": 0.00020265283018867922,
1297
+ "loss": 0.0885,
1298
+ "step": 9100
1299
+ },
1300
+ {
1301
+ "epoch": 20.33,
1302
+ "learning_rate": 0.00020208679245283017,
1303
+ "loss": 0.0881,
1304
+ "step": 9150
1305
+ },
1306
+ {
1307
+ "epoch": 20.44,
1308
+ "learning_rate": 0.0002015207547169811,
1309
+ "loss": 0.0943,
1310
+ "step": 9200
1311
+ },
1312
+ {
1313
+ "epoch": 20.44,
1314
+ "eval_loss": 0.5395381450653076,
1315
+ "eval_runtime": 232.8056,
1316
+ "eval_samples_per_second": 24.518,
1317
+ "eval_steps_per_second": 3.067,
1318
+ "eval_wer": 0.36470181292856124,
1319
+ "step": 9200
1320
+ },
1321
+ {
1322
+ "epoch": 20.56,
1323
+ "learning_rate": 0.00020095471698113206,
1324
+ "loss": 0.0888,
1325
+ "step": 9250
1326
+ },
1327
+ {
1328
+ "epoch": 20.67,
1329
+ "learning_rate": 0.000200388679245283,
1330
+ "loss": 0.0894,
1331
+ "step": 9300
1332
+ },
1333
+ {
1334
+ "epoch": 20.78,
1335
+ "learning_rate": 0.00019982264150943395,
1336
+ "loss": 0.0883,
1337
+ "step": 9350
1338
+ },
1339
+ {
1340
+ "epoch": 20.89,
1341
+ "learning_rate": 0.0001992566037735849,
1342
+ "loss": 0.0873,
1343
+ "step": 9400
1344
+ },
1345
+ {
1346
+ "epoch": 21.0,
1347
+ "learning_rate": 0.00019869056603773584,
1348
+ "loss": 0.095,
1349
+ "step": 9450
1350
+ },
1351
+ {
1352
+ "epoch": 21.11,
1353
+ "learning_rate": 0.00019812452830188676,
1354
+ "loss": 0.087,
1355
+ "step": 9500
1356
+ },
1357
+ {
1358
+ "epoch": 21.22,
1359
+ "learning_rate": 0.0001975584905660377,
1360
+ "loss": 0.0795,
1361
+ "step": 9550
1362
+ },
1363
+ {
1364
+ "epoch": 21.33,
1365
+ "learning_rate": 0.00019699245283018866,
1366
+ "loss": 0.0877,
1367
+ "step": 9600
1368
+ },
1369
+ {
1370
+ "epoch": 21.33,
1371
+ "eval_loss": 0.516017735004425,
1372
+ "eval_runtime": 232.5407,
1373
+ "eval_samples_per_second": 24.546,
1374
+ "eval_steps_per_second": 3.07,
1375
+ "eval_wer": 0.37127393330213443,
1376
+ "step": 9600
1377
+ },
1378
+ {
1379
+ "epoch": 21.44,
1380
+ "learning_rate": 0.0001964264150943396,
1381
+ "loss": 0.0902,
1382
+ "step": 9650
1383
+ },
1384
+ {
1385
+ "epoch": 21.56,
1386
+ "learning_rate": 0.00019586037735849055,
1387
+ "loss": 0.0898,
1388
+ "step": 9700
1389
+ },
1390
+ {
1391
+ "epoch": 21.67,
1392
+ "learning_rate": 0.0001952943396226415,
1393
+ "loss": 0.0934,
1394
+ "step": 9750
1395
+ },
1396
+ {
1397
+ "epoch": 21.78,
1398
+ "learning_rate": 0.00019472830188679244,
1399
+ "loss": 0.0845,
1400
+ "step": 9800
1401
+ },
1402
+ {
1403
+ "epoch": 21.89,
1404
+ "learning_rate": 0.0001941622641509434,
1405
+ "loss": 0.0916,
1406
+ "step": 9850
1407
+ },
1408
+ {
1409
+ "epoch": 22.0,
1410
+ "learning_rate": 0.00019359622641509434,
1411
+ "loss": 0.0878,
1412
+ "step": 9900
1413
+ },
1414
+ {
1415
+ "epoch": 22.11,
1416
+ "learning_rate": 0.00019303018867924526,
1417
+ "loss": 0.0864,
1418
+ "step": 9950
1419
+ },
1420
+ {
1421
+ "epoch": 22.22,
1422
+ "learning_rate": 0.0001924641509433962,
1423
+ "loss": 0.0768,
1424
+ "step": 10000
1425
+ },
1426
+ {
1427
+ "epoch": 22.22,
1428
+ "eval_loss": 0.5633887052536011,
1429
+ "eval_runtime": 233.9063,
1430
+ "eval_samples_per_second": 24.403,
1431
+ "eval_steps_per_second": 3.053,
1432
+ "eval_wer": 0.36429487049056913,
1433
+ "step": 10000
1434
+ },
1435
+ {
1436
+ "epoch": 22.33,
1437
+ "learning_rate": 0.00019189811320754715,
1438
+ "loss": 0.0888,
1439
+ "step": 10050
1440
+ },
1441
+ {
1442
+ "epoch": 22.44,
1443
+ "learning_rate": 0.0001913320754716981,
1444
+ "loss": 0.0876,
1445
+ "step": 10100
1446
+ },
1447
+ {
1448
+ "epoch": 22.56,
1449
+ "learning_rate": 0.00019076603773584904,
1450
+ "loss": 0.0835,
1451
+ "step": 10150
1452
+ },
1453
+ {
1454
+ "epoch": 22.67,
1455
+ "learning_rate": 0.0001902,
1456
+ "loss": 0.0766,
1457
+ "step": 10200
1458
+ },
1459
+ {
1460
+ "epoch": 22.78,
1461
+ "learning_rate": 0.00018963396226415093,
1462
+ "loss": 0.0909,
1463
+ "step": 10250
1464
+ },
1465
+ {
1466
+ "epoch": 22.89,
1467
+ "learning_rate": 0.00018906792452830188,
1468
+ "loss": 0.08,
1469
+ "step": 10300
1470
+ },
1471
+ {
1472
+ "epoch": 23.0,
1473
+ "learning_rate": 0.00018850188679245283,
1474
+ "loss": 0.081,
1475
+ "step": 10350
1476
+ },
1477
+ {
1478
+ "epoch": 23.11,
1479
+ "learning_rate": 0.00018793584905660375,
1480
+ "loss": 0.0744,
1481
+ "step": 10400
1482
+ },
1483
+ {
1484
+ "epoch": 23.11,
1485
+ "eval_loss": 0.5204855799674988,
1486
+ "eval_runtime": 235.6787,
1487
+ "eval_samples_per_second": 24.219,
1488
+ "eval_steps_per_second": 3.03,
1489
+ "eval_wer": 0.3642745233686695,
1490
+ "step": 10400
1491
+ },
1492
+ {
1493
+ "epoch": 23.22,
1494
+ "learning_rate": 0.0001873698113207547,
1495
+ "loss": 0.0817,
1496
+ "step": 10450
1497
+ },
1498
+ {
1499
+ "epoch": 23.33,
1500
+ "learning_rate": 0.00018680377358490564,
1501
+ "loss": 0.0791,
1502
+ "step": 10500
1503
+ },
1504
+ {
1505
+ "epoch": 23.44,
1506
+ "learning_rate": 0.0001862377358490566,
1507
+ "loss": 0.0779,
1508
+ "step": 10550
1509
+ },
1510
+ {
1511
+ "epoch": 23.56,
1512
+ "learning_rate": 0.00018567169811320753,
1513
+ "loss": 0.0818,
1514
+ "step": 10600
1515
+ },
1516
+ {
1517
+ "epoch": 23.67,
1518
+ "learning_rate": 0.00018510566037735848,
1519
+ "loss": 0.0801,
1520
+ "step": 10650
1521
+ },
1522
+ {
1523
+ "epoch": 23.78,
1524
+ "learning_rate": 0.00018453962264150943,
1525
+ "loss": 0.0843,
1526
+ "step": 10700
1527
+ },
1528
+ {
1529
+ "epoch": 23.89,
1530
+ "learning_rate": 0.00018397358490566037,
1531
+ "loss": 0.0832,
1532
+ "step": 10750
1533
+ },
1534
+ {
1535
+ "epoch": 24.0,
1536
+ "learning_rate": 0.00018340754716981132,
1537
+ "loss": 0.0852,
1538
+ "step": 10800
1539
+ },
1540
+ {
1541
+ "epoch": 24.0,
1542
+ "eval_loss": 0.5427780151367188,
1543
+ "eval_runtime": 234.1837,
1544
+ "eval_samples_per_second": 24.374,
1545
+ "eval_steps_per_second": 3.049,
1546
+ "eval_wer": 0.36130384357132683,
1547
+ "step": 10800
1548
+ },
1549
+ {
1550
+ "epoch": 24.11,
1551
+ "learning_rate": 0.00018284150943396224,
1552
+ "loss": 0.0779,
1553
+ "step": 10850
1554
+ },
1555
+ {
1556
+ "epoch": 24.22,
1557
+ "learning_rate": 0.00018227547169811319,
1558
+ "loss": 0.0734,
1559
+ "step": 10900
1560
+ },
1561
+ {
1562
+ "epoch": 24.33,
1563
+ "learning_rate": 0.00018170943396226413,
1564
+ "loss": 0.0843,
1565
+ "step": 10950
1566
+ },
1567
+ {
1568
+ "epoch": 24.44,
1569
+ "learning_rate": 0.00018114339622641508,
1570
+ "loss": 0.0777,
1571
+ "step": 11000
1572
+ },
1573
+ {
1574
+ "epoch": 24.56,
1575
+ "learning_rate": 0.00018057735849056602,
1576
+ "loss": 0.0782,
1577
+ "step": 11050
1578
+ },
1579
+ {
1580
+ "epoch": 24.67,
1581
+ "learning_rate": 0.00018001132075471697,
1582
+ "loss": 0.0783,
1583
+ "step": 11100
1584
+ },
1585
+ {
1586
+ "epoch": 24.78,
1587
+ "learning_rate": 0.00017944528301886792,
1588
+ "loss": 0.076,
1589
+ "step": 11150
1590
+ },
1591
+ {
1592
+ "epoch": 24.89,
1593
+ "learning_rate": 0.00017887924528301886,
1594
+ "loss": 0.0732,
1595
+ "step": 11200
1596
+ },
1597
+ {
1598
+ "epoch": 24.89,
1599
+ "eval_loss": 0.551848292350769,
1600
+ "eval_runtime": 232.8874,
1601
+ "eval_samples_per_second": 24.51,
1602
+ "eval_steps_per_second": 3.066,
1603
+ "eval_wer": 0.3628298777137974,
1604
+ "step": 11200
1605
+ },
1606
+ {
1607
+ "epoch": 25.0,
1608
+ "learning_rate": 0.0001783245283018868,
1609
+ "loss": 0.0861,
1610
+ "step": 11250
1611
+ },
1612
+ {
1613
+ "epoch": 25.11,
1614
+ "learning_rate": 0.0001777698113207547,
1615
+ "loss": 0.0812,
1616
+ "step": 11300
1617
+ },
1618
+ {
1619
+ "epoch": 25.22,
1620
+ "learning_rate": 0.00017720377358490565,
1621
+ "loss": 0.0765,
1622
+ "step": 11350
1623
+ },
1624
+ {
1625
+ "epoch": 25.33,
1626
+ "learning_rate": 0.00017663773584905657,
1627
+ "loss": 0.0775,
1628
+ "step": 11400
1629
+ },
1630
+ {
1631
+ "epoch": 25.44,
1632
+ "learning_rate": 0.00017607169811320752,
1633
+ "loss": 0.0762,
1634
+ "step": 11450
1635
+ },
1636
+ {
1637
+ "epoch": 25.56,
1638
+ "learning_rate": 0.00017550566037735846,
1639
+ "loss": 0.0782,
1640
+ "step": 11500
1641
+ },
1642
+ {
1643
+ "epoch": 25.67,
1644
+ "learning_rate": 0.0001749396226415094,
1645
+ "loss": 0.077,
1646
+ "step": 11550
1647
+ },
1648
+ {
1649
+ "epoch": 25.78,
1650
+ "learning_rate": 0.00017437358490566036,
1651
+ "loss": 0.0725,
1652
+ "step": 11600
1653
+ },
1654
+ {
1655
+ "epoch": 25.78,
1656
+ "eval_loss": 0.5757771134376526,
1657
+ "eval_runtime": 233.1827,
1658
+ "eval_samples_per_second": 24.479,
1659
+ "eval_steps_per_second": 3.062,
1660
+ "eval_wer": 0.37282031456650455,
1661
+ "step": 11600
1662
+ },
1663
+ {
1664
+ "epoch": 25.89,
1665
+ "learning_rate": 0.0001738075471698113,
1666
+ "loss": 0.08,
1667
+ "step": 11650
1668
+ },
1669
+ {
1670
+ "epoch": 26.0,
1671
+ "learning_rate": 0.00017324150943396225,
1672
+ "loss": 0.0743,
1673
+ "step": 11700
1674
+ },
1675
+ {
1676
+ "epoch": 26.11,
1677
+ "learning_rate": 0.0001726754716981132,
1678
+ "loss": 0.0722,
1679
+ "step": 11750
1680
+ },
1681
+ {
1682
+ "epoch": 26.22,
1683
+ "learning_rate": 0.00017210943396226414,
1684
+ "loss": 0.0725,
1685
+ "step": 11800
1686
+ },
1687
+ {
1688
+ "epoch": 26.33,
1689
+ "learning_rate": 0.00017154339622641506,
1690
+ "loss": 0.0708,
1691
+ "step": 11850
1692
+ },
1693
+ {
1694
+ "epoch": 26.44,
1695
+ "learning_rate": 0.000170977358490566,
1696
+ "loss": 0.081,
1697
+ "step": 11900
1698
+ },
1699
+ {
1700
+ "epoch": 26.56,
1701
+ "learning_rate": 0.00017041132075471695,
1702
+ "loss": 0.0704,
1703
+ "step": 11950
1704
+ },
1705
+ {
1706
+ "epoch": 26.67,
1707
+ "learning_rate": 0.0001698452830188679,
1708
+ "loss": 0.0691,
1709
+ "step": 12000
1710
+ },
1711
+ {
1712
+ "epoch": 26.67,
1713
+ "eval_loss": 0.5725019574165344,
1714
+ "eval_runtime": 232.5631,
1715
+ "eval_samples_per_second": 24.544,
1716
+ "eval_steps_per_second": 3.07,
1717
+ "eval_wer": 0.3584552465053818,
1718
+ "step": 12000
1719
+ },
1720
+ {
1721
+ "epoch": 26.78,
1722
+ "learning_rate": 0.00016927924528301885,
1723
+ "loss": 0.0766,
1724
+ "step": 12050
1725
+ },
1726
+ {
1727
+ "epoch": 26.89,
1728
+ "learning_rate": 0.0001687132075471698,
1729
+ "loss": 0.0767,
1730
+ "step": 12100
1731
+ },
1732
+ {
1733
+ "epoch": 27.0,
1734
+ "learning_rate": 0.00016814716981132074,
1735
+ "loss": 0.0748,
1736
+ "step": 12150
1737
+ },
1738
+ {
1739
+ "epoch": 27.11,
1740
+ "learning_rate": 0.0001675811320754717,
1741
+ "loss": 0.0645,
1742
+ "step": 12200
1743
+ },
1744
+ {
1745
+ "epoch": 27.22,
1746
+ "learning_rate": 0.00016701509433962263,
1747
+ "loss": 0.0701,
1748
+ "step": 12250
1749
+ },
1750
+ {
1751
+ "epoch": 27.33,
1752
+ "learning_rate": 0.00016644905660377358,
1753
+ "loss": 0.0702,
1754
+ "step": 12300
1755
+ },
1756
+ {
1757
+ "epoch": 27.44,
1758
+ "learning_rate": 0.0001658830188679245,
1759
+ "loss": 0.0711,
1760
+ "step": 12350
1761
+ },
1762
+ {
1763
+ "epoch": 27.56,
1764
+ "learning_rate": 0.00016531698113207545,
1765
+ "loss": 0.0664,
1766
+ "step": 12400
1767
+ },
1768
+ {
1769
+ "epoch": 27.56,
1770
+ "eval_loss": 0.5794127583503723,
1771
+ "eval_runtime": 233.0106,
1772
+ "eval_samples_per_second": 24.497,
1773
+ "eval_steps_per_second": 3.064,
1774
+ "eval_wer": 0.35994058640405313,
1775
+ "step": 12400
1776
+ },
1777
+ {
1778
+ "epoch": 27.67,
1779
+ "learning_rate": 0.0001647509433962264,
1780
+ "loss": 0.0692,
1781
+ "step": 12450
1782
+ },
1783
+ {
1784
+ "epoch": 27.78,
1785
+ "learning_rate": 0.00016418490566037734,
1786
+ "loss": 0.0705,
1787
+ "step": 12500
1788
+ },
1789
+ {
1790
+ "epoch": 27.89,
1791
+ "learning_rate": 0.00016361886792452829,
1792
+ "loss": 0.0705,
1793
+ "step": 12550
1794
+ },
1795
+ {
1796
+ "epoch": 28.0,
1797
+ "learning_rate": 0.00016305283018867923,
1798
+ "loss": 0.0655,
1799
+ "step": 12600
1800
+ },
1801
+ {
1802
+ "epoch": 28.11,
1803
+ "learning_rate": 0.00016248679245283018,
1804
+ "loss": 0.0723,
1805
+ "step": 12650
1806
+ },
1807
+ {
1808
+ "epoch": 28.22,
1809
+ "learning_rate": 0.00016192075471698113,
1810
+ "loss": 0.0716,
1811
+ "step": 12700
1812
+ },
1813
+ {
1814
+ "epoch": 28.33,
1815
+ "learning_rate": 0.00016135471698113207,
1816
+ "loss": 0.0709,
1817
+ "step": 12750
1818
+ },
1819
+ {
1820
+ "epoch": 28.44,
1821
+ "learning_rate": 0.000160788679245283,
1822
+ "loss": 0.0663,
1823
+ "step": 12800
1824
+ },
1825
+ {
1826
+ "epoch": 28.44,
1827
+ "eval_loss": 0.5777014493942261,
1828
+ "eval_runtime": 235.2144,
1829
+ "eval_samples_per_second": 24.267,
1830
+ "eval_steps_per_second": 3.036,
1831
+ "eval_wer": 0.35607463324312777,
1832
+ "step": 12800
1833
+ },
1834
+ {
1835
+ "epoch": 28.56,
1836
+ "learning_rate": 0.00016022264150943394,
1837
+ "loss": 0.0687,
1838
+ "step": 12850
1839
+ },
1840
+ {
1841
+ "epoch": 28.67,
1842
+ "learning_rate": 0.00015965660377358488,
1843
+ "loss": 0.0736,
1844
+ "step": 12900
1845
+ },
1846
+ {
1847
+ "epoch": 28.78,
1848
+ "learning_rate": 0.00015909056603773583,
1849
+ "loss": 0.0675,
1850
+ "step": 12950
1851
+ },
1852
+ {
1853
+ "epoch": 28.89,
1854
+ "learning_rate": 0.00015852452830188678,
1855
+ "loss": 0.0695,
1856
+ "step": 13000
1857
+ },
1858
+ {
1859
+ "epoch": 29.0,
1860
+ "learning_rate": 0.00015795849056603772,
1861
+ "loss": 0.0719,
1862
+ "step": 13050
1863
+ },
1864
+ {
1865
+ "epoch": 29.11,
1866
+ "learning_rate": 0.00015739245283018867,
1867
+ "loss": 0.0675,
1868
+ "step": 13100
1869
+ },
1870
+ {
1871
+ "epoch": 29.22,
1872
+ "learning_rate": 0.00015682641509433962,
1873
+ "loss": 0.0692,
1874
+ "step": 13150
1875
+ },
1876
+ {
1877
+ "epoch": 29.33,
1878
+ "learning_rate": 0.00015626037735849056,
1879
+ "loss": 0.0671,
1880
+ "step": 13200
1881
+ },
1882
+ {
1883
+ "epoch": 29.33,
1884
+ "eval_loss": 0.573063850402832,
1885
+ "eval_runtime": 235.4966,
1886
+ "eval_samples_per_second": 24.238,
1887
+ "eval_steps_per_second": 3.032,
1888
+ "eval_wer": 0.35485380592915133,
1889
+ "step": 13200
1890
+ },
1891
+ {
1892
+ "epoch": 29.44,
1893
+ "learning_rate": 0.00015569433962264148,
1894
+ "loss": 0.0648,
1895
+ "step": 13250
1896
+ },
1897
+ {
1898
+ "epoch": 29.56,
1899
+ "learning_rate": 0.00015512830188679243,
1900
+ "loss": 0.0768,
1901
+ "step": 13300
1902
+ },
1903
+ {
1904
+ "epoch": 29.67,
1905
+ "learning_rate": 0.00015456226415094338,
1906
+ "loss": 0.0627,
1907
+ "step": 13350
1908
+ },
1909
+ {
1910
+ "epoch": 29.78,
1911
+ "learning_rate": 0.00015399622641509432,
1912
+ "loss": 0.0657,
1913
+ "step": 13400
1914
+ },
1915
+ {
1916
+ "epoch": 29.89,
1917
+ "learning_rate": 0.00015343018867924527,
1918
+ "loss": 0.0698,
1919
+ "step": 13450
1920
+ },
1921
+ {
1922
+ "epoch": 30.0,
1923
+ "learning_rate": 0.00015286415094339622,
1924
+ "loss": 0.0683,
1925
+ "step": 13500
1926
+ },
1927
+ {
1928
+ "epoch": 30.11,
1929
+ "learning_rate": 0.00015229811320754716,
1930
+ "loss": 0.0616,
1931
+ "step": 13550
1932
+ },
1933
+ {
1934
+ "epoch": 30.22,
1935
+ "learning_rate": 0.0001517320754716981,
1936
+ "loss": 0.0649,
1937
+ "step": 13600
1938
+ },
1939
+ {
1940
+ "epoch": 30.22,
1941
+ "eval_loss": 0.5660180449485779,
1942
+ "eval_runtime": 233.6416,
1943
+ "eval_samples_per_second": 24.431,
1944
+ "eval_steps_per_second": 3.056,
1945
+ "eval_wer": 0.3600423220135512,
1946
+ "step": 13600
1947
+ },
1948
+ {
1949
+ "epoch": 30.33,
1950
+ "learning_rate": 0.00015116603773584905,
1951
+ "loss": 0.0674,
1952
+ "step": 13650
1953
+ },
1954
+ {
1955
+ "epoch": 30.44,
1956
+ "learning_rate": 0.00015059999999999997,
1957
+ "loss": 0.0645,
1958
+ "step": 13700
1959
+ },
1960
+ {
1961
+ "epoch": 30.56,
1962
+ "learning_rate": 0.00015003396226415092,
1963
+ "loss": 0.0643,
1964
+ "step": 13750
1965
+ },
1966
+ {
1967
+ "epoch": 30.67,
1968
+ "learning_rate": 0.00014946792452830187,
1969
+ "loss": 0.0631,
1970
+ "step": 13800
1971
+ },
1972
+ {
1973
+ "epoch": 30.78,
1974
+ "learning_rate": 0.00014890188679245281,
1975
+ "loss": 0.064,
1976
+ "step": 13850
1977
+ },
1978
+ {
1979
+ "epoch": 30.89,
1980
+ "learning_rate": 0.00014833584905660376,
1981
+ "loss": 0.0666,
1982
+ "step": 13900
1983
+ },
1984
+ {
1985
+ "epoch": 31.0,
1986
+ "learning_rate": 0.0001477698113207547,
1987
+ "loss": 0.0656,
1988
+ "step": 13950
1989
+ },
1990
+ {
1991
+ "epoch": 31.11,
1992
+ "learning_rate": 0.00014720377358490565,
1993
+ "loss": 0.0614,
1994
+ "step": 14000
1995
+ },
1996
+ {
1997
+ "epoch": 31.11,
1998
+ "eval_loss": 0.5769771933555603,
1999
+ "eval_runtime": 234.4224,
2000
+ "eval_samples_per_second": 24.349,
2001
+ "eval_steps_per_second": 3.046,
2002
+ "eval_wer": 0.35595255051173014,
2003
+ "step": 14000
2004
+ },
2005
+ {
2006
+ "epoch": 31.22,
2007
+ "learning_rate": 0.0001466377358490566,
2008
+ "loss": 0.0609,
2009
+ "step": 14050
2010
+ },
2011
+ {
2012
+ "epoch": 31.33,
2013
+ "learning_rate": 0.00014607169811320755,
2014
+ "loss": 0.0615,
2015
+ "step": 14100
2016
+ },
2017
+ {
2018
+ "epoch": 31.44,
2019
+ "learning_rate": 0.0001455056603773585,
2020
+ "loss": 0.0595,
2021
+ "step": 14150
2022
+ },
2023
+ {
2024
+ "epoch": 31.56,
2025
+ "learning_rate": 0.0001449396226415094,
2026
+ "loss": 0.0535,
2027
+ "step": 14200
2028
+ },
2029
+ {
2030
+ "epoch": 31.67,
2031
+ "learning_rate": 0.00014437358490566036,
2032
+ "loss": 0.0662,
2033
+ "step": 14250
2034
+ },
2035
+ {
2036
+ "epoch": 31.78,
2037
+ "learning_rate": 0.0001438075471698113,
2038
+ "loss": 0.0636,
2039
+ "step": 14300
2040
+ },
2041
+ {
2042
+ "epoch": 31.89,
2043
+ "learning_rate": 0.00014324150943396225,
2044
+ "loss": 0.065,
2045
+ "step": 14350
2046
+ },
2047
+ {
2048
+ "epoch": 32.0,
2049
+ "learning_rate": 0.0001426754716981132,
2050
+ "loss": 0.0605,
2051
+ "step": 14400
2052
+ },
2053
+ {
2054
+ "epoch": 32.0,
2055
+ "eval_loss": 0.5667794942855835,
2056
+ "eval_runtime": 236.4511,
2057
+ "eval_samples_per_second": 24.14,
2058
+ "eval_steps_per_second": 3.02,
2059
+ "eval_wer": 0.35633914582782267,
2060
+ "step": 14400
2061
+ },
2062
+ {
2063
+ "epoch": 32.11,
2064
+ "learning_rate": 0.00014210943396226414,
2065
+ "loss": 0.0576,
2066
+ "step": 14450
2067
+ },
2068
+ {
2069
+ "epoch": 32.22,
2070
+ "learning_rate": 0.0001415433962264151,
2071
+ "loss": 0.0576,
2072
+ "step": 14500
2073
+ },
2074
+ {
2075
+ "epoch": 32.33,
2076
+ "learning_rate": 0.00014097735849056604,
2077
+ "loss": 0.0567,
2078
+ "step": 14550
2079
+ },
2080
+ {
2081
+ "epoch": 32.44,
2082
+ "learning_rate": 0.00014041132075471698,
2083
+ "loss": 0.0638,
2084
+ "step": 14600
2085
+ },
2086
+ {
2087
+ "epoch": 32.56,
2088
+ "learning_rate": 0.0001398452830188679,
2089
+ "loss": 0.0563,
2090
+ "step": 14650
2091
+ },
2092
+ {
2093
+ "epoch": 32.67,
2094
+ "learning_rate": 0.00013927924528301885,
2095
+ "loss": 0.0606,
2096
+ "step": 14700
2097
+ },
2098
+ {
2099
+ "epoch": 32.78,
2100
+ "learning_rate": 0.0001387132075471698,
2101
+ "loss": 0.066,
2102
+ "step": 14750
2103
+ },
2104
+ {
2105
+ "epoch": 32.89,
2106
+ "learning_rate": 0.00013814716981132074,
2107
+ "loss": 0.0594,
2108
+ "step": 14800
2109
+ },
2110
+ {
2111
+ "epoch": 32.89,
2112
+ "eval_loss": 0.5590910911560059,
2113
+ "eval_runtime": 236.6548,
2114
+ "eval_samples_per_second": 24.12,
2115
+ "eval_steps_per_second": 3.017,
2116
+ "eval_wer": 0.3464707917065131,
2117
+ "step": 14800
2118
+ },
2119
+ {
2120
+ "epoch": 33.0,
2121
+ "learning_rate": 0.0001375811320754717,
2122
+ "loss": 0.0567,
2123
+ "step": 14850
2124
+ },
2125
+ {
2126
+ "epoch": 33.11,
2127
+ "learning_rate": 0.00013701509433962264,
2128
+ "loss": 0.0593,
2129
+ "step": 14900
2130
+ },
2131
+ {
2132
+ "epoch": 33.22,
2133
+ "learning_rate": 0.00013644905660377358,
2134
+ "loss": 0.0503,
2135
+ "step": 14950
2136
+ },
2137
+ {
2138
+ "epoch": 33.33,
2139
+ "learning_rate": 0.00013588301886792453,
2140
+ "loss": 0.0548,
2141
+ "step": 15000
2142
+ },
2143
+ {
2144
+ "epoch": 33.44,
2145
+ "learning_rate": 0.00013531698113207548,
2146
+ "loss": 0.058,
2147
+ "step": 15050
2148
+ },
2149
+ {
2150
+ "epoch": 33.56,
2151
+ "learning_rate": 0.0001347509433962264,
2152
+ "loss": 0.0593,
2153
+ "step": 15100
2154
+ },
2155
+ {
2156
+ "epoch": 33.67,
2157
+ "learning_rate": 0.00013418490566037734,
2158
+ "loss": 0.0576,
2159
+ "step": 15150
2160
+ },
2161
+ {
2162
+ "epoch": 33.78,
2163
+ "learning_rate": 0.0001336188679245283,
2164
+ "loss": 0.0622,
2165
+ "step": 15200
2166
+ },
2167
+ {
2168
+ "epoch": 33.78,
2169
+ "eval_loss": 0.6206709742546082,
2170
+ "eval_runtime": 237.9375,
2171
+ "eval_samples_per_second": 23.989,
2172
+ "eval_steps_per_second": 3.001,
2173
+ "eval_wer": 0.35127271247482045,
2174
+ "step": 15200
2175
+ },
2176
+ {
2177
+ "epoch": 33.89,
2178
+ "learning_rate": 0.00013305283018867923,
2179
+ "loss": 0.0554,
2180
+ "step": 15250
2181
+ },
2182
+ {
2183
+ "epoch": 34.0,
2184
+ "learning_rate": 0.00013248679245283018,
2185
+ "loss": 0.0536,
2186
+ "step": 15300
2187
+ },
2188
+ {
2189
+ "epoch": 34.11,
2190
+ "learning_rate": 0.00013192075471698113,
2191
+ "loss": 0.0585,
2192
+ "step": 15350
2193
+ },
2194
+ {
2195
+ "epoch": 34.22,
2196
+ "learning_rate": 0.00013135471698113207,
2197
+ "loss": 0.0604,
2198
+ "step": 15400
2199
+ },
2200
+ {
2201
+ "epoch": 34.33,
2202
+ "learning_rate": 0.00013078867924528302,
2203
+ "loss": 0.0586,
2204
+ "step": 15450
2205
+ },
2206
+ {
2207
+ "epoch": 34.44,
2208
+ "learning_rate": 0.00013023396226415093,
2209
+ "loss": 0.0648,
2210
+ "step": 15500
2211
+ },
2212
+ {
2213
+ "epoch": 34.56,
2214
+ "learning_rate": 0.00012966792452830187,
2215
+ "loss": 0.0534,
2216
+ "step": 15550
2217
+ },
2218
+ {
2219
+ "epoch": 34.67,
2220
+ "learning_rate": 0.00012910188679245282,
2221
+ "loss": 0.0597,
2222
+ "step": 15600
2223
+ },
2224
+ {
2225
+ "epoch": 34.67,
2226
+ "eval_loss": 0.545280933380127,
2227
+ "eval_runtime": 234.5113,
2228
+ "eval_samples_per_second": 24.34,
2229
+ "eval_steps_per_second": 3.045,
2230
+ "eval_wer": 0.35078438154922986,
2231
+ "step": 15600
2232
+ },
2233
+ {
2234
+ "epoch": 34.78,
2235
+ "learning_rate": 0.00012853584905660377,
2236
+ "loss": 0.058,
2237
+ "step": 15650
2238
+ },
2239
+ {
2240
+ "epoch": 34.89,
2241
+ "learning_rate": 0.0001279698113207547,
2242
+ "loss": 0.0557,
2243
+ "step": 15700
2244
+ },
2245
+ {
2246
+ "epoch": 35.0,
2247
+ "learning_rate": 0.00012740377358490566,
2248
+ "loss": 0.0557,
2249
+ "step": 15750
2250
+ },
2251
+ {
2252
+ "epoch": 35.11,
2253
+ "learning_rate": 0.0001268377358490566,
2254
+ "loss": 0.0529,
2255
+ "step": 15800
2256
+ },
2257
+ {
2258
+ "epoch": 35.22,
2259
+ "learning_rate": 0.00012627169811320753,
2260
+ "loss": 0.0544,
2261
+ "step": 15850
2262
+ },
2263
+ {
2264
+ "epoch": 35.33,
2265
+ "learning_rate": 0.00012570566037735847,
2266
+ "loss": 0.0538,
2267
+ "step": 15900
2268
+ },
2269
+ {
2270
+ "epoch": 35.44,
2271
+ "learning_rate": 0.00012513962264150942,
2272
+ "loss": 0.0517,
2273
+ "step": 15950
2274
+ },
2275
+ {
2276
+ "epoch": 35.56,
2277
+ "learning_rate": 0.00012457358490566037,
2278
+ "loss": 0.0566,
2279
+ "step": 16000
2280
+ },
2281
+ {
2282
+ "epoch": 35.56,
2283
+ "eval_loss": 0.6024277210235596,
2284
+ "eval_runtime": 233.9908,
2285
+ "eval_samples_per_second": 24.394,
2286
+ "eval_steps_per_second": 3.051,
2287
+ "eval_wer": 0.3531649948114839,
2288
+ "step": 16000
2289
+ },
2290
+ {
2291
+ "epoch": 35.67,
2292
+ "learning_rate": 0.0001240075471698113,
2293
+ "loss": 0.051,
2294
+ "step": 16050
2295
+ },
2296
+ {
2297
+ "epoch": 35.78,
2298
+ "learning_rate": 0.00012344150943396226,
2299
+ "loss": 0.0591,
2300
+ "step": 16100
2301
+ },
2302
+ {
2303
+ "epoch": 35.89,
2304
+ "learning_rate": 0.0001228754716981132,
2305
+ "loss": 0.0569,
2306
+ "step": 16150
2307
+ },
2308
+ {
2309
+ "epoch": 36.0,
2310
+ "learning_rate": 0.00012230943396226415,
2311
+ "loss": 0.0563,
2312
+ "step": 16200
2313
+ },
2314
+ {
2315
+ "epoch": 36.11,
2316
+ "learning_rate": 0.00012174339622641508,
2317
+ "loss": 0.0533,
2318
+ "step": 16250
2319
+ },
2320
+ {
2321
+ "epoch": 36.22,
2322
+ "learning_rate": 0.00012117735849056603,
2323
+ "loss": 0.0566,
2324
+ "step": 16300
2325
+ },
2326
+ {
2327
+ "epoch": 36.33,
2328
+ "learning_rate": 0.00012061132075471698,
2329
+ "loss": 0.0556,
2330
+ "step": 16350
2331
+ },
2332
+ {
2333
+ "epoch": 36.44,
2334
+ "learning_rate": 0.00012004528301886791,
2335
+ "loss": 0.0524,
2336
+ "step": 16400
2337
+ },
2338
+ {
2339
+ "epoch": 36.44,
2340
+ "eval_loss": 0.6054043173789978,
2341
+ "eval_runtime": 232.789,
2342
+ "eval_samples_per_second": 24.52,
2343
+ "eval_steps_per_second": 3.067,
2344
+ "eval_wer": 0.3538161027122714,
2345
+ "step": 16400
2346
+ },
2347
+ {
2348
+ "epoch": 36.56,
2349
+ "learning_rate": 0.00011947924528301886,
2350
+ "loss": 0.0568,
2351
+ "step": 16450
2352
+ },
2353
+ {
2354
+ "epoch": 36.67,
2355
+ "learning_rate": 0.0001189132075471698,
2356
+ "loss": 0.0554,
2357
+ "step": 16500
2358
+ },
2359
+ {
2360
+ "epoch": 36.78,
2361
+ "learning_rate": 0.00011834716981132075,
2362
+ "loss": 0.0499,
2363
+ "step": 16550
2364
+ },
2365
+ {
2366
+ "epoch": 36.89,
2367
+ "learning_rate": 0.0001177811320754717,
2368
+ "loss": 0.0518,
2369
+ "step": 16600
2370
+ },
2371
+ {
2372
+ "epoch": 37.0,
2373
+ "learning_rate": 0.00011721509433962263,
2374
+ "loss": 0.0571,
2375
+ "step": 16650
2376
+ },
2377
+ {
2378
+ "epoch": 37.11,
2379
+ "learning_rate": 0.00011664905660377358,
2380
+ "loss": 0.0542,
2381
+ "step": 16700
2382
+ },
2383
+ {
2384
+ "epoch": 37.22,
2385
+ "learning_rate": 0.00011608301886792452,
2386
+ "loss": 0.0502,
2387
+ "step": 16750
2388
+ },
2389
+ {
2390
+ "epoch": 37.33,
2391
+ "learning_rate": 0.00011551698113207547,
2392
+ "loss": 0.045,
2393
+ "step": 16800
2394
+ },
2395
+ {
2396
+ "epoch": 37.33,
2397
+ "eval_loss": 0.6067692041397095,
2398
+ "eval_runtime": 233.7086,
2399
+ "eval_samples_per_second": 24.424,
2400
+ "eval_steps_per_second": 3.055,
2401
+ "eval_wer": 0.3464097503408143,
2402
+ "step": 16800
2403
+ },
2404
+ {
2405
+ "epoch": 37.44,
2406
+ "learning_rate": 0.0001149509433962264,
2407
+ "loss": 0.0536,
2408
+ "step": 16850
2409
+ },
2410
+ {
2411
+ "epoch": 37.56,
2412
+ "learning_rate": 0.00011438490566037735,
2413
+ "loss": 0.0513,
2414
+ "step": 16900
2415
+ },
2416
+ {
2417
+ "epoch": 37.67,
2418
+ "learning_rate": 0.0001138188679245283,
2419
+ "loss": 0.0513,
2420
+ "step": 16950
2421
+ },
2422
+ {
2423
+ "epoch": 37.78,
2424
+ "learning_rate": 0.00011325283018867924,
2425
+ "loss": 0.0546,
2426
+ "step": 17000
2427
+ },
2428
+ {
2429
+ "epoch": 37.89,
2430
+ "learning_rate": 0.00011268679245283019,
2431
+ "loss": 0.0552,
2432
+ "step": 17050
2433
+ },
2434
+ {
2435
+ "epoch": 38.0,
2436
+ "learning_rate": 0.00011212075471698112,
2437
+ "loss": 0.0511,
2438
+ "step": 17100
2439
+ },
2440
+ {
2441
+ "epoch": 38.11,
2442
+ "learning_rate": 0.00011155471698113207,
2443
+ "loss": 0.0517,
2444
+ "step": 17150
2445
+ },
2446
+ {
2447
+ "epoch": 38.22,
2448
+ "learning_rate": 0.00011098867924528301,
2449
+ "loss": 0.0526,
2450
+ "step": 17200
2451
+ },
2452
+ {
2453
+ "epoch": 38.22,
2454
+ "eval_loss": 0.5827322602272034,
2455
+ "eval_runtime": 234.6281,
2456
+ "eval_samples_per_second": 24.328,
2457
+ "eval_steps_per_second": 3.043,
2458
+ "eval_wer": 0.34946181862575537,
2459
+ "step": 17200
2460
+ },
2461
+ {
2462
+ "epoch": 38.33,
2463
+ "learning_rate": 0.00011042264150943396,
2464
+ "loss": 0.0452,
2465
+ "step": 17250
2466
+ },
2467
+ {
2468
+ "epoch": 38.44,
2469
+ "learning_rate": 0.0001098566037735849,
2470
+ "loss": 0.0471,
2471
+ "step": 17300
2472
+ },
2473
+ {
2474
+ "epoch": 38.56,
2475
+ "learning_rate": 0.00010929056603773584,
2476
+ "loss": 0.049,
2477
+ "step": 17350
2478
+ },
2479
+ {
2480
+ "epoch": 38.67,
2481
+ "learning_rate": 0.00010873584905660376,
2482
+ "loss": 0.0551,
2483
+ "step": 17400
2484
+ },
2485
+ {
2486
+ "epoch": 38.78,
2487
+ "learning_rate": 0.0001081698113207547,
2488
+ "loss": 0.0522,
2489
+ "step": 17450
2490
+ },
2491
+ {
2492
+ "epoch": 38.89,
2493
+ "learning_rate": 0.00010760377358490565,
2494
+ "loss": 0.0504,
2495
+ "step": 17500
2496
+ },
2497
+ {
2498
+ "epoch": 39.0,
2499
+ "learning_rate": 0.0001070377358490566,
2500
+ "loss": 0.0474,
2501
+ "step": 17550
2502
+ },
2503
+ {
2504
+ "epoch": 39.11,
2505
+ "learning_rate": 0.00010647169811320753,
2506
+ "loss": 0.0437,
2507
+ "step": 17600
2508
+ },
2509
+ {
2510
+ "epoch": 39.11,
2511
+ "eval_loss": 0.6006141901016235,
2512
+ "eval_runtime": 234.8995,
2513
+ "eval_samples_per_second": 24.3,
2514
+ "eval_steps_per_second": 3.04,
2515
+ "eval_wer": 0.33963415874824504,
2516
+ "step": 17600
2517
+ },
2518
+ {
2519
+ "epoch": 39.22,
2520
+ "learning_rate": 0.00010590566037735848,
2521
+ "loss": 0.0444,
2522
+ "step": 17650
2523
+ },
2524
+ {
2525
+ "epoch": 39.33,
2526
+ "learning_rate": 0.00010533962264150943,
2527
+ "loss": 0.0496,
2528
+ "step": 17700
2529
+ },
2530
+ {
2531
+ "epoch": 39.44,
2532
+ "learning_rate": 0.00010477358490566037,
2533
+ "loss": 0.0514,
2534
+ "step": 17750
2535
+ },
2536
+ {
2537
+ "epoch": 39.56,
2538
+ "learning_rate": 0.0001042075471698113,
2539
+ "loss": 0.0506,
2540
+ "step": 17800
2541
+ },
2542
+ {
2543
+ "epoch": 39.67,
2544
+ "learning_rate": 0.00010364150943396225,
2545
+ "loss": 0.0494,
2546
+ "step": 17850
2547
+ },
2548
+ {
2549
+ "epoch": 39.78,
2550
+ "learning_rate": 0.0001030754716981132,
2551
+ "loss": 0.046,
2552
+ "step": 17900
2553
+ },
2554
+ {
2555
+ "epoch": 39.89,
2556
+ "learning_rate": 0.00010250943396226414,
2557
+ "loss": 0.0515,
2558
+ "step": 17950
2559
+ },
2560
+ {
2561
+ "epoch": 40.0,
2562
+ "learning_rate": 0.00010194339622641509,
2563
+ "loss": 0.0498,
2564
+ "step": 18000
2565
+ },
2566
+ {
2567
+ "epoch": 40.0,
2568
+ "eval_loss": 0.546351969242096,
2569
+ "eval_runtime": 234.7053,
2570
+ "eval_samples_per_second": 24.32,
2571
+ "eval_steps_per_second": 3.042,
2572
+ "eval_wer": 0.3422996317170936,
2573
+ "step": 18000
2574
+ },
2575
+ {
2576
+ "epoch": 40.11,
2577
+ "learning_rate": 0.00010137735849056602,
2578
+ "loss": 0.0514,
2579
+ "step": 18050
2580
+ },
2581
+ {
2582
+ "epoch": 40.22,
2583
+ "learning_rate": 0.00010081132075471697,
2584
+ "loss": 0.0454,
2585
+ "step": 18100
2586
+ },
2587
+ {
2588
+ "epoch": 40.33,
2589
+ "learning_rate": 0.00010024528301886792,
2590
+ "loss": 0.044,
2591
+ "step": 18150
2592
+ },
2593
+ {
2594
+ "epoch": 40.44,
2595
+ "learning_rate": 9.967924528301886e-05,
2596
+ "loss": 0.047,
2597
+ "step": 18200
2598
+ },
2599
+ {
2600
+ "epoch": 40.56,
2601
+ "learning_rate": 9.911320754716981e-05,
2602
+ "loss": 0.0412,
2603
+ "step": 18250
2604
+ },
2605
+ {
2606
+ "epoch": 40.67,
2607
+ "learning_rate": 9.854716981132074e-05,
2608
+ "loss": 0.0484,
2609
+ "step": 18300
2610
+ },
2611
+ {
2612
+ "epoch": 40.78,
2613
+ "learning_rate": 9.798113207547169e-05,
2614
+ "loss": 0.0413,
2615
+ "step": 18350
2616
+ },
2617
+ {
2618
+ "epoch": 40.89,
2619
+ "learning_rate": 9.741509433962264e-05,
2620
+ "loss": 0.0494,
2621
+ "step": 18400
2622
+ },
2623
+ {
2624
+ "epoch": 40.89,
2625
+ "eval_loss": 0.6018606424331665,
2626
+ "eval_runtime": 234.1001,
2627
+ "eval_samples_per_second": 24.383,
2628
+ "eval_steps_per_second": 3.05,
2629
+ "eval_wer": 0.3394713817730482,
2630
+ "step": 18400
2631
+ },
2632
+ {
2633
+ "epoch": 41.0,
2634
+ "learning_rate": 9.684905660377358e-05,
2635
+ "loss": 0.0462,
2636
+ "step": 18450
2637
+ },
2638
+ {
2639
+ "epoch": 41.11,
2640
+ "learning_rate": 9.628301886792452e-05,
2641
+ "loss": 0.0404,
2642
+ "step": 18500
2643
+ },
2644
+ {
2645
+ "epoch": 41.22,
2646
+ "learning_rate": 9.571698113207546e-05,
2647
+ "loss": 0.0452,
2648
+ "step": 18550
2649
+ },
2650
+ {
2651
+ "epoch": 41.33,
2652
+ "learning_rate": 9.515094339622641e-05,
2653
+ "loss": 0.042,
2654
+ "step": 18600
2655
+ },
2656
+ {
2657
+ "epoch": 41.44,
2658
+ "learning_rate": 9.458490566037736e-05,
2659
+ "loss": 0.0449,
2660
+ "step": 18650
2661
+ },
2662
+ {
2663
+ "epoch": 41.56,
2664
+ "learning_rate": 9.40188679245283e-05,
2665
+ "loss": 0.0431,
2666
+ "step": 18700
2667
+ },
2668
+ {
2669
+ "epoch": 41.67,
2670
+ "learning_rate": 9.345283018867923e-05,
2671
+ "loss": 0.0407,
2672
+ "step": 18750
2673
+ },
2674
+ {
2675
+ "epoch": 41.78,
2676
+ "learning_rate": 9.288679245283018e-05,
2677
+ "loss": 0.0476,
2678
+ "step": 18800
2679
+ },
2680
+ {
2681
+ "epoch": 41.78,
2682
+ "eval_loss": 0.5934082865715027,
2683
+ "eval_runtime": 234.8837,
2684
+ "eval_samples_per_second": 24.301,
2685
+ "eval_steps_per_second": 3.04,
2686
+ "eval_wer": 0.3392068691883533,
2687
+ "step": 18800
2688
+ },
2689
+ {
2690
+ "epoch": 41.89,
2691
+ "learning_rate": 9.232075471698113e-05,
2692
+ "loss": 0.0475,
2693
+ "step": 18850
2694
+ },
2695
+ {
2696
+ "epoch": 42.0,
2697
+ "learning_rate": 9.175471698113207e-05,
2698
+ "loss": 0.0422,
2699
+ "step": 18900
2700
+ },
2701
+ {
2702
+ "epoch": 42.11,
2703
+ "learning_rate": 9.118867924528302e-05,
2704
+ "loss": 0.0424,
2705
+ "step": 18950
2706
+ },
2707
+ {
2708
+ "epoch": 42.22,
2709
+ "learning_rate": 9.062264150943395e-05,
2710
+ "loss": 0.0446,
2711
+ "step": 19000
2712
+ },
2713
+ {
2714
+ "epoch": 42.33,
2715
+ "learning_rate": 9.00566037735849e-05,
2716
+ "loss": 0.0435,
2717
+ "step": 19050
2718
+ },
2719
+ {
2720
+ "epoch": 42.44,
2721
+ "learning_rate": 8.949056603773585e-05,
2722
+ "loss": 0.0423,
2723
+ "step": 19100
2724
+ },
2725
+ {
2726
+ "epoch": 42.56,
2727
+ "learning_rate": 8.892452830188679e-05,
2728
+ "loss": 0.0403,
2729
+ "step": 19150
2730
+ },
2731
+ {
2732
+ "epoch": 42.67,
2733
+ "learning_rate": 8.835849056603773e-05,
2734
+ "loss": 0.0414,
2735
+ "step": 19200
2736
+ },
2737
+ {
2738
+ "epoch": 42.67,
2739
+ "eval_loss": 0.6187946796417236,
2740
+ "eval_runtime": 235.2867,
2741
+ "eval_samples_per_second": 24.26,
2742
+ "eval_steps_per_second": 3.035,
2743
+ "eval_wer": 0.3374366695830875,
2744
+ "step": 19200
2745
+ },
2746
+ {
2747
+ "epoch": 42.78,
2748
+ "learning_rate": 8.779245283018867e-05,
2749
+ "loss": 0.0444,
2750
+ "step": 19250
2751
+ },
2752
+ {
2753
+ "epoch": 42.89,
2754
+ "learning_rate": 8.722641509433962e-05,
2755
+ "loss": 0.0476,
2756
+ "step": 19300
2757
+ },
2758
+ {
2759
+ "epoch": 43.0,
2760
+ "learning_rate": 8.666037735849057e-05,
2761
+ "loss": 0.0426,
2762
+ "step": 19350
2763
+ },
2764
+ {
2765
+ "epoch": 43.11,
2766
+ "learning_rate": 8.609433962264151e-05,
2767
+ "loss": 0.04,
2768
+ "step": 19400
2769
+ },
2770
+ {
2771
+ "epoch": 43.22,
2772
+ "learning_rate": 8.552830188679245e-05,
2773
+ "loss": 0.0452,
2774
+ "step": 19450
2775
+ },
2776
+ {
2777
+ "epoch": 43.33,
2778
+ "learning_rate": 8.496226415094339e-05,
2779
+ "loss": 0.0396,
2780
+ "step": 19500
2781
+ },
2782
+ {
2783
+ "epoch": 43.44,
2784
+ "learning_rate": 8.439622641509434e-05,
2785
+ "loss": 0.0404,
2786
+ "step": 19550
2787
+ },
2788
+ {
2789
+ "epoch": 43.56,
2790
+ "learning_rate": 8.383018867924528e-05,
2791
+ "loss": 0.0382,
2792
+ "step": 19600
2793
+ },
2794
+ {
2795
+ "epoch": 43.56,
2796
+ "eval_loss": 0.6085843443870544,
2797
+ "eval_runtime": 233.4151,
2798
+ "eval_samples_per_second": 24.454,
2799
+ "eval_steps_per_second": 3.059,
2800
+ "eval_wer": 0.33273648442427817,
2801
+ "step": 19600
2802
+ },
2803
+ {
2804
+ "epoch": 43.67,
2805
+ "learning_rate": 8.326415094339622e-05,
2806
+ "loss": 0.0408,
2807
+ "step": 19650
2808
+ },
2809
+ {
2810
+ "epoch": 43.78,
2811
+ "learning_rate": 8.269811320754716e-05,
2812
+ "loss": 0.0443,
2813
+ "step": 19700
2814
+ },
2815
+ {
2816
+ "epoch": 43.89,
2817
+ "learning_rate": 8.213207547169811e-05,
2818
+ "loss": 0.0464,
2819
+ "step": 19750
2820
+ },
2821
+ {
2822
+ "epoch": 44.0,
2823
+ "learning_rate": 8.156603773584906e-05,
2824
+ "loss": 0.0383,
2825
+ "step": 19800
2826
+ },
2827
+ {
2828
+ "epoch": 44.11,
2829
+ "learning_rate": 8.1e-05,
2830
+ "loss": 0.0447,
2831
+ "step": 19850
2832
+ },
2833
+ {
2834
+ "epoch": 44.22,
2835
+ "learning_rate": 8.043396226415094e-05,
2836
+ "loss": 0.0419,
2837
+ "step": 19900
2838
+ },
2839
+ {
2840
+ "epoch": 44.33,
2841
+ "learning_rate": 7.986792452830188e-05,
2842
+ "loss": 0.0389,
2843
+ "step": 19950
2844
+ },
2845
+ {
2846
+ "epoch": 44.44,
2847
+ "learning_rate": 7.930188679245283e-05,
2848
+ "loss": 0.0403,
2849
+ "step": 20000
2850
+ },
2851
+ {
2852
+ "epoch": 44.44,
2853
+ "eval_loss": 0.6319227814674377,
2854
+ "eval_runtime": 233.7986,
2855
+ "eval_samples_per_second": 24.414,
2856
+ "eval_steps_per_second": 3.054,
2857
+ "eval_wer": 0.3353002217836287,
2858
+ "step": 20000
2859
+ },
2860
+ {
2861
+ "epoch": 44.56,
2862
+ "learning_rate": 7.873584905660378e-05,
2863
+ "loss": 0.0397,
2864
+ "step": 20050
2865
+ },
2866
+ {
2867
+ "epoch": 44.67,
2868
+ "learning_rate": 7.816981132075472e-05,
2869
+ "loss": 0.0406,
2870
+ "step": 20100
2871
+ },
2872
+ {
2873
+ "epoch": 44.78,
2874
+ "learning_rate": 7.760377358490566e-05,
2875
+ "loss": 0.039,
2876
+ "step": 20150
2877
+ },
2878
+ {
2879
+ "epoch": 44.89,
2880
+ "learning_rate": 7.70377358490566e-05,
2881
+ "loss": 0.0388,
2882
+ "step": 20200
2883
+ },
2884
+ {
2885
+ "epoch": 45.0,
2886
+ "learning_rate": 7.647169811320755e-05,
2887
+ "loss": 0.0391,
2888
+ "step": 20250
2889
+ },
2890
+ {
2891
+ "epoch": 45.11,
2892
+ "learning_rate": 7.59056603773585e-05,
2893
+ "loss": 0.0343,
2894
+ "step": 20300
2895
+ },
2896
+ {
2897
+ "epoch": 45.22,
2898
+ "learning_rate": 7.533962264150943e-05,
2899
+ "loss": 0.0381,
2900
+ "step": 20350
2901
+ },
2902
+ {
2903
+ "epoch": 45.33,
2904
+ "learning_rate": 7.477358490566037e-05,
2905
+ "loss": 0.0391,
2906
+ "step": 20400
2907
+ },
2908
+ {
2909
+ "epoch": 45.33,
2910
+ "eval_loss": 0.6092292666435242,
2911
+ "eval_runtime": 234.709,
2912
+ "eval_samples_per_second": 24.319,
2913
+ "eval_steps_per_second": 3.042,
2914
+ "eval_wer": 0.337253545485991,
2915
+ "step": 20400
2916
+ },
2917
+ {
2918
+ "epoch": 45.44,
2919
+ "learning_rate": 7.420754716981131e-05,
2920
+ "loss": 0.0404,
2921
+ "step": 20450
2922
+ },
2923
+ {
2924
+ "epoch": 45.56,
2925
+ "learning_rate": 7.364150943396225e-05,
2926
+ "loss": 0.0441,
2927
+ "step": 20500
2928
+ },
2929
+ {
2930
+ "epoch": 45.67,
2931
+ "learning_rate": 7.30754716981132e-05,
2932
+ "loss": 0.0374,
2933
+ "step": 20550
2934
+ },
2935
+ {
2936
+ "epoch": 45.78,
2937
+ "learning_rate": 7.250943396226415e-05,
2938
+ "loss": 0.0379,
2939
+ "step": 20600
2940
+ },
2941
+ {
2942
+ "epoch": 45.89,
2943
+ "learning_rate": 7.19433962264151e-05,
2944
+ "loss": 0.04,
2945
+ "step": 20650
2946
+ },
2947
+ {
2948
+ "epoch": 46.0,
2949
+ "learning_rate": 7.137735849056603e-05,
2950
+ "loss": 0.041,
2951
+ "step": 20700
2952
+ },
2953
+ {
2954
+ "epoch": 46.11,
2955
+ "learning_rate": 7.081132075471697e-05,
2956
+ "loss": 0.0367,
2957
+ "step": 20750
2958
+ },
2959
+ {
2960
+ "epoch": 46.22,
2961
+ "learning_rate": 7.024528301886792e-05,
2962
+ "loss": 0.0364,
2963
+ "step": 20800
2964
+ },
2965
+ {
2966
+ "epoch": 46.22,
2967
+ "eval_loss": 0.6104596257209778,
2968
+ "eval_runtime": 238.3111,
2969
+ "eval_samples_per_second": 23.952,
2970
+ "eval_steps_per_second": 2.996,
2971
+ "eval_wer": 0.33383522900685697,
2972
+ "step": 20800
2973
+ },
2974
+ {
2975
+ "epoch": 46.33,
2976
+ "learning_rate": 6.967924528301887e-05,
2977
+ "loss": 0.0379,
2978
+ "step": 20850
2979
+ },
2980
+ {
2981
+ "epoch": 46.44,
2982
+ "learning_rate": 6.91132075471698e-05,
2983
+ "loss": 0.0392,
2984
+ "step": 20900
2985
+ },
2986
+ {
2987
+ "epoch": 46.56,
2988
+ "learning_rate": 6.854716981132075e-05,
2989
+ "loss": 0.0402,
2990
+ "step": 20950
2991
+ },
2992
+ {
2993
+ "epoch": 46.67,
2994
+ "learning_rate": 6.798113207547169e-05,
2995
+ "loss": 0.042,
2996
+ "step": 21000
2997
+ },
2998
+ {
2999
+ "epoch": 46.78,
3000
+ "learning_rate": 6.741509433962264e-05,
3001
+ "loss": 0.0366,
3002
+ "step": 21050
3003
+ },
3004
+ {
3005
+ "epoch": 46.89,
3006
+ "learning_rate": 6.684905660377359e-05,
3007
+ "loss": 0.0377,
3008
+ "step": 21100
3009
+ },
3010
+ {
3011
+ "epoch": 47.0,
3012
+ "learning_rate": 6.628301886792452e-05,
3013
+ "loss": 0.0345,
3014
+ "step": 21150
3015
+ },
3016
+ {
3017
+ "epoch": 47.11,
3018
+ "learning_rate": 6.571698113207546e-05,
3019
+ "loss": 0.0408,
3020
+ "step": 21200
3021
+ },
3022
+ {
3023
+ "epoch": 47.11,
3024
+ "eval_loss": 0.6161568760871887,
3025
+ "eval_runtime": 235.8491,
3026
+ "eval_samples_per_second": 24.202,
3027
+ "eval_steps_per_second": 3.027,
3028
+ "eval_wer": 0.3335910635440617,
3029
+ "step": 21200
3030
+ },
3031
+ {
3032
+ "epoch": 47.22,
3033
+ "learning_rate": 6.515094339622641e-05,
3034
+ "loss": 0.0377,
3035
+ "step": 21250
3036
+ },
3037
+ {
3038
+ "epoch": 47.33,
3039
+ "learning_rate": 6.459622641509433e-05,
3040
+ "loss": 0.0387,
3041
+ "step": 21300
3042
+ },
3043
+ {
3044
+ "epoch": 47.44,
3045
+ "learning_rate": 6.403018867924528e-05,
3046
+ "loss": 0.0349,
3047
+ "step": 21350
3048
+ },
3049
+ {
3050
+ "epoch": 47.56,
3051
+ "learning_rate": 6.346415094339622e-05,
3052
+ "loss": 0.0398,
3053
+ "step": 21400
3054
+ },
3055
+ {
3056
+ "epoch": 47.67,
3057
+ "learning_rate": 6.289811320754717e-05,
3058
+ "loss": 0.036,
3059
+ "step": 21450
3060
+ },
3061
+ {
3062
+ "epoch": 47.78,
3063
+ "learning_rate": 6.233207547169812e-05,
3064
+ "loss": 0.0359,
3065
+ "step": 21500
3066
+ },
3067
+ {
3068
+ "epoch": 47.89,
3069
+ "learning_rate": 6.176603773584905e-05,
3070
+ "loss": 0.0363,
3071
+ "step": 21550
3072
+ },
3073
+ {
3074
+ "epoch": 48.0,
3075
+ "learning_rate": 6.12e-05,
3076
+ "loss": 0.0347,
3077
+ "step": 21600
3078
+ },
3079
+ {
3080
+ "epoch": 48.0,
3081
+ "eval_loss": 0.5747588276863098,
3082
+ "eval_runtime": 234.422,
3083
+ "eval_samples_per_second": 24.349,
3084
+ "eval_steps_per_second": 3.046,
3085
+ "eval_wer": 0.32937920931084297,
3086
+ "step": 21600
3087
+ },
3088
+ {
3089
+ "epoch": 48.11,
3090
+ "learning_rate": 6.0633962264150937e-05,
3091
+ "loss": 0.0357,
3092
+ "step": 21650
3093
+ },
3094
+ {
3095
+ "epoch": 48.22,
3096
+ "learning_rate": 6.0067924528301876e-05,
3097
+ "loss": 0.036,
3098
+ "step": 21700
3099
+ },
3100
+ {
3101
+ "epoch": 48.33,
3102
+ "learning_rate": 5.950188679245282e-05,
3103
+ "loss": 0.0362,
3104
+ "step": 21750
3105
+ },
3106
+ {
3107
+ "epoch": 48.44,
3108
+ "learning_rate": 5.893584905660376e-05,
3109
+ "loss": 0.0313,
3110
+ "step": 21800
3111
+ },
3112
+ {
3113
+ "epoch": 48.56,
3114
+ "learning_rate": 5.836981132075471e-05,
3115
+ "loss": 0.0365,
3116
+ "step": 21850
3117
+ },
3118
+ {
3119
+ "epoch": 48.67,
3120
+ "learning_rate": 5.780377358490565e-05,
3121
+ "loss": 0.0326,
3122
+ "step": 21900
3123
+ },
3124
+ {
3125
+ "epoch": 48.78,
3126
+ "learning_rate": 5.7237735849056595e-05,
3127
+ "loss": 0.0339,
3128
+ "step": 21950
3129
+ },
3130
+ {
3131
+ "epoch": 48.89,
3132
+ "learning_rate": 5.667169811320754e-05,
3133
+ "loss": 0.0372,
3134
+ "step": 22000
3135
+ },
3136
+ {
3137
+ "epoch": 48.89,
3138
+ "eval_loss": 0.5893652439117432,
3139
+ "eval_runtime": 233.9923,
3140
+ "eval_samples_per_second": 24.394,
3141
+ "eval_steps_per_second": 3.051,
3142
+ "eval_wer": 0.3295826805298391,
3143
+ "step": 22000
3144
+ },
3145
+ {
3146
+ "epoch": 49.0,
3147
+ "learning_rate": 5.610566037735848e-05,
3148
+ "loss": 0.0346,
3149
+ "step": 22050
3150
+ },
3151
+ {
3152
+ "epoch": 49.11,
3153
+ "learning_rate": 5.553962264150943e-05,
3154
+ "loss": 0.0356,
3155
+ "step": 22100
3156
+ },
3157
+ {
3158
+ "epoch": 49.22,
3159
+ "learning_rate": 5.497358490566037e-05,
3160
+ "loss": 0.0371,
3161
+ "step": 22150
3162
+ },
3163
+ {
3164
+ "epoch": 49.33,
3165
+ "learning_rate": 5.4407547169811314e-05,
3166
+ "loss": 0.0342,
3167
+ "step": 22200
3168
+ },
3169
+ {
3170
+ "epoch": 49.44,
3171
+ "learning_rate": 5.3841509433962254e-05,
3172
+ "loss": 0.0323,
3173
+ "step": 22250
3174
+ },
3175
+ {
3176
+ "epoch": 49.56,
3177
+ "learning_rate": 5.32754716981132e-05,
3178
+ "loss": 0.0326,
3179
+ "step": 22300
3180
+ },
3181
+ {
3182
+ "epoch": 49.67,
3183
+ "learning_rate": 5.270943396226415e-05,
3184
+ "loss": 0.0349,
3185
+ "step": 22350
3186
+ },
3187
+ {
3188
+ "epoch": 49.78,
3189
+ "learning_rate": 5.214339622641509e-05,
3190
+ "loss": 0.0378,
3191
+ "step": 22400
3192
+ },
3193
+ {
3194
+ "epoch": 49.78,
3195
+ "eval_loss": 0.6031844019889832,
3196
+ "eval_runtime": 234.1475,
3197
+ "eval_samples_per_second": 24.378,
3198
+ "eval_steps_per_second": 3.049,
3199
+ "eval_wer": 0.33098663194091194,
3200
+ "step": 22400
3201
+ },
3202
+ {
3203
+ "epoch": 49.89,
3204
+ "learning_rate": 5.157735849056603e-05,
3205
+ "loss": 0.03,
3206
+ "step": 22450
3207
+ },
3208
+ {
3209
+ "epoch": 50.0,
3210
+ "learning_rate": 5.101132075471697e-05,
3211
+ "loss": 0.0345,
3212
+ "step": 22500
3213
+ },
3214
+ {
3215
+ "epoch": 50.11,
3216
+ "learning_rate": 5.044528301886792e-05,
3217
+ "loss": 0.0358,
3218
+ "step": 22550
3219
+ },
3220
+ {
3221
+ "epoch": 50.22,
3222
+ "learning_rate": 4.987924528301886e-05,
3223
+ "loss": 0.0345,
3224
+ "step": 22600
3225
+ },
3226
+ {
3227
+ "epoch": 50.33,
3228
+ "learning_rate": 4.9313207547169806e-05,
3229
+ "loss": 0.035,
3230
+ "step": 22650
3231
+ },
3232
+ {
3233
+ "epoch": 50.44,
3234
+ "learning_rate": 4.874716981132075e-05,
3235
+ "loss": 0.0331,
3236
+ "step": 22700
3237
+ },
3238
+ {
3239
+ "epoch": 50.56,
3240
+ "learning_rate": 4.818113207547169e-05,
3241
+ "loss": 0.0339,
3242
+ "step": 22750
3243
+ },
3244
+ {
3245
+ "epoch": 50.67,
3246
+ "learning_rate": 4.761509433962264e-05,
3247
+ "loss": 0.0371,
3248
+ "step": 22800
3249
+ },
3250
+ {
3251
+ "epoch": 50.67,
3252
+ "eval_loss": 0.5830812454223633,
3253
+ "eval_runtime": 238.3507,
3254
+ "eval_samples_per_second": 23.948,
3255
+ "eval_steps_per_second": 2.996,
3256
+ "eval_wer": 0.3274665798522799,
3257
+ "step": 22800
3258
+ },
3259
+ {
3260
+ "epoch": 50.78,
3261
+ "learning_rate": 4.704905660377358e-05,
3262
+ "loss": 0.0345,
3263
+ "step": 22850
3264
+ },
3265
+ {
3266
+ "epoch": 50.89,
3267
+ "learning_rate": 4.6483018867924525e-05,
3268
+ "loss": 0.0325,
3269
+ "step": 22900
3270
+ },
3271
+ {
3272
+ "epoch": 51.0,
3273
+ "learning_rate": 4.5916981132075465e-05,
3274
+ "loss": 0.0387,
3275
+ "step": 22950
3276
+ },
3277
+ {
3278
+ "epoch": 51.11,
3279
+ "learning_rate": 4.535094339622641e-05,
3280
+ "loss": 0.0367,
3281
+ "step": 23000
3282
+ },
3283
+ {
3284
+ "epoch": 51.22,
3285
+ "learning_rate": 4.478490566037736e-05,
3286
+ "loss": 0.0317,
3287
+ "step": 23050
3288
+ },
3289
+ {
3290
+ "epoch": 51.33,
3291
+ "learning_rate": 4.42188679245283e-05,
3292
+ "loss": 0.0349,
3293
+ "step": 23100
3294
+ },
3295
+ {
3296
+ "epoch": 51.44,
3297
+ "learning_rate": 4.3652830188679244e-05,
3298
+ "loss": 0.0322,
3299
+ "step": 23150
3300
+ },
3301
+ {
3302
+ "epoch": 51.56,
3303
+ "learning_rate": 4.3086792452830184e-05,
3304
+ "loss": 0.0323,
3305
+ "step": 23200
3306
+ },
3307
+ {
3308
+ "epoch": 51.56,
3309
+ "eval_loss": 0.5856512784957886,
3310
+ "eval_runtime": 235.7185,
3311
+ "eval_samples_per_second": 24.215,
3312
+ "eval_steps_per_second": 3.029,
3313
+ "eval_wer": 0.3265713064886972,
3314
+ "step": 23200
3315
+ },
3316
+ {
3317
+ "epoch": 51.67,
3318
+ "learning_rate": 4.252075471698113e-05,
3319
+ "loss": 0.0329,
3320
+ "step": 23250
3321
+ },
3322
+ {
3323
+ "epoch": 51.78,
3324
+ "learning_rate": 4.195471698113207e-05,
3325
+ "loss": 0.0348,
3326
+ "step": 23300
3327
+ },
3328
+ {
3329
+ "epoch": 51.89,
3330
+ "learning_rate": 4.1388679245283016e-05,
3331
+ "loss": 0.0324,
3332
+ "step": 23350
3333
+ },
3334
+ {
3335
+ "epoch": 52.0,
3336
+ "learning_rate": 4.0822641509433956e-05,
3337
+ "loss": 0.0311,
3338
+ "step": 23400
3339
+ },
3340
+ {
3341
+ "epoch": 52.11,
3342
+ "learning_rate": 4.02566037735849e-05,
3343
+ "loss": 0.031,
3344
+ "step": 23450
3345
+ },
3346
+ {
3347
+ "epoch": 52.22,
3348
+ "learning_rate": 3.969056603773585e-05,
3349
+ "loss": 0.0329,
3350
+ "step": 23500
3351
+ },
3352
+ {
3353
+ "epoch": 52.33,
3354
+ "learning_rate": 3.912452830188679e-05,
3355
+ "loss": 0.031,
3356
+ "step": 23550
3357
+ },
3358
+ {
3359
+ "epoch": 52.44,
3360
+ "learning_rate": 3.8558490566037735e-05,
3361
+ "loss": 0.0313,
3362
+ "step": 23600
3363
+ },
3364
+ {
3365
+ "epoch": 52.44,
3366
+ "eval_loss": 0.591876745223999,
3367
+ "eval_runtime": 234.2309,
3368
+ "eval_samples_per_second": 24.369,
3369
+ "eval_steps_per_second": 3.048,
3370
+ "eval_wer": 0.3222170224021812,
3371
+ "step": 23600
3372
+ },
3373
+ {
3374
+ "epoch": 52.56,
3375
+ "learning_rate": 3.7992452830188675e-05,
3376
+ "loss": 0.0326,
3377
+ "step": 23650
3378
+ },
3379
+ {
3380
+ "epoch": 52.67,
3381
+ "learning_rate": 3.742641509433962e-05,
3382
+ "loss": 0.0277,
3383
+ "step": 23700
3384
+ },
3385
+ {
3386
+ "epoch": 52.78,
3387
+ "learning_rate": 3.686037735849056e-05,
3388
+ "loss": 0.0316,
3389
+ "step": 23750
3390
+ },
3391
+ {
3392
+ "epoch": 52.89,
3393
+ "learning_rate": 3.629433962264151e-05,
3394
+ "loss": 0.0309,
3395
+ "step": 23800
3396
+ },
3397
+ {
3398
+ "epoch": 53.0,
3399
+ "learning_rate": 3.5728301886792454e-05,
3400
+ "loss": 0.032,
3401
+ "step": 23850
3402
+ },
3403
+ {
3404
+ "epoch": 53.11,
3405
+ "learning_rate": 3.5162264150943394e-05,
3406
+ "loss": 0.0289,
3407
+ "step": 23900
3408
+ },
3409
+ {
3410
+ "epoch": 53.22,
3411
+ "learning_rate": 3.459622641509434e-05,
3412
+ "loss": 0.0284,
3413
+ "step": 23950
3414
+ },
3415
+ {
3416
+ "epoch": 53.33,
3417
+ "learning_rate": 3.403018867924528e-05,
3418
+ "loss": 0.0309,
3419
+ "step": 24000
3420
+ },
3421
+ {
3422
+ "epoch": 53.33,
3423
+ "eval_loss": 0.5990718007087708,
3424
+ "eval_runtime": 234.3903,
3425
+ "eval_samples_per_second": 24.353,
3426
+ "eval_steps_per_second": 3.046,
3427
+ "eval_wer": 0.32537082629662034,
3428
+ "step": 24000
3429
+ },
3430
+ {
3431
+ "epoch": 53.44,
3432
+ "learning_rate": 3.346415094339622e-05,
3433
+ "loss": 0.0284,
3434
+ "step": 24050
3435
+ },
3436
+ {
3437
+ "epoch": 53.56,
3438
+ "learning_rate": 3.289811320754717e-05,
3439
+ "loss": 0.0299,
3440
+ "step": 24100
3441
+ },
3442
+ {
3443
+ "epoch": 53.67,
3444
+ "learning_rate": 3.2332075471698106e-05,
3445
+ "loss": 0.0315,
3446
+ "step": 24150
3447
+ },
3448
+ {
3449
+ "epoch": 53.78,
3450
+ "learning_rate": 3.176603773584905e-05,
3451
+ "loss": 0.0323,
3452
+ "step": 24200
3453
+ },
3454
+ {
3455
+ "epoch": 53.89,
3456
+ "learning_rate": 3.119999999999999e-05,
3457
+ "loss": 0.0319,
3458
+ "step": 24250
3459
+ },
3460
+ {
3461
+ "epoch": 54.0,
3462
+ "learning_rate": 3.063396226415094e-05,
3463
+ "loss": 0.0301,
3464
+ "step": 24300
3465
+ },
3466
+ {
3467
+ "epoch": 54.11,
3468
+ "learning_rate": 3.0067924528301882e-05,
3469
+ "loss": 0.0297,
3470
+ "step": 24350
3471
+ },
3472
+ {
3473
+ "epoch": 54.22,
3474
+ "learning_rate": 2.9501886792452825e-05,
3475
+ "loss": 0.0322,
3476
+ "step": 24400
3477
+ },
3478
+ {
3479
+ "epoch": 54.22,
3480
+ "eval_loss": 0.6152312755584717,
3481
+ "eval_runtime": 234.7575,
3482
+ "eval_samples_per_second": 24.314,
3483
+ "eval_steps_per_second": 3.041,
3484
+ "eval_wer": 0.3252894378090219,
3485
+ "step": 24400
3486
+ },
3487
+ {
3488
+ "epoch": 54.33,
3489
+ "learning_rate": 2.893584905660377e-05,
3490
+ "loss": 0.0283,
3491
+ "step": 24450
3492
+ },
3493
+ {
3494
+ "epoch": 54.44,
3495
+ "learning_rate": 2.8369811320754715e-05,
3496
+ "loss": 0.0322,
3497
+ "step": 24500
3498
+ },
3499
+ {
3500
+ "epoch": 54.56,
3501
+ "learning_rate": 2.7803773584905658e-05,
3502
+ "loss": 0.0297,
3503
+ "step": 24550
3504
+ },
3505
+ {
3506
+ "epoch": 54.67,
3507
+ "learning_rate": 2.72377358490566e-05,
3508
+ "loss": 0.0291,
3509
+ "step": 24600
3510
+ },
3511
+ {
3512
+ "epoch": 54.78,
3513
+ "learning_rate": 2.6671698113207544e-05,
3514
+ "loss": 0.0323,
3515
+ "step": 24650
3516
+ },
3517
+ {
3518
+ "epoch": 54.89,
3519
+ "learning_rate": 2.6105660377358488e-05,
3520
+ "loss": 0.0305,
3521
+ "step": 24700
3522
+ },
3523
+ {
3524
+ "epoch": 55.0,
3525
+ "learning_rate": 2.553962264150943e-05,
3526
+ "loss": 0.0268,
3527
+ "step": 24750
3528
+ },
3529
+ {
3530
+ "epoch": 55.11,
3531
+ "learning_rate": 2.4973584905660374e-05,
3532
+ "loss": 0.0304,
3533
+ "step": 24800
3534
+ },
3535
+ {
3536
+ "epoch": 55.11,
3537
+ "eval_loss": 0.603844404220581,
3538
+ "eval_runtime": 234.1113,
3539
+ "eval_samples_per_second": 24.382,
3540
+ "eval_steps_per_second": 3.05,
3541
+ "eval_wer": 0.3228681303029686,
3542
+ "step": 24800
3543
+ },
3544
+ {
3545
+ "epoch": 55.22,
3546
+ "learning_rate": 2.440754716981132e-05,
3547
+ "loss": 0.0276,
3548
+ "step": 24850
3549
+ },
3550
+ {
3551
+ "epoch": 55.33,
3552
+ "learning_rate": 2.3841509433962263e-05,
3553
+ "loss": 0.0266,
3554
+ "step": 24900
3555
+ },
3556
+ {
3557
+ "epoch": 55.44,
3558
+ "learning_rate": 2.3275471698113207e-05,
3559
+ "loss": 0.0292,
3560
+ "step": 24950
3561
+ },
3562
+ {
3563
+ "epoch": 55.56,
3564
+ "learning_rate": 2.270943396226415e-05,
3565
+ "loss": 0.0316,
3566
+ "step": 25000
3567
+ },
3568
+ {
3569
+ "epoch": 55.67,
3570
+ "learning_rate": 2.2143396226415093e-05,
3571
+ "loss": 0.0288,
3572
+ "step": 25050
3573
+ },
3574
+ {
3575
+ "epoch": 55.78,
3576
+ "learning_rate": 2.1577358490566036e-05,
3577
+ "loss": 0.0293,
3578
+ "step": 25100
3579
+ },
3580
+ {
3581
+ "epoch": 55.89,
3582
+ "learning_rate": 2.101132075471698e-05,
3583
+ "loss": 0.0302,
3584
+ "step": 25150
3585
+ },
3586
+ {
3587
+ "epoch": 56.0,
3588
+ "learning_rate": 2.0456603773584902e-05,
3589
+ "loss": 0.0288,
3590
+ "step": 25200
3591
+ },
3592
+ {
3593
+ "epoch": 56.0,
3594
+ "eval_loss": 0.5989021062850952,
3595
+ "eval_runtime": 235.0378,
3596
+ "eval_samples_per_second": 24.285,
3597
+ "eval_steps_per_second": 3.038,
3598
+ "eval_wer": 0.32227806376788004,
3599
+ "step": 25200
3600
+ },
3601
+ {
3602
+ "epoch": 56.11,
3603
+ "learning_rate": 1.9890566037735846e-05,
3604
+ "loss": 0.0309,
3605
+ "step": 25250
3606
+ },
3607
+ {
3608
+ "epoch": 56.22,
3609
+ "learning_rate": 1.9324528301886792e-05,
3610
+ "loss": 0.0277,
3611
+ "step": 25300
3612
+ },
3613
+ {
3614
+ "epoch": 56.33,
3615
+ "learning_rate": 1.8758490566037735e-05,
3616
+ "loss": 0.031,
3617
+ "step": 25350
3618
+ },
3619
+ {
3620
+ "epoch": 56.44,
3621
+ "learning_rate": 1.819245283018868e-05,
3622
+ "loss": 0.0276,
3623
+ "step": 25400
3624
+ },
3625
+ {
3626
+ "epoch": 56.56,
3627
+ "learning_rate": 1.762641509433962e-05,
3628
+ "loss": 0.0273,
3629
+ "step": 25450
3630
+ },
3631
+ {
3632
+ "epoch": 56.67,
3633
+ "learning_rate": 1.7060377358490565e-05,
3634
+ "loss": 0.028,
3635
+ "step": 25500
3636
+ },
3637
+ {
3638
+ "epoch": 56.78,
3639
+ "learning_rate": 1.6494339622641508e-05,
3640
+ "loss": 0.0287,
3641
+ "step": 25550
3642
+ },
3643
+ {
3644
+ "epoch": 56.89,
3645
+ "learning_rate": 1.592830188679245e-05,
3646
+ "loss": 0.0307,
3647
+ "step": 25600
3648
+ },
3649
+ {
3650
+ "epoch": 56.89,
3651
+ "eval_loss": 0.5922682285308838,
3652
+ "eval_runtime": 236.4109,
3653
+ "eval_samples_per_second": 24.144,
3654
+ "eval_steps_per_second": 3.02,
3655
+ "eval_wer": 0.32016196309032086,
3656
+ "step": 25600
3657
+ },
3658
+ {
3659
+ "epoch": 57.0,
3660
+ "learning_rate": 1.5362264150943397e-05,
3661
+ "loss": 0.027,
3662
+ "step": 25650
3663
+ },
3664
+ {
3665
+ "epoch": 57.11,
3666
+ "learning_rate": 1.4796226415094337e-05,
3667
+ "loss": 0.0277,
3668
+ "step": 25700
3669
+ },
3670
+ {
3671
+ "epoch": 57.22,
3672
+ "learning_rate": 1.423018867924528e-05,
3673
+ "loss": 0.0265,
3674
+ "step": 25750
3675
+ },
3676
+ {
3677
+ "epoch": 57.33,
3678
+ "learning_rate": 1.3664150943396225e-05,
3679
+ "loss": 0.0266,
3680
+ "step": 25800
3681
+ },
3682
+ {
3683
+ "epoch": 57.44,
3684
+ "learning_rate": 1.3098113207547168e-05,
3685
+ "loss": 0.0277,
3686
+ "step": 25850
3687
+ },
3688
+ {
3689
+ "epoch": 57.56,
3690
+ "learning_rate": 1.2532075471698111e-05,
3691
+ "loss": 0.0246,
3692
+ "step": 25900
3693
+ },
3694
+ {
3695
+ "epoch": 57.67,
3696
+ "learning_rate": 1.1966037735849054e-05,
3697
+ "loss": 0.0278,
3698
+ "step": 25950
3699
+ },
3700
+ {
3701
+ "epoch": 57.78,
3702
+ "learning_rate": 1.14e-05,
3703
+ "loss": 0.0258,
3704
+ "step": 26000
3705
+ },
3706
+ {
3707
+ "epoch": 57.78,
3708
+ "eval_loss": 0.6004139184951782,
3709
+ "eval_runtime": 234.8842,
3710
+ "eval_samples_per_second": 24.301,
3711
+ "eval_steps_per_second": 3.04,
3712
+ "eval_wer": 0.31922599548293895,
3713
+ "step": 26000
3714
+ },
3715
+ {
3716
+ "epoch": 57.89,
3717
+ "learning_rate": 1.0833962264150942e-05,
3718
+ "loss": 0.0252,
3719
+ "step": 26050
3720
+ },
3721
+ {
3722
+ "epoch": 58.0,
3723
+ "learning_rate": 1.0267924528301886e-05,
3724
+ "loss": 0.027,
3725
+ "step": 26100
3726
+ },
3727
+ {
3728
+ "epoch": 58.11,
3729
+ "learning_rate": 9.701886792452829e-06,
3730
+ "loss": 0.0277,
3731
+ "step": 26150
3732
+ },
3733
+ {
3734
+ "epoch": 58.22,
3735
+ "learning_rate": 9.135849056603773e-06,
3736
+ "loss": 0.0237,
3737
+ "step": 26200
3738
+ },
3739
+ {
3740
+ "epoch": 58.33,
3741
+ "learning_rate": 8.569811320754717e-06,
3742
+ "loss": 0.0232,
3743
+ "step": 26250
3744
+ },
3745
+ {
3746
+ "epoch": 58.44,
3747
+ "learning_rate": 8.00377358490566e-06,
3748
+ "loss": 0.028,
3749
+ "step": 26300
3750
+ },
3751
+ {
3752
+ "epoch": 58.56,
3753
+ "learning_rate": 7.437735849056603e-06,
3754
+ "loss": 0.0293,
3755
+ "step": 26350
3756
+ },
3757
+ {
3758
+ "epoch": 58.67,
3759
+ "learning_rate": 6.871698113207546e-06,
3760
+ "loss": 0.0261,
3761
+ "step": 26400
3762
+ },
3763
+ {
3764
+ "epoch": 58.67,
3765
+ "eval_loss": 0.5958569049835205,
3766
+ "eval_runtime": 234.8641,
3767
+ "eval_samples_per_second": 24.303,
3768
+ "eval_steps_per_second": 3.04,
3769
+ "eval_wer": 0.3189004415325452,
3770
+ "step": 26400
3771
+ },
3772
+ {
3773
+ "epoch": 58.78,
3774
+ "learning_rate": 6.30566037735849e-06,
3775
+ "loss": 0.0293,
3776
+ "step": 26450
3777
+ },
3778
+ {
3779
+ "epoch": 58.89,
3780
+ "learning_rate": 5.739622641509433e-06,
3781
+ "loss": 0.0278,
3782
+ "step": 26500
3783
+ },
3784
+ {
3785
+ "epoch": 59.0,
3786
+ "learning_rate": 5.173584905660377e-06,
3787
+ "loss": 0.0262,
3788
+ "step": 26550
3789
+ },
3790
+ {
3791
+ "epoch": 59.11,
3792
+ "learning_rate": 4.60754716981132e-06,
3793
+ "loss": 0.025,
3794
+ "step": 26600
3795
+ },
3796
+ {
3797
+ "epoch": 59.22,
3798
+ "learning_rate": 4.041509433962263e-06,
3799
+ "loss": 0.0286,
3800
+ "step": 26650
3801
+ },
3802
+ {
3803
+ "epoch": 59.33,
3804
+ "learning_rate": 3.4754716981132073e-06,
3805
+ "loss": 0.0295,
3806
+ "step": 26700
3807
+ },
3808
+ {
3809
+ "epoch": 59.44,
3810
+ "learning_rate": 2.909433962264151e-06,
3811
+ "loss": 0.0249,
3812
+ "step": 26750
3813
+ },
3814
+ {
3815
+ "epoch": 59.56,
3816
+ "learning_rate": 2.343396226415094e-06,
3817
+ "loss": 0.0277,
3818
+ "step": 26800
3819
+ },
3820
+ {
3821
+ "epoch": 59.56,
3822
+ "eval_loss": 0.593723475933075,
3823
+ "eval_runtime": 234.2801,
3824
+ "eval_samples_per_second": 24.364,
3825
+ "eval_steps_per_second": 3.048,
3826
+ "eval_wer": 0.318188292266059,
3827
+ "step": 26800
3828
+ }
3829
+ ],
3830
+ "max_steps": 27000,
3831
+ "num_train_epochs": 60,
3832
+ "total_flos": 1.1861566534187504e+20,
3833
+ "trial_name": null,
3834
+ "trial_params": null
3835
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dafdf73917d65f6586d3cb1852d033f8740cd2b08fb2897a59ae4a845add7384
3
+ size 2927
vocab.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"e": 0, "n": 1, "a": 2, "i": 3, "t": 4, "o": 5, "d": 6, "r": 7, " ": 8, "l": 9, "s": 10, "h": 11, "g": 12, "m": 13, "k": 14, "v": 15, "j": 16, "w": 17, "z": 18, "u": 19, "b": 20, "c": 21, "p": 22, "f": 23, "y": 24, "\u00e9": 25, "'": 26, "x": 27, "\u00eb": 28, "q": 29, "-": 30, "\u00ea": 31, "\u00e0": 32, "\u00e4": 33, "\u00e8": 34, "\u00ef": 35, "\u00e2": 36, "\u00fb": 37, "\u00f6": 38, "\u00f4": 39, "\u00fc": 40, "\u00ee": 41, "\u00e7": 42, "\u00e6": 43, "\u00f9": 44, "\u0153": 45, "<unk>": 46, "<pad>": 47}