Training in progress, step 500
- .ipynb_checkpoints/eval-checkpoint.py +61 -3
- .ipynb_checkpoints/run-checkpoint.sh +1 -0
- .ipynb_checkpoints/run_speech_recognition_ctc-checkpoint.py +1 -0
- config.json +3 -2
- eval.py +61 -3
- pytorch_model.bin +1 -1
- requirements.txt +3 -0
- run.sh +1 -0
- run_speech_recognition_ctc.py +1 -0
- runs/Feb02_06-54-25_job-699ba53c-fea9-4eb2-81af-a97f440eaa45/1643785646.6555233/events.out.tfevents.1643785646.job-699ba53c-fea9-4eb2-81af-a97f440eaa45.33872.1 +3 -0
- runs/Feb02_06-54-25_job-699ba53c-fea9-4eb2-81af-a97f440eaa45/events.out.tfevents.1643785646.job-699ba53c-fea9-4eb2-81af-a97f440eaa45.33872.0 +3 -0
- runs/Jan27_22-59-08_job-0074bb36-c67f-4775-b1b6-176eb09b0ba4/1643325211.6916795/events.out.tfevents.1643325211.job-0074bb36-c67f-4775-b1b6-176eb09b0ba4.399095.1 +0 -0
- runs/Jan27_22-59-08_job-0074bb36-c67f-4775-b1b6-176eb09b0ba4/events.out.tfevents.1643325211.job-0074bb36-c67f-4775-b1b6-176eb09b0ba4.399095.0 +0 -0
- runs/Jan28_04-57-04_job-0074bb36-c67f-4775-b1b6-176eb09b0ba4/1643346306.8664992/events.out.tfevents.1643346306.job-0074bb36-c67f-4775-b1b6-176eb09b0ba4.541469.1 +0 -0
- runs/Jan28_04-57-04_job-0074bb36-c67f-4775-b1b6-176eb09b0ba4/events.out.tfevents.1643346306.job-0074bb36-c67f-4775-b1b6-176eb09b0ba4.541469.0 +0 -0
- runs/Jan30_19-35-25_job-0074bb36-c67f-4775-b1b6-176eb09b0ba4/1643572438.487491/events.out.tfevents.1643572438.job-0074bb36-c67f-4775-b1b6-176eb09b0ba4.2037878.1 +0 -0
- runs/Jan30_19-35-25_job-0074bb36-c67f-4775-b1b6-176eb09b0ba4/events.out.tfevents.1643572438.job-0074bb36-c67f-4775-b1b6-176eb09b0ba4.2037878.0 +0 -0
- runs/Jan31_00-08-55_job-0074bb36-c67f-4775-b1b6-176eb09b0ba4/1643588110.005454/events.out.tfevents.1643588110.job-0074bb36-c67f-4775-b1b6-176eb09b0ba4.2141134.1 +0 -0
- runs/Jan31_00-08-55_job-0074bb36-c67f-4775-b1b6-176eb09b0ba4/events.out.tfevents.1643588109.job-0074bb36-c67f-4775-b1b6-176eb09b0ba4.2141134.0 +0 -0
- runs/Jan31_05-52-36_job-0074bb36-c67f-4775-b1b6-176eb09b0ba4/1643608732.4243534/events.out.tfevents.1643608732.job-0074bb36-c67f-4775-b1b6-176eb09b0ba4.2278718.1 +0 -0
- runs/Jan31_05-52-36_job-0074bb36-c67f-4775-b1b6-176eb09b0ba4/events.out.tfevents.1643608732.job-0074bb36-c67f-4775-b1b6-176eb09b0ba4.2278718.0 +0 -0
- special_tokens_map.json +1 -1
- training_args.bin +1 -1
.ipynb_checkpoints/eval-checkpoint.py
CHANGED
(identical to the eval.py diff below)

.ipynb_checkpoints/run-checkpoint.sh
CHANGED
(identical to the run.sh diff below)

.ipynb_checkpoints/run_speech_recognition_ctc-checkpoint.py
CHANGED
(identical to the run_speech_recognition_ctc.py diff below)

config.json
CHANGED
@@ -100,8 +100,9 @@
     1
   ],
   "torch_dtype": "float32",
-  "transformers_version": "4.
+  "transformers_version": "4.17.0.dev0",
   "use_weighted_layer_sum": false,
   "vocab_size": 32,
-  "xvector_output_dim": 512
+  "xvector_output_dim": 512,
+  "zero_infinity": true
 }

eval.py
CHANGED
@@ -1,11 +1,12 @@
 #!/usr/bin/env python3
 import argparse
+import functools
 import re
 from typing import Dict
 
-from datasets import Audio, Dataset, load_dataset, load_metric
+from datasets import Audio, Dataset, DatasetDict, load_dataset, load_metric
 
-from transformers import AutoFeatureExtractor, pipeline
+from transformers import AutoFeatureExtractor, AutoTokenizer, pipeline
 
 
 def log_results(result: Dataset, args: Dict[str, str]):
@@ -63,12 +64,50 @@ def normalize_text(text: str) -> str:
     return text
 
 
+def create_vocabulary_from_data(
+    datasets: DatasetDict,
+    word_delimiter_token=None,
+    unk_token=None,
+    pad_token=None,
+):
+    # Given training and test labels create vocabulary
+    def extract_all_chars(batch):
+        all_text = " ".join(batch["target"])
+        vocab = list(set(all_text))
+        return {"vocab": [vocab], "all_text": [all_text]}
+
+    vocabs = datasets.map(
+        extract_all_chars,
+        batched=True,
+        batch_size=-1,
+        keep_in_memory=True,
+        remove_columns=datasets["test"].column_names,
+    )
+
+
+    vocab_dict = {v: k for k, v in enumerate(sorted(list(vocabs["test"]["vocab"])))}
+
+    # replace white space with delimiter token
+    if word_delimiter_token is not None:
+        vocab_dict[word_delimiter_token] = vocab_dict[" "]
+        del vocab_dict[" "]
+
+    # add unk and pad token
+    if unk_token is not None:
+        vocab_dict[unk_token] = len(vocab_dict)
+
+    if pad_token is not None:
+        vocab_dict[pad_token] = len(vocab_dict)
+
+    return vocab_dict
+
+
 def main(args):
     # load dataset
     dataset = load_dataset(args.dataset, args.config, split=args.split, use_auth_token=True)
 
     # for testing: only process the first two examples as a test
-
+    dataset = dataset.select(range(10))
 
     # load processor
     feature_extractor = AutoFeatureExtractor.from_pretrained(args.model_id)
@@ -90,6 +129,7 @@ def main(args):
         batch["target"] = normalize_text(batch["sentence"])
         return batch
 
+
     # run inference on all examples
     result = dataset.map(map_to_pred, remove_columns=dataset.column_names)
 
@@ -97,6 +137,21 @@ def main(args):
     # do not change function below
     log_results(result, args)
 
+    if args.check_vocab:
+        tokenizer = AutoTokenizer.from_pretrained(args.model_id)
+        unk_token = "[UNK]"
+        pad_token = "[PAD]"
+        word_delimiter_token = "|"
+        raw_datasets = DatasetDict({"test": result})
+        vocab_dict = create_vocabulary_from_data(
+            raw_datasets,
+            word_delimiter_token=word_delimiter_token,
+            unk_token=unk_token,
+            pad_token=pad_token,
+        )
+        print(vocab_dict)
+        print(set(vocab_dict) - set(tokenizer.get_vocab()))
+
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
@@ -123,6 +178,9 @@ if __name__ == "__main__":
     parser.add_argument(
         "--log_outputs", action="store_true", help="If defined, write outputs to log file for analysis."
     )
+    parser.add_argument(
+        "--check_vocab", action="store_true", help="Verify that normalized target text is within character set"
+    )
     args = parser.parse_args()
 
     main(args)

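The new --check_vocab path builds a character vocabulary from the normalized reference text and compares it against the tokenizer's vocabulary; anything printed by the final print call would be mapped to [UNK] during scoring. A minimal standalone sketch of that idea (illustrative only, not code from this commit; the helper name and example strings are made up):

    # Illustrative sketch of the --check_vocab idea; not part of the commit.
    from transformers import AutoTokenizer


    def uncovered_characters(targets, model_id):
        """Return characters in the normalized targets that the CTC tokenizer cannot emit."""
        tokenizer = AutoTokenizer.from_pretrained(model_id)
        chars = set(" ".join(targets).replace(" ", "|"))  # "|" is the CTC word-delimiter token
        return chars - set(tokenizer.get_vocab())


    # Any non-empty result means those characters become [UNK] at evaluation time,
    # inflating WER/CER, e.g.:
    # print(uncovered_characters(["salom dunyo"], "<path-or-hub-id-of-the-model>"))
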
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:8b71f72aac1d492daee452de8218f59a78d50f29656419218b12834c50edc15f
 size 1262054897

requirements.txt
ADDED
@@ -0,0 +1,3 @@
+unidecode
+tensorboard
+

run.sh
CHANGED
@@ -16,6 +16,7 @@ python xls-r-uzbek-cv8/run_speech_recognition_ctc.py \
     --save_steps="500" \
     --eval_steps="500" \
     --logging_steps="100" \
+    --min_duration_in_seconds="0.5" \
     --layerdrop="0.0" \
     --activation_dropout="0.1" \
     --save_total_limit="3" \

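The only change to run.sh is the new --min_duration_in_seconds="0.5" flag, which drops utterances shorter than 0.5 s before training; very short clips are a common source of degenerate CTC losses. Roughly what that filter amounts to (a sketch assuming 16 kHz audio, not the training script's actual code):

    # Sketch of the effect of --min_duration_in_seconds="0.5" (assumes 16 kHz audio;
    # the real filtering happens inside run_speech_recognition_ctc.py).
    MIN_SECONDS = 0.5
    SAMPLING_RATE = 16_000


    def long_enough(example):
        # keep only clips with at least 0.5 s of audio
        return len(example["audio"]["array"]) >= MIN_SECONDS * SAMPLING_RATE


    # given a `dataset` whose "audio" column is cast to Audio(sampling_rate=16_000):
    # dataset = dataset.filter(long_enough)
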
run_speech_recognition_ctc.py
CHANGED
@@ -553,6 +553,7 @@ def main():
         "gradient_checkpointing": training_args.gradient_checkpointing,
         "layerdrop": model_args.layerdrop,
         "ctc_loss_reduction": model_args.ctc_loss_reduction,
+        "zero_infinity": True,
         "pad_token_id": tokenizer.pad_token_id,
         "vocab_size": len(tokenizer),
         "activation_dropout": model_args.activation_dropout,

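The single addition here, "zero_infinity": True, goes into the model config and is ultimately forwarded to PyTorch's CTC loss. An infinite CTC loss arises when a target transcription is longer than the number of available input frames; with zero_infinity enabled those losses (and their gradients) are zeroed instead of turning the whole batch into NaN. A small self-contained illustration with plain torch.nn.CTCLoss (not this repository's training code):

    # Demonstration of zero_infinity with PyTorch's CTC loss (illustrative only).
    import torch

    log_probs = torch.randn(50, 2, 32).log_softmax(-1)   # (time, batch, vocab)
    targets = torch.randint(1, 32, (2, 60))               # targets longer than 50 frames
    input_lengths = torch.full((2,), 50, dtype=torch.long)
    target_lengths = torch.full((2,), 60, dtype=torch.long)

    loss_inf = torch.nn.CTCLoss(blank=0)(log_probs, targets, input_lengths, target_lengths)
    loss_ok = torch.nn.CTCLoss(blank=0, zero_infinity=True)(log_probs, targets, input_lengths, target_lengths)
    print(loss_inf)  # inf: no valid alignment exists
    print(loss_ok)   # 0.0: the infinite terms are zeroed instead of poisoning the batch
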
runs/Feb02_06-54-25_job-699ba53c-fea9-4eb2-81af-a97f440eaa45/1643785646.6555233/events.out.tfevents.1643785646.job-699ba53c-fea9-4eb2-81af-a97f440eaa45.33872.1
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9500cee755dde8faf816fe7422af4bc642d5207e5a7715fc6d36a685700c4666
+size 4799

runs/Feb02_06-54-25_job-699ba53c-fea9-4eb2-81af-a97f440eaa45/events.out.tfevents.1643785646.job-699ba53c-fea9-4eb2-81af-a97f440eaa45.33872.0
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:412cb17226cf304713e4986e1eab9dffa422ea44ea63de0bdd747546e911336e
+size 5852

runs/Jan27_22-59-08_job-0074bb36-c67f-4775-b1b6-176eb09b0ba4/1643325211.6916795/events.out.tfevents.1643325211.job-0074bb36-c67f-4775-b1b6-176eb09b0ba4.399095.1
CHANGED
Binary files a/runs/Jan27_22-59-08_job-0074bb36-c67f-4775-b1b6-176eb09b0ba4/1643325211.6916795/events.out.tfevents.1643325211.job-0074bb36-c67f-4775-b1b6-176eb09b0ba4.399095.1 and b/runs/Jan27_22-59-08_job-0074bb36-c67f-4775-b1b6-176eb09b0ba4/1643325211.6916795/events.out.tfevents.1643325211.job-0074bb36-c67f-4775-b1b6-176eb09b0ba4.399095.1 differ
runs/Jan27_22-59-08_job-0074bb36-c67f-4775-b1b6-176eb09b0ba4/events.out.tfevents.1643325211.job-0074bb36-c67f-4775-b1b6-176eb09b0ba4.399095.0
CHANGED
Binary files a/runs/Jan27_22-59-08_job-0074bb36-c67f-4775-b1b6-176eb09b0ba4/events.out.tfevents.1643325211.job-0074bb36-c67f-4775-b1b6-176eb09b0ba4.399095.0 and b/runs/Jan27_22-59-08_job-0074bb36-c67f-4775-b1b6-176eb09b0ba4/events.out.tfevents.1643325211.job-0074bb36-c67f-4775-b1b6-176eb09b0ba4.399095.0 differ
runs/Jan28_04-57-04_job-0074bb36-c67f-4775-b1b6-176eb09b0ba4/1643346306.8664992/events.out.tfevents.1643346306.job-0074bb36-c67f-4775-b1b6-176eb09b0ba4.541469.1
CHANGED
Binary files a/runs/Jan28_04-57-04_job-0074bb36-c67f-4775-b1b6-176eb09b0ba4/1643346306.8664992/events.out.tfevents.1643346306.job-0074bb36-c67f-4775-b1b6-176eb09b0ba4.541469.1 and b/runs/Jan28_04-57-04_job-0074bb36-c67f-4775-b1b6-176eb09b0ba4/1643346306.8664992/events.out.tfevents.1643346306.job-0074bb36-c67f-4775-b1b6-176eb09b0ba4.541469.1 differ
runs/Jan28_04-57-04_job-0074bb36-c67f-4775-b1b6-176eb09b0ba4/events.out.tfevents.1643346306.job-0074bb36-c67f-4775-b1b6-176eb09b0ba4.541469.0
CHANGED
Binary files a/runs/Jan28_04-57-04_job-0074bb36-c67f-4775-b1b6-176eb09b0ba4/events.out.tfevents.1643346306.job-0074bb36-c67f-4775-b1b6-176eb09b0ba4.541469.0 and b/runs/Jan28_04-57-04_job-0074bb36-c67f-4775-b1b6-176eb09b0ba4/events.out.tfevents.1643346306.job-0074bb36-c67f-4775-b1b6-176eb09b0ba4.541469.0 differ
runs/Jan30_19-35-25_job-0074bb36-c67f-4775-b1b6-176eb09b0ba4/1643572438.487491/events.out.tfevents.1643572438.job-0074bb36-c67f-4775-b1b6-176eb09b0ba4.2037878.1
CHANGED
Binary files a/runs/Jan30_19-35-25_job-0074bb36-c67f-4775-b1b6-176eb09b0ba4/1643572438.487491/events.out.tfevents.1643572438.job-0074bb36-c67f-4775-b1b6-176eb09b0ba4.2037878.1 and b/runs/Jan30_19-35-25_job-0074bb36-c67f-4775-b1b6-176eb09b0ba4/1643572438.487491/events.out.tfevents.1643572438.job-0074bb36-c67f-4775-b1b6-176eb09b0ba4.2037878.1 differ
runs/Jan30_19-35-25_job-0074bb36-c67f-4775-b1b6-176eb09b0ba4/events.out.tfevents.1643572438.job-0074bb36-c67f-4775-b1b6-176eb09b0ba4.2037878.0
CHANGED
Binary files a/runs/Jan30_19-35-25_job-0074bb36-c67f-4775-b1b6-176eb09b0ba4/events.out.tfevents.1643572438.job-0074bb36-c67f-4775-b1b6-176eb09b0ba4.2037878.0 and b/runs/Jan30_19-35-25_job-0074bb36-c67f-4775-b1b6-176eb09b0ba4/events.out.tfevents.1643572438.job-0074bb36-c67f-4775-b1b6-176eb09b0ba4.2037878.0 differ
runs/Jan31_00-08-55_job-0074bb36-c67f-4775-b1b6-176eb09b0ba4/1643588110.005454/events.out.tfevents.1643588110.job-0074bb36-c67f-4775-b1b6-176eb09b0ba4.2141134.1
CHANGED
Binary files a/runs/Jan31_00-08-55_job-0074bb36-c67f-4775-b1b6-176eb09b0ba4/1643588110.005454/events.out.tfevents.1643588110.job-0074bb36-c67f-4775-b1b6-176eb09b0ba4.2141134.1 and b/runs/Jan31_00-08-55_job-0074bb36-c67f-4775-b1b6-176eb09b0ba4/1643588110.005454/events.out.tfevents.1643588110.job-0074bb36-c67f-4775-b1b6-176eb09b0ba4.2141134.1 differ
runs/Jan31_00-08-55_job-0074bb36-c67f-4775-b1b6-176eb09b0ba4/events.out.tfevents.1643588109.job-0074bb36-c67f-4775-b1b6-176eb09b0ba4.2141134.0
CHANGED
Binary files a/runs/Jan31_00-08-55_job-0074bb36-c67f-4775-b1b6-176eb09b0ba4/events.out.tfevents.1643588109.job-0074bb36-c67f-4775-b1b6-176eb09b0ba4.2141134.0 and b/runs/Jan31_00-08-55_job-0074bb36-c67f-4775-b1b6-176eb09b0ba4/events.out.tfevents.1643588109.job-0074bb36-c67f-4775-b1b6-176eb09b0ba4.2141134.0 differ
runs/Jan31_05-52-36_job-0074bb36-c67f-4775-b1b6-176eb09b0ba4/1643608732.4243534/events.out.tfevents.1643608732.job-0074bb36-c67f-4775-b1b6-176eb09b0ba4.2278718.1
CHANGED
Binary files a/runs/Jan31_05-52-36_job-0074bb36-c67f-4775-b1b6-176eb09b0ba4/1643608732.4243534/events.out.tfevents.1643608732.job-0074bb36-c67f-4775-b1b6-176eb09b0ba4.2278718.1 and b/runs/Jan31_05-52-36_job-0074bb36-c67f-4775-b1b6-176eb09b0ba4/1643608732.4243534/events.out.tfevents.1643608732.job-0074bb36-c67f-4775-b1b6-176eb09b0ba4.2278718.1 differ
runs/Jan31_05-52-36_job-0074bb36-c67f-4775-b1b6-176eb09b0ba4/events.out.tfevents.1643608732.job-0074bb36-c67f-4775-b1b6-176eb09b0ba4.2278718.0
CHANGED
Binary files a/runs/Jan31_05-52-36_job-0074bb36-c67f-4775-b1b6-176eb09b0ba4/events.out.tfevents.1643608732.job-0074bb36-c67f-4775-b1b6-176eb09b0ba4.2278718.0 and b/runs/Jan31_05-52-36_job-0074bb36-c67f-4775-b1b6-176eb09b0ba4/events.out.tfevents.1643608732.job-0074bb36-c67f-4775-b1b6-176eb09b0ba4.2278718.0 differ
special_tokens_map.json
CHANGED
@@ -1 +1 @@
-{"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]", "additional_special_tokens": [{"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}]}
+{"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]", "additional_special_tokens": [{"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}]}

training_args.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:39c0625450d0afa8d2e897190721a9173256a42e1f889cdecc94feee325632c3
 size 3055