Rachel Bawden
committed on
Commit
·
da39110
1
Parent(s):
5fb1005
update tok
Browse files
- tokenizer.json +1 -1
- tokenizer_config.json +1 -1
- vocab-src.json +1 -1
- vocab-tgt.json +1 -1
tokenizer.json
CHANGED
@@ -45,7 +45,7 @@
|
|
45 |
"'": 4,
|
46 |
"▁de": 5,
|
47 |
"▁": 6,
|
48 |
-
"
|
49 |
",": 8,
|
50 |
"▁l": 9,
|
51 |
"▁la": 10,
|
|
|
45 |
"'": 4,
|
46 |
"▁de": 5,
|
47 |
"▁": 6,
|
48 |
+
"<t>": 7,
|
49 |
",": 8,
|
50 |
"▁l": 9,
|
51 |
"▁la": 10,
|
tokenizer_config.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"unk_token": "<unk>", "eos_token": "</s>", "bos_token": "<s>", "pad_token": "<pad>", "tokenizer_class": "PreTrainedTokenizerFast"}
|
|
|
1 |
+
{"unk_token": "<unk>", "eos_token": "</s>", "bos_token": "<s>", "pad_token": "<pad>", "sep": "<t>", "tokenizer_class": "PreTrainedTokenizerFast"}
|
vocab-src.json
CHANGED
@@ -6,7 +6,7 @@
|
|
6 |
"'": 4,
|
7 |
"▁de": 5,
|
8 |
"▁": 6,
|
9 |
-
"
|
10 |
",": 8,
|
11 |
"▁l": 9,
|
12 |
"▁la": 10,
|
|
|
6 |
"'": 4,
|
7 |
"▁de": 5,
|
8 |
"▁": 6,
|
9 |
+
"<t>": 7,
|
10 |
",": 8,
|
11 |
"▁l": 9,
|
12 |
"▁la": 10,
|
vocab-tgt.json
CHANGED
@@ -6,7 +6,7 @@
|
|
6 |
"'": 4,
|
7 |
"▁de": 5,
|
8 |
"▁": 6,
|
9 |
-
"
|
10 |
",": 8,
|
11 |
"▁l": 9,
|
12 |
"▁la": 10,
|
|
|
6 |
"'": 4,
|
7 |
"▁de": 5,
|
8 |
"▁": 6,
|
9 |
+
"<t>": 7,
|
10 |
",": 8,
|
11 |
"▁l": 9,
|
12 |
"▁la": 10,
|