Rachel Bawden committed on
Commit
da39110
·
1 Parent(s): 5fb1005

update tok

Browse files
Files changed (4) hide show
  1. tokenizer.json +1 -1
  2. tokenizer_config.json +1 -1
  3. vocab-src.json +1 -1
  4. vocab-tgt.json +1 -1
tokenizer.json CHANGED
@@ -45,7 +45,7 @@
45
  "'": 4,
46
  "▁de": 5,
47
  "▁": 6,
48
- "▁<t>": 7,
49
  ",": 8,
50
  "▁l": 9,
51
  "▁la": 10,
 
45
  "'": 4,
46
  "▁de": 5,
47
  "▁": 6,
48
+ "<t>": 7,
49
  ",": 8,
50
  "▁l": 9,
51
  "▁la": 10,
tokenizer_config.json CHANGED
@@ -1 +1 @@
1
- {"unk_token": "<unk>", "eos_token": "</s>", "bos_token": "<s>", "pad_token": "<pad>", "tokenizer_class": "PreTrainedTokenizerFast"}
 
1
+ {"unk_token": "<unk>", "eos_token": "</s>", "bos_token": "<s>", "pad_token": "<pad>", "sep": "<t>", "tokenizer_class": "PreTrainedTokenizerFast"}
vocab-src.json CHANGED
@@ -6,7 +6,7 @@
6
  "'": 4,
7
  "▁de": 5,
8
  "▁": 6,
9
- "▁<t>": 7,
10
  ",": 8,
11
  "▁l": 9,
12
  "▁la": 10,
 
6
  "'": 4,
7
  "▁de": 5,
8
  "▁": 6,
9
+ "<t>": 7,
10
  ",": 8,
11
  "▁l": 9,
12
  "▁la": 10,
vocab-tgt.json CHANGED
@@ -6,7 +6,7 @@
6
  "'": 4,
7
  "▁de": 5,
8
  "▁": 6,
9
- "▁<t>": 7,
10
  ",": 8,
11
  "▁l": 9,
12
  "▁la": 10,
 
6
  "'": 4,
7
  "▁de": 5,
8
  "▁": 6,
9
+ "<t>": 7,
10
  ",": 8,
11
  "▁l": 9,
12
  "▁la": 10,