chanicpanic committed on
Commit
9efb814
·
verified ·
1 Parent(s): 7fa875a

Fix tokenizer special tokens

Browse files
Files changed (2) hide show
  1. special_tokens_map.json +5 -7
  2. tokenizer_config.json +6 -2
special_tokens_map.json CHANGED
@@ -1,9 +1,7 @@
1
  {
2
- "pad_token": {
3
- "content": "<pad>",
4
- "lstrip": false,
5
- "normalized": false,
6
- "rstrip": false,
7
- "single_word": false
8
- }
9
  }
 
1
  {
2
+ "bos_token": "<s>",
3
+ "eos_token": "</s>",
4
+ "mask_token": "<mask>",
5
+ "pad_token": "<pad>",
6
+ "unk_token": "<unk>"
 
 
7
  }
tokenizer_config.json CHANGED
@@ -41,12 +41,16 @@
41
  "special": true
42
  }
43
  },
44
- "clean_up_tokenization_spaces": true,
 
 
 
45
  "model_input_names": [
46
  "labels",
47
  "pixel_values"
48
  ],
49
  "model_max_length": 1000000000000000019884624838656,
50
  "pad_token": "<pad>",
51
- "tokenizer_class": "PreTrainedTokenizerFast"
 
52
  }
 
41
  "special": true
42
  }
43
  },
44
+ "bos_token": "<s>",
45
+ "clean_up_tokenization_spaces": false,
46
+ "eos_token": "</s>",
47
+ "mask_token": "<mask>",
48
  "model_input_names": [
49
  "labels",
50
  "pixel_values"
51
  ],
52
  "model_max_length": 1000000000000000019884624838656,
53
  "pad_token": "<pad>",
54
+ "tokenizer_class": "PreTrainedTokenizerFast",
55
+ "unk_token": "<unk>"
56
  }