cfpark00 committed
Commit b8c0dde · verified · 1 Parent(s): 39c31fc

Upload tokenizer

special_tokens_map.json CHANGED
@@ -1,4 +1,4 @@
 {
   "eos_token": "<|endoftext|>",
-  "pad_token": "<pad>"
+  "pad_token": "<|endoftext|>"
 }
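
This change reuses the existing <|endoftext|> token as the padding token instead of a dedicated <pad> token. A minimal sketch of how the pad_token remapping part of such an edit is typically done with transformers (the local path is a placeholder, and this covers only the special-token change, not the vocabulary trim in tokenizer.json below):

from transformers import AutoTokenizer

# Load the tokenizer from a local directory (placeholder path).
tokenizer = AutoTokenizer.from_pretrained("path/to/tokenizer")

# Reuse the EOS token as the padding token instead of a separate <pad> token.
tokenizer.pad_token = tokenizer.eos_token  # "<|endoftext|>"

# Saving rewrites special_tokens_map.json and tokenizer_config.json with the new pad_token.
tokenizer.save_pretrained("path/to/tokenizer")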
tokenizer.json CHANGED
@@ -11,15 +11,6 @@
       "rstrip": false,
       "normalized": false,
       "special": true
-    },
-    {
-      "id": 26,
-      "content": "<pad>",
-      "single_word": false,
-      "lstrip": false,
-      "rstrip": false,
-      "normalized": false,
-      "special": true
     }
   ],
   "normalizer": null,
@@ -55,10 +46,8 @@
       "B": 21,
       "BB": 22,
       "BBB": 23,
-      "<|endoftext|>": 24,
-      "<unk>": 25,
-      "<pad>": 26
+      "<|endoftext|>": 24
     },
-    "unk_token": "<unk>"
+    "unk_token": "<|endoftext|>"
   }
 }
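
These hunks drop <pad> (id 26) and <unk> (id 25) from both the added-tokens list and the vocabulary, leaving <|endoftext|> (id 24) as the last entry and as the model-level unk_token. A quick way to sanity-check the resulting file with the tokenizers library (the file path is a placeholder):

from tokenizers import Tokenizer

# Load the serialized tokenizer produced by this commit (placeholder path).
tok = Tokenizer.from_file("tokenizer.json")

vocab = tok.get_vocab(with_added_tokens=True)
print(len(vocab))                        # expected: 25 if ids run 0..24 with no gaps
print(tok.token_to_id("<|endoftext|>"))  # expected: 24
print(tok.token_to_id("<pad>"))          # expected: None (token removed)
print(tok.token_to_id("<unk>"))          # expected: None (token removed)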
tokenizer_config.json CHANGED
@@ -7,20 +7,12 @@
       "rstrip": false,
       "single_word": false,
       "special": true
-    },
-    "26": {
-      "content": "<pad>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
     }
   },
   "clean_up_tokenization_spaces": false,
   "eos_token": "<|endoftext|>",
   "extra_special_tokens": {},
   "model_max_length": 1000000000000000019884624838656,
-  "pad_token": "<pad>",
+  "pad_token": "<|endoftext|>",
   "tokenizer_class": "PreTrainedTokenizer"
 }
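
With the "26" entry removed from added_tokens_decoder and pad_token pointed at <|endoftext|>, padding and end-of-sequence now share one token id. A sketch for verifying the loaded tokenizer with transformers, assuming a local checkout of this repo at a placeholder path and that AutoTokenizer resolves the configured tokenizer class:

from transformers import AutoTokenizer

# Load from a local checkout of the repo (placeholder path).
tokenizer = AutoTokenizer.from_pretrained("path/to/repo")

print(tokenizer.eos_token, tokenizer.pad_token)           # expected: <|endoftext|> <|endoftext|>
print(tokenizer.pad_token_id == tokenizer.eos_token_id)   # expected: True

# Batched encoding pads with the EOS id, since there is no dedicated <pad> token anymore.
batch = tokenizer(["B", "BBB"], padding=True)
print(batch["input_ids"])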