fix(tokenizer): expose `errors` as an instance attribute (`self.errors`)
Browse files
tokenization_arcade100k.py
CHANGED
@@ -111,6 +111,8 @@ class Arcade100kTokenizer(PreTrainedTokenizer):
|
|
111 |
**kwargs,
|
112 |
):
|
113 |
super().__init__(errors=errors, **kwargs)
|
|
|
|
|
114 |
self._tiktoken_config = _arcade100k(vocab_file)
|
115 |
self.tokenizer = tiktoken.Encoding(**self._tiktoken_config)
|
116 |
|
|
|
111 |
**kwargs,
|
112 |
):
|
113 |
super().__init__(errors=errors, **kwargs)
|
114 |
+
self.errors = errors
|
115 |
+
|
116 |
self._tiktoken_config = _arcade100k(vocab_file)
|
117 |
self.tokenizer = tiktoken.Encoding(**self._tiktoken_config)
|
118 |
|