jon-tow committed on
Commit
a8f2f28
·
verified ·
1 Parent(s): 3aeae29

feat(tokenizer): expose merge ranks and special tokens for GGUF

Browse files
Files changed (1) hide show
  1. tokenization_arcade100k.py +4 -1
tokenization_arcade100k.py CHANGED
@@ -126,6 +126,9 @@ class Arcade100kTokenizer(PreTrainedTokenizer):
126
  self.decoder.update({i: n for n, i in self.tokenizer._special_tokens.items()})
127
  self.eos_token = self.decoder[self.tokenizer.eot_token]
128
  self.pad_token = self.decoder[self.tokenizer.eot_token]
 
 
 
129
 
130
  def __len__(self):
131
  return self.tokenizer.n_vocab
@@ -270,4 +273,4 @@ class Arcade100kTokenizer(PreTrainedTokenizer):
270
  token_ids = [token_ids]
271
  if skip_special_tokens:
272
  token_ids = [i for i in token_ids if i < self.tokenizer.eot_token]
273
- return self.tokenizer.decode(token_ids)
 
126
  self.decoder.update({i: n for n, i in self.tokenizer._special_tokens.items()})
127
  self.eos_token = self.decoder[self.tokenizer.eot_token]
128
  self.pad_token = self.decoder[self.tokenizer.eot_token]
129
+ # Expose for convenience
130
+ self.mergeable_ranks = self.tokenizer._mergeable_ranks
131
+ self.special_tokens = self.tokenizer._special_tokens
132
 
133
  def __len__(self):
134
  return self.tokenizer.n_vocab
 
273
  token_ids = [token_ids]
274
  if skip_special_tokens:
275
  token_ids = [i for i in token_ids if i < self.tokenizer.eot_token]
276
+ return self.tokenizer.decode(token_ids)