Tokenizer problem
When I try to load the model, I get the following error:
"File ~/anaconda3/lib/python3.11/site-packages/transformers/models/auto/tokenization_auto.py:733, in AutoTokenizer.from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwargs)
731 tokenizer_class = tokenizer_class_from_name(tokenizer_class_candidate)
732 if tokenizer_class is None:
--> 733 raise ValueError(
734 f"Tokenizer class {tokenizer_class_candidate} does not exist or is not currently imported."
735 )
736 return tokenizer_class.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)
738 # Otherwise we have to be creative.
739 # if model is an encoder decoder, the encoder tokenizer class is used by default
ValueError: Tokenizer class ReplitLMTokenizer does not exist or is not currently imported.
I tried upgrading transformers, but that did not help.
If someone runs into the same issue one day: just pass trust_remote_code=True as an argument to AutoTokenizer.from_pretrained.
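For example, a minimal sketch (substitute whichever model you are loading; glaiveai/glaive-function-calling-v1 is the one discussed below):

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained(
    "glaiveai/glaive-function-calling-v1",
    trust_remote_code=True,  # allows the repo's custom ReplitLMTokenizer class to be imported
)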
I have a somewhat similar issue: I am getting the error below even though I have the argument trust_remote_code set to True.
~/.cache/huggingface/modules/transformers_modules/glaiveai/glaive-function-calling-v1/d94dd60c9ceaff581cf7e6b5f9982b4b1716ae16/replit_lm_tokenizer.py in vocab_size(self)
71 @property
72 def vocab_size(self):
---> 73 return self.sp_model.get_piece_size()
74
75 def get_vocab(self):
AttributeError: 'ReplitLMTokenizer' object has no attribute 'sp_model'
Same issue on Colab.
Mine has trust_remote_code set to True but it's still hitting the error.
Edit: my mistake, I was passing the argument to vLLM. I've modified the __init__ method to reflect that; I will try again and let you know:
def __init__(
self,
model: str,
tokenizer: Optional[str] = None,
tokenizer_mode: str = "auto",
trust_remote_code: bool = True, # modified to run replit
tensor_parallel_size: int = 1,
dtype: str = "auto",
quantization: Optional[str] = None,
revision: Optional[str] = None,
tokenizer_revision: Optional[str] = None,
seed: int = 0,
gpu_memory_utilization: float = 0.9,
swap_space: int = 4,
**kwargs,
) -> None:
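For what it's worth, instead of editing vLLM's source, recent vLLM versions accept trust_remote_code directly when constructing the LLM object; a minimal sketch, assuming a reasonably current vLLM:

from vllm import LLM

llm = LLM(
    model="glaiveai/glaive-function-calling-v1",
    trust_remote_code=True,  # forwarded to the HF tokenizer/model loaders
)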
Still same issue here for me
When the error happens, it looks like self.sp_model is not yet set in the Replit tokenizer (newer transformers releases appear to call vocab_size via get_vocab during super().__init__, so sp_model has to exist first). I got a bit further by swapping things around so that the super().__init__ line comes after the sp_model setup.
Original:
def __init__(self, vocab_file, bos_token=None, eos_token='<|endoftext|>', unk_token='<|unk|>', pad_token='<|pad|>', sep_token=None, sp_model_kwargs: Optional[Dict[str, Any]]=None, **kwargs) -> None:
    self.sp_model_kwargs = {} if sp_model_kwargs is None else sp_model_kwargs
    super().__init__(bos_token=bos_token, eos_token=eos_token, unk_token=unk_token, pad_token=pad_token, sep_token=sep_token, sp_model_kwargs=self.sp_model_kwargs, **kwargs)
    self.vocab_file = vocab_file
    self.sp_model = spm.SentencePieceProcessor(**self.sp_model_kwargs)
    self.sp_model.Load(vocab_file)
Edited version:
def __init__(self, vocab_file, bos_token=None, eos_token='<|endoftext|>', unk_token='<|unk|>', pad_token='<|pad|>', sep_token=None, sp_model_kwargs: Optional[Dict[str, Any]]=None, **kwargs) -> None:
    self.sp_model_kwargs = {} if sp_model_kwargs is None else sp_model_kwargs
    self.vocab_file = vocab_file
    # set up sp_model first, since super().__init__ may touch vocab_size/get_vocab
    self.sp_model = spm.SentencePieceProcessor(**self.sp_model_kwargs)
    self.sp_model.Load(vocab_file)
    super().__init__(bos_token=bos_token, eos_token=eos_token, unk_token=unk_token, pad_token=pad_token, sep_token=sep_token, sp_model_kwargs=self.sp_model_kwargs, **kwargs)
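After patching replit_lm_tokenizer.py in the local module cache (the path from the traceback above), a quick sanity check, assuming the cached copy is the one that gets loaded:

from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("glaiveai/glaive-function-calling-v1", trust_remote_code=True)
print(tok.vocab_size)  # previously raised AttributeError: 'ReplitLMTokenizer' object has no attribute 'sp_model'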
The next issue I faced was with the HF_prefixlm_converter module. I needed to comment out lines 15, 16, 23, and 24.
Lines 15 and 16:
#from transformers.models.bloom.modeling_bloom import _expand_mask as _expand_mask_bloom
#from transformers.models.bloom.modeling_bloom import _make_causal_mask as _make_causal_mask_bloom
Lines 23 and 24:
#from transformers.models.opt.modeling_opt import _expand_mask as _expand_mask_opt
#from transformers.models.opt.modeling_opt import _make_causal_mask as _make_causal_mask_opt
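A hedged alternative to deleting those lines outright, assuming you don't need the Bloom/OPT conversion paths: guard the imports so the module still loads on newer transformers releases where these helpers were removed. Sketch only:

try:
    from transformers.models.bloom.modeling_bloom import _expand_mask as _expand_mask_bloom
    from transformers.models.bloom.modeling_bloom import _make_causal_mask as _make_causal_mask_bloom
except ImportError:  # removed in newer transformers releases
    _expand_mask_bloom = None
    _make_causal_mask_bloom = None

try:
    from transformers.models.opt.modeling_opt import _expand_mask as _expand_mask_opt
    from transformers.models.opt.modeling_opt import _make_causal_mask as _make_causal_mask_opt
except ImportError:
    _expand_mask_opt = None
    _make_causal_mask_opt = None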