```python
from transformers import LlamaTokenizer

# Load the Traditional-Chinese-extended LLaMA tokenizer.
# vocab size: 36128
tokenizer = LlamaTokenizer.from_pretrained(
    'ocisd4/llama_tokenizer_ext_zhtw',
    pad_token='',
    add_bos_token=True,
    add_eos_token=False,
)

print(tokenizer.tokenize('今天天氣真好!'))
# ['▁', '今', '天', '天', '氣', '真', '好', '!']

print(tokenizer.encode('今天天氣真好!'))
# [1, 29871, 31482, 30408, 30408, 32045, 30848, 31076, 30584]

print(tokenizer.decode(tokenizer.encode('今天天氣真好!')))
# 今天天氣真好!
```