"""
## reference
https://github.com/xai-org/grok-1/blob/main/run.py
vocab_size=128 * 1024,
pad_token=0,
eos_token=2,
"""
import os

import sentencepiece

# Imported for its side effects: presumably monkey-patches
# SentencePieceProcessor (e.g. so that decode() accepts the
# skip_special_tokens keyword, which stock sentencepiece lacks).
from tokenizer import sptokenizer_patch

# Resolve tokenizer.model relative to this file and load it.
CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
MODEL_FILE = os.path.join(CURRENT_DIR, "tokenizer.model")
tokenizer = sentencepiece.SentencePieceProcessor(model_file=MODEL_FILE)
# print(tokenizer.decode([1, 2, 3], skip_special_tokens=True))
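
# --- Usage sketch ---
# A minimal sanity check and encode/decode round-trip, assuming
# tokenizer.model is the grok-1 SentencePiece model referenced in the
# docstring above. Only stock sentencepiece API calls are used here;
# the expected values in the comments come from the grok-1 config.
if __name__ == "__main__":
    # Special-token ids baked into the model should match the grok-1 settings.
    print("vocab size:", tokenizer.vocab_size())  # expected: 128 * 1024
    print("pad id:", tokenizer.pad_id())          # expected: 0
    print("eos id:", tokenizer.eos_id())          # expected: 2

    # Round-trip: text -> ids -> text.
    text = "hello world"
    ids = tokenizer.encode(text)
    print("ids:", ids)
    print("decoded:", tokenizer.decode(ids))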