""" | |
## reference | |
https://github.com/xai-org/grok-1/blob/main/run.py | |
vocab_size=128 * 1024, | |
pad_token=0, | |
eos_token=2, | |
""" | |
import os

import sentencepiece

# Side-effect import: presumably patches SentencePieceProcessor (e.g. so
# decode() accepts skip_special_tokens, as in the commented example below).
from tokenizer import sptokenizer_patch  # noqa: F401

# Load the SentencePiece model that ships next to this file.
CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
MODEL_FILE = os.path.join(CURRENT_DIR, "tokenizer.model")
tokenizer = sentencepiece.SentencePieceProcessor(model_file=MODEL_FILE)

# print(tokenizer.decode([1, 2, 3], skip_special_tokens=True))
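
# --- Usage sketch ---
# A minimal round-trip through the loaded tokenizer, using only the vanilla
# sentencepiece API. The skip_special_tokens behavior mentioned above depends
# on what sptokenizer_patch actually installs, so it is not exercised here.
if __name__ == "__main__":
    # Per the referenced run.py config: vocab_size=128 * 1024 (= 131072).
    print(tokenizer.get_piece_size())

    text = "hello world"
    ids = tokenizer.encode(text)   # list[int] token ids
    print(ids)
    print(tokenizer.decode(ids))   # decodes back to the input text

    # eos_token=2 in the reference config; print the corresponding piece.
    print(tokenizer.id_to_piece(2))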