Spaces:
Running
Running
File size: 566 Bytes
814ee6b d27a756 814ee6b 480ae5d 7d2062e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 |
# ---- tokenizer source ----------------------------------------------------
# Whether to use the remote tokenizer (True) or the local tokenizer (False).
USE_REMOTE = False

# Disabled setting, kept for reference:
#   load_vocab_with_SPECIAL_TOKEN = True
# Leaving the special tokens out leads to a wrong vocabulary-size computation
# and inconsistent overlap_token results.

# ---- encoding config -----------------------------------------------------
ADD_SPECIAL_TOKEN = False

# ---- import behaviour ----------------------------------------------------
LAZY_IMPORT = True

# DEBUG: set the environment variable RUST_BACKTRACE=full

# ---- defaults ------------------------------------------------------------
# Sample text: four lines, in English, Spanish, Chinese and Japanese.
# Written as adjacent literals joined by explicit "\n" so that no leading or
# trailing newline is part of the value.
default_user_input = (
    "Replace this text in the input field to see how tokenization works.\n"
    "Buenos días!\n"
    "华为发布Mate60手机。\n"
    "ラグビーワールドカップ2023フランス"
)

# Names of the two default tokenizers.
default_tokenizer_type_1 = "llama3"
default_tokenizer_type_2 = "gpt_4"
|