# (extraction residue removed: "Spaces: / Running / Running" page-status header from the hosting site)
# Configuration constants for the tokenizer-comparison app.

# Use a remote (hub-hosted) tokenizer rather than a local copy.
USE_REMOTE = False  # use remote tokenizer or local tokenizer
# load_vocab_with_SPECIAL_TOKEN = True  # NOTE: if special tokens are excluded, the
# vocabulary size is computed incorrectly and overlap_token counts become inconsistent.

# encoding config
ADD_SPECIAL_TOKEN = False

# Defer heavy imports until first use (faster startup).
LAZY_IMPORT = True
# DEBUG: set the environment variable RUST_BACKTRACE=full for full Rust tracebacks.

# Default multilingual sample text shown in the input field
# (English, Spanish, Chinese, Japanese) to demonstrate tokenization differences.
default_user_input = """\
Replace this text in the input field to see how tokenization works.
Buenos días!
华为发布Mate60手机。
ラグビーワールドカップ2023フランス"""

# Tokenizers preselected in the two comparison panes.
default_tokenizer_type_1 = "llama3"
default_tokenizer_type_2 = "gpt_4"