Spaces:

xu-song
/

tokenizer-arena

Running

xu-song committed on Sep 6, 2023

Commit

d2551ad

1 Parent(s): 9dfa838

update

Files changed (1) hide show

app.py CHANGED Viewed

@@ -60,10 +60,13 @@ def example_fn(example_idx):
 """Replace this text in the input field to see how tokenization works
-华为智能音箱发布：华为发布mate60 pro手机"""
 default_user_input = """Replace this text in the input field to see how tokenization works
-华为发布mate60 pro手机"""
 default_tokenizer_type_1 = "llama"
 default_tokenizer_type_2 = "internlm_chat_7b"
 default_stats_vocab_size_1, default_stats_zh_token_size_1 = basic_count(default_tokenizer_type_1)
@@ -84,7 +87,8 @@ with gr.Blocks(css="style.css") as demo:
     with gr.Row():
         gr.Markdown("## Input Text")
         dropdown_examples = gr.Dropdown(
-            ["空格测试", "标点测试", "符号测试", "数字测试"],
             value="Examples",
             type="index",
             show_label=False,

 """Replace this text in the input field to see how tokenization works
+"""
 default_user_input = """Replace this text in the input field to see how tokenization works
+华为发布Mate60手机
+ラグビーワールドカップ2023フランス"""
 default_tokenizer_type_1 = "llama"
 default_tokenizer_type_2 = "internlm_chat_7b"
 default_stats_vocab_size_1, default_stats_zh_token_size_1 = basic_count(default_tokenizer_type_1)
     with gr.Row():
         gr.Markdown("## Input Text")
         dropdown_examples = gr.Dropdown(
+            # ["空格测试", "标点测试", "符号测试", "数字测试"],
+            ["spaces", "punctuations", "symbols", "digits"],
             value="Examples",
             type="index",
             show_label=False,