Spaces:
Running
Running
update
Browse files
app.py
CHANGED
@@ -60,10 +60,13 @@ def example_fn(example_idx):
|
|
60 |
|
61 |
|
62 |
"""Replace this text in the input field to see how tokenization works
|
63 |
-
|
|
|
|
|
64 |
|
65 |
default_user_input = """Replace this text in the input field to see how tokenization works
|
66 |
-
华为发布
|
|
|
67 |
default_tokenizer_type_1 = "llama"
|
68 |
default_tokenizer_type_2 = "internlm_chat_7b"
|
69 |
default_stats_vocab_size_1, default_stats_zh_token_size_1 = basic_count(default_tokenizer_type_1)
|
@@ -84,7 +87,8 @@ with gr.Blocks(css="style.css") as demo:
|
|
84 |
with gr.Row():
|
85 |
gr.Markdown("## Input Text")
|
86 |
dropdown_examples = gr.Dropdown(
|
87 |
-
["空格测试", "标点测试", "符号测试", "数字测试"],
|
|
|
88 |
value="Examples",
|
89 |
type="index",
|
90 |
show_label=False,
|
|
|
60 |
|
61 |
|
62 |
"""Replace this text in the input field to see how tokenization works
|
63 |
+
|
64 |
+
|
65 |
+
"""
|
66 |
|
67 |
default_user_input = """Replace this text in the input field to see how tokenization works
|
68 |
+
华为发布Mate60手机
|
69 |
+
ラグビーワールドカップ2023フランス"""
|
70 |
default_tokenizer_type_1 = "llama"
|
71 |
default_tokenizer_type_2 = "internlm_chat_7b"
|
72 |
default_stats_vocab_size_1, default_stats_zh_token_size_1 = basic_count(default_tokenizer_type_1)
|
|
|
87 |
with gr.Row():
|
88 |
gr.Markdown("## Input Text")
|
89 |
dropdown_examples = gr.Dropdown(
|
90 |
+
# ["空格测试", "标点测试", "符号测试", "数字测试"],
|
91 |
+
["spaces", "punctuations", "symbols", "digits"],
|
92 |
value="Examples",
|
93 |
type="index",
|
94 |
show_label=False,
|