update

util.py            +2 -8
vocab/__init__.py  +1 -1
util.py
@@ -72,6 +72,7 @@ def tokenize_pair(text, tokenizer_type_1, tokenizer_type_2):
     return pos_tokens_1, table_df_1, pos_tokens_2, table_df_2
 
 
+@lru_cache
 def basic_count(tokenizer_type):
     tokenizer = load_tokener(tokenizer_type)
     stats = iter_vocab(tokenizer, tokenizer_type)
@@ -113,7 +114,6 @@ def on_load(url_params, request: gr.Request):
     """
     onLoad
     """
-
     text = None
     tokenizer_type_1 = None
     tokenizer_type_2 = None
@@ -122,11 +122,7 @@ def on_load(url_params, request: gr.Request):
     except:
         url_params = {}
     if request:
-        try:
-            logger.info(str(request.headers))
-            logger.info(str(request.query_params))
-        except:
-            pass
+        logger.info(str(request.headers))
         client_ip = request.client.host
         # local_ip = socket.gethostbyname(socket.gethostbyname(""))
         # headers = request.kwargs['headers']
@@ -139,8 +135,6 @@ def on_load(url_params, request: gr.Request):
         tokenizer_type_1 = url_params.get("tokenizer1", default_tokenizer_type_1)
         tokenizer_type_2 = url_params.get("tokenizer2", default_tokenizer_type_2)
         text = url_params.get("text", default_user_input)
-
-
     logger.info(f"client_ip: {client_ip}; params: {url_params}")
     return text, tokenizer_type_1, tokenizer_type_2
 
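Besides the logging cleanup in on_load, the functional change in util.py is the @lru_cache decorator on basic_count: repeated requests for the same tokenizer_type now reuse the previously computed vocabulary statistics instead of reloading the tokenizer. Below is a minimal, self-contained sketch of that caching behavior; slow_count is a hypothetical stand-in for basic_count, not code from this repo, and util.py is assumed to import lru_cache from functools (the import line is not shown in this diff).

from functools import lru_cache
import time

@lru_cache  # results are cached per hashable argument value (requires Python 3.8+ for the bare form)
def slow_count(tokenizer_type: str) -> str:
    time.sleep(1)  # stand-in for the load_tokener() + iter_vocab() work
    return f"stats for {tokenizer_type}"

start = time.time()
slow_count("chatglm2_6b")   # first call pays the full cost
slow_count("chatglm2_6b")   # second call is served from the cache
print(f"elapsed: {time.time() - start:.1f}s")  # roughly 1s, not 2s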
vocab/__init__.py
@@ -75,7 +75,7 @@ all_tokenizers = [
     # ##### glm系列
     "glm_chinese",
     "chatglm_6b",
-    "
+    "chatglm2_6b",
     #
     # #### llama alpaca系列
     "llama",  # '中文单字': 700, '中文多字': 0
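With "chatglm2_6b" registered in all_tokenizers, it goes through the same code paths as the other names, for example the now-cached basic_count. A hedged sketch, assuming the import paths below match the repo layout (they are not visible in this diff) and that load_tokener can resolve the new name:

from vocab import all_tokenizers   # list edited in this commit (import path assumed)
from util import basic_count       # lru_cache-decorated in this commit (import path assumed)

# Print vocab statistics for the glm-family tokenizers; repeated calls are cheap
# because basic_count caches per tokenizer_type.
for name in all_tokenizers:
    if name.startswith(("glm", "chatglm")):
        print(name, basic_count(name))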