license: apache-2.0 | |
language: | |
- zh | |
- en | |
pipeline_tag: text-generation | |
inference: false | |
tags: | |
- chinese | |
# Explanation | |
- The base tokenizer comes from [baichuan-7B](https://github.com/baichuan-inc/baichuan-7B); this model adds the following math symbols and LaTeX command tokens (IDs 64000–64075): | |
``` | |
"approx": 64000, | |
"arccos": 64001, | |
"arcsin": 64002, | |
"arctan": 64003, | |
"backsim": 64004, | |
"begin{matrix}": 64005, | |
"begin{vmatrix}": 64006, | |
"beta": 64007, | |
"cdot": 64008, | |
"cdots": 64009, | |
"cong": 64010, | |
"delta": 64011, | |
"dot": 64012, | |
"downarrow": 64013, | |
"end{matrix}": 64014, | |
"end{vmatrix}": 64015, | |
"exists": 64016, | |
"forall": 64017, | |
"gamma": 64018, | |
"geq": 64019, | |
"infty": 64020, | |
"lambda": 64021, | |
"left.": 64022, | |
"left[": 64023, | |
"left{": 64024, | |
"leftrightarrow": 64025, | |
"leq": 64026, | |
"lg": 64027, | |
"neq": 64028, | |
"notin": 64029, | |
"omega": 64030, | |
"overline": 64031, | |
"overrightarrow": 64032, | |
"prime": 64033, | |
"psi": 64034, | |
"rho": 64035, | |
"right.": 64036, | |
"right}": 64037, | |
"right]": 64038, | |
"rightarrow": 64039, | |
"sigma": 64040, | |
"subset": 64041, | |
"subseteq": 64042, | |
"supset": 64043, | |
"supseteq": 64044, | |
"tan": 64045, | |
"textcircled": 64046, | |
"text{": 64047, | |
"therefore": 64048, | |
"theta": 64049, | |
"varepsilon": 64050, | |
"varphi": 64051, | |
"widehat": 64052, | |
"xrightarrow": 64053, | |
"…": 64054, | |
"℃": 64055, | |
"①": 64056, | |
"②": 64057, | |
"③": 64058, | |
"④": 64059, | |
"⑤": 64060, | |
"⑥": 64061, | |
"⑦": 64062, | |
"⑧": 64063, | |
"⑨": 64064, | |
"⑩": 64065, | |
"%": 64066, | |
"(": 64067, | |
")": 64068, | |
"+": 64069, | |
"-": 64070, | |
".": 64071, | |
";": 64072, | |
"<": 64073, | |
"=": 64074, | |
">": 64075 | |
``` |