"""Translate English to Chinese via a dict."""
from typing import List, Union

from gradiobee.en2zh import en2zh
from gradiobee.insert_spaces import insert_spaces


def en2zh_tokens(
    text: Union[str, List[str]],
    dedup: bool = True,
) -> List[List[str]]:
    """Translate English to Chinese tokens via a dict.

    Args:
        text: text to translate, a str or a list of str
        dedup: if True, remove duplicate tokens within each entry
            (token order is then not preserved)

    Returns:
        res: list of lists of str (tokens/chars), one list per input entry
    """
    res = en2zh(text)

    if dedup:
        # set() drops duplicate tokens but does not preserve their order
        return [list(set(insert_spaces(elm).split())) for elm in res]

    return [insert_spaces(elm).split() for elm in res]
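

# Usage sketch (illustrative only): it assumes en2zh() returns one Chinese
# string per input entry and that insert_spaces() separates the characters/
# tokens of each string with spaces, so the actual output depends entirely
# on the dictionary backing en2zh.
if __name__ == "__main__":
    samples = ["a cat", "a dog and a cat"]
    # Deduplicated (default): each inner list holds unique tokens, order arbitrary.
    print(en2zh_tokens(samples))
    # Without dedup: tokens appear as often as they occur in the translation.
    print(en2zh_tokens(samples, dedup=False))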
|