Spaces:
Running
Running
File size: 1,352 Bytes
751936e 814ee6b 751936e 814ee6b 751936e 814ee6b 751936e 814ee6b f4973d4 814ee6b f4973d4 814ee6b f4973d4 814ee6b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 |
from zhon.hanzi import punctuation as zh_punc
def is_zh_char(uchar):
    """Return True if ``uchar`` falls within U+4E00..U+9FA5 (CJK ideographs).

    Modelled on the Han-character pattern used by jieba:
    https://github.com/fxsjy/jieba/blob/master/jieba/__init__.py#L48
    Note that jieba's pattern runs from U+4E00 to U+9FD5, whereas the upper
    bound checked here is U+9FA5.

    The test is an ordinary string comparison, so it is meant to be called
    with a single character; the empty string yields False.
    """
    lower_bound = u'\u4e00'
    upper_bound = u'\u9fa5'
    if uchar < lower_bound:
        return False
    return uchar <= upper_bound
def has_zh(text):
    """Return True if at least one character of ``text`` is Chinese.

    Each character is classified with ``is_zh_char``; scanning stops at the
    first match. Empty input yields False.
    """
    for ch in text:
        if is_zh_char(ch):
            return True
    return False
def get_zh_count(text):
    """Return how many characters of ``text`` are Chinese per ``is_zh_char``."""
    zh_total = 0
    for ch in text:
        if is_zh_char(ch):
            zh_total += 1
    return zh_total
def is_all_zh(text):
    """Return True if every character of ``text`` is Chinese per ``is_zh_char``.

    Empty input yields True, because there is no character that fails.
    """
    return all(map(is_zh_char, text))
def is_all_en(text):
    """Return True if ``text`` is non-empty and made only of ASCII letters.

    The text is encoded to UTF-8 so that ``bytes.isalpha`` does the check:
    it accepts only the ASCII letters a-z / A-Z and rejects an empty
    sequence, so digits, spaces, punctuation and any non-ASCII character
    all produce False.
    """
    utf8_bytes = text.encode('utf-8')
    return utf8_bytes.isalpha()
def is_digit_char(uchar):
    """Return True if ``uchar`` is exactly one ASCII digit character (0-9).

    Args:
        uchar: The string to classify; expected to be a single character.

    Returns:
        True for "0" through "9"; False for anything else, including the
        empty string, multi-character strings and non-ASCII digits.
    """
    # ``in`` on a string is a substring search, so without the length check
    # both "" and digit runs such as "12" would be reported as a digit char.
    return len(uchar) == 1 and uchar in "0123456789"
def has_digit(text):
    """Return True if at least one character of ``text`` is a digit.

    Characters are classified with ``is_digit_char``. Empty input yields
    False.
    """
    return any(map(is_digit_char, text))
def is_all_digit(text):
    """Return True if every character of ``text`` is a digit per ``is_digit_char``.

    Empty input yields True, because there is no character that fails.
    """
    for ch in text:
        if not is_digit_char(ch):
            return False
    return True
def get_digit_count(text):
    """Return the number of ASCII digit characters (0-9) in ``text``."""
    return sum(1 for ch in text if ch in "0123456789")
def has_zh_punc(text):
    """Return True if ``text`` contains Chinese punctuation.

    A character counts as Chinese punctuation when it occurs in ``zh_punc``
    (``zhon.hanzi.punctuation``). Empty input yields False.
    """
    for ch in text:
        if ch in zh_punc:
            return True
    return False
def is_space_char(uchar):
    """Return True if ``uchar`` is a single whitespace character.

    Reference list of blank characters: https://emptycharacter.com/

    The criterion is the one ``get_space_count`` applies: a character is
    whitespace when ``str.strip()`` reduces it to the empty string.

    NOTE(review): invisible characters that Python does not classify as
    whitespace (for example U+200B ZERO WIDTH SPACE) are not matched;
    confirm whether they should be.

    Args:
        uchar: The string to classify; expected to be a single character.

    Returns:
        True for one whitespace character; False otherwise, including the
        empty string and multi-character strings.
    """
    return len(uchar) == 1 and not uchar.strip()
def has_space(text):
    """Return True if at least one character of ``text`` is whitespace.

    A character is whitespace when ``str.strip()`` reduces it to the empty
    string, which is the same criterion ``get_space_count`` uses. Empty
    input yields False.
    """
    return any(not ch.strip() for ch in text)
def is_all_space(text):
    """Return True if every character of ``text`` is whitespace.

    A character is whitespace when ``str.strip()`` reduces it to the empty
    string, which is the same criterion ``get_space_count`` uses. Empty
    input yields True, matching the behaviour of ``is_all_zh`` and
    ``is_all_digit``.
    """
    return all(not ch.strip() for ch in text)
def get_space_count(text):
    """Return the number of whitespace characters in ``text``.

    A character is counted when ``str.strip()`` reduces it to the empty
    string.
    """
    return sum(1 for ch in text if not ch.strip())
|