#!/usr/bin/python3
# -*- coding: utf-8 -*-
import re
import unicodedata
import six


class Character(object):
    f_unknown = 'unknown'
    f_is_alnum = 'is_alnum'
    f_is_alpha = 'is_alpha'
    f_is_num = 'is_num'
    f_is_space = 'is_space'
    f_is_hyphens = 'is_hyphens'
    f_is_punctuation = 'is_punctuation'
    f_is_cjk_character = 'is_cjk_character'
    f_is_jap_character = 'is_jap_character'
    f_is_russian_character = 'is_russian_character'

    @classmethod
    def is_alnum(cls, ch: str):
        """注意: string.isalnum() 函数, 会对汉字识别为 True. """
        if cls.is_cjk_character(ch):
            return False
        if ch.isalnum():
            return True
        return False

    @classmethod
    def is_alpha(cls, ch: str):
        if cls.is_cjk_character(ch):
            return False
        if ch.isalpha():
            return True
        return False

    @staticmethod
    def is_control(ch):
        """控制类字符判断"""
        if ch in ('\t', '\n', '\r'):
            return False
        return unicodedata.category(ch) in ("Cc", "Cf")

    @classmethod
    def is_num(cls, ch: str):
        if cls.is_cjk_character(ch):
            return False
        if ch.isdigit():
            return True
        return False

    @classmethod
    def is_space(cls, ch):
        """空格类字符判断"""
        if ch in (" ", '\n', '\r', '\t'):
            return True
        if unicodedata.category(ch) == 'Zs':
            return True
        return False

    @classmethod
    def is_hyphens(cls, ch):
        """
        是否为连字符, `-` 匹配减号.
        + : 43
        - : 45
        """
        code = ord(ch)
        if code in (43, 45):
            return True
        return False

    @classmethod
    def is_punctuation(cls, ch):
        """标点符号类字符判断（全/半角均在此内）"""
        code = ord(ch)
        if 33 <= code <= 47 or \
            58 <= code <= 64 or \
            91 <= code <= 96 or \
            123 <= code <= 126 or \
            unicodedata.category(ch).startswith("P"):
            return True
        return False

    @classmethod
    def is_cjk_character(cls, ch):
        """CJK类字符判断（包括中文字符也在此列）
        参考：https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block)
        """
        code = ord(ch)

        if 0x4E00 <= code <= 0x9FFF or \
            0x3400 <= code <= 0x4DBF or \
            0x20000 <= code <= 0x2A6DF or \
            0x2A700 <= code <= 0x2B73F or \
            0x2B740 <= code <= 0x2B81F or \
            0x2B820 <= code <= 0x2CEAF or \
            0xF900 <= code <= 0xFAFF or \
            0x2F800 <= code <= 0x2FA1F:
            return True
        return False

    @classmethod
    def is_jap_character(cls, ch):
        code = ord(ch)
        if 0x3040 <= code <= 0x309F or \
               0x30A0 <= code <= 0x30FF or \
               0x31F0 <= code <= 0x31FF:
            return True
        return False

    @classmethod
    def is_russian_character(cls, ch):
        code = ord(ch)
        if 1040 <= code <= 1104:
            return True
        return False

    @staticmethod
    def convert_to_unicode(text):
        """Converts `text` to Unicode (if it's not already), assuming utf-8 input."""
        if six.PY3:
            if isinstance(text, str):
                return text
            elif isinstance(text, bytes):
                return text.decode("utf-8", "ignore")
            else:
                raise ValueError("Unsupported string type: %s" % (type(text)))
        elif six.PY2:
            if isinstance(text, str):
                return text.decode("utf-8", "ignore")
            elif isinstance(text, unicode):
                return text
            else:
                raise ValueError("Unsupported string type: %s" % (type(text)))
        else:
            raise ValueError("Not running on Python2 or Python 3?")


class LowerCase(object):
    confuse_map = {
        # 俄语
        'Й': 'И',
        'й': 'и',
        'ѐ': 'е',
        'ё': 'е',
        'ѓ': 'г',
        'ї': 'і',

        # 西语
        'á': 'a',
        'é': 'e',
        'í': 'i',
        'ó': 'o',
        'ú': 'u',
        'ü': 'u',
        'ñ': 'n',
    }

    @classmethod
    def lowercase(cls, string):
        """转小写不应改变字符串的长度"""
        string = str(string).lower()
        result = ''
        for c in string:
            code = ord(c)

            # 俄语转小写.
            if 1040 <= code <= 1072:
                c = chr(ord(c) + 32)

            # 混淆字转换
            flag = cls.confuse_map.get(c)
            if flag is not None:
                c = flag

            result += c

        if len(string) != len(result):
            raise AssertionError('this method should not change the char num. '
                                 'string: {}, result: {}'.format(string, result))
        return result


class Pattern(object):
    r"""
    \d              匹配任意数字符, 等价于 [0-9].
    \s              匹配任意空白字符, 等价于 [\t\n\r\f] 包括空隔.
    re{n,m}         匹配 n 到 m 次由前面的正则表达式定义的片段, 贪婪方式.
    """
    alp_num_ch = r'[A-Z0-9a-z\u4e00-\u9fa5]+'          # 提取中文数字字母
    alp_num_or_others = r'[^A-Z0-9a-z]|[A-Z0-9a-z]+'   # 用于在 ' '.join() 中分融数字字母与其它字符.
    brackets = r'\(.*?\)'                              # 识别括号
    hw_ry_xy = r'华为|荣耀|小艺'
    p_pattern = r'[a-z]\d{1,2}\s+p\d{1,2}'
    pro_pattern = r'([a-z]+\s*\d{1,2})\s+(p\d{1,2})'
    any_blanks = r'\s+'
    square_brackets = r'\[.*?\]'                       # 识别方括号
    regex_dsw_find = r'\\\\[dDsSwW][\+\*]?'            # 从如 `\d+左右` 中去除 `\d+`. 用于正则索引的获取.


class ValidPeriod(object):
    """有效期"""
    l_compare_lt = 'l_compare_lt'
    l_compare_gt = 'l_compare_gt'
    l_time = 'l_time'
    l_time_unit = 'l_time_unit'

    # 每个子正则表达式 (形如: `<?label>pattern`) 都包含一个标签.
    l_compare_lt_prefix_regex = rf'?<{l_compare_lt}>不超过|没到|不到|少于'
    l_compare_gt_prefix_regex = rf'?<{l_compare_gt}>超|超过|超过了|大于|不止'

    l_compare_lt_suffix_regex = rf'?<{l_compare_lt}>没到|不到|内|以内|之内'
    l_compare_gt_suffix_regex = rf'?<{l_compare_gt}>以上|不止'

    l_time_regex = rf'?<{l_time}>[两|壹|零|一|二|三|四|五|六|七|八|九|十|百|千\d]+'
    # l_time_unit_regex = f'?<{l_time_unit}>年|个月|周|天|星期'
    l_time_unit_regex = rf'?<{l_time_unit}>(?:个)?年|个月|周|天|日|星期|个星期'

    # 正则表达式: 识别 -> 不到十天, 一个星期, 超七天后, 七日内, 第七天, 超过了七天, 不止5天 等. 类似的模式.
    pattern1 = rf'(?:({l_compare_lt_prefix_regex})|({l_compare_gt_prefix_regex}))?\s*({l_time_regex})\s*({l_time_unit_regex})\s*(?:({l_compare_lt_suffix_regex})|({l_compare_gt_suffix_regex}))?'

    # 正则表达式: 识别 -> 上个月5号, 这个月14日 等. 日期模式
    pass

    @staticmethod
    def demo1():
        """
        例句:
        一个星期, 超七天后, 七日内, 第七天, 超过了七天, 不止5天

        # 以下句子都是从标注数据中找出的有效期, 将来也许需要处理这些.
        刚买2天, 昨天取的, 昨天到货, 签收后的第二天, 签收后七天内, 前两天, 货还没发, 用了几天
        :return:
        """

        string = "5天不止"

        ret = ValidPeriod.valid_period_parse(string)
        print(ret)
        return

    @staticmethod
    def time_convert(time_string: str):
        base_num_dict = {
            '十': 10,
            '百': 100,
            '千': 1000,
        }
        d = {
            '壹': 1,
            '两': 2,
            '零': 0,
            '一': 1,
            '二': 2,
            '三': 3,
            '四': 4,
            '五': 5,
            '六': 6,
            '七': 7,
            '八': 8,
            '九': 9,
        }
        result = 0
        tmp = ''
        for c in time_string:
            if c.isdecimal():
                tmp += c
                continue

            base_num = base_num_dict.get(c, None)
            if base_num is not None:
                if len(tmp) == 0:
                    result += base_num
                elif len(tmp) == 1:
                    result += base_num * int(tmp)
                    print(result)
                    tmp = ''
                else:
                    pass
            else:
                tmp += str(d.get(c, ''))
        else:
            result += int(tmp)
        return result

    @staticmethod
    def time_unit_convert(time_unit_string: str):
        d = {
            '天': 1,
            '日': 1,
            '周': 7,
            '星期': 7,
            '个星期': 7,
            '个月': 30,
            '年': 365,
        }
        result = d.get(time_unit_string, 1)
        return result

    @staticmethod
    def get_pattern_label(pattern: str):
        """
        子正则表达式都包含了一个标签,
        :param pattern:
        :return:
        """
        pattern_inner = re.compile(r'\?<(.*?)>')
        label_name_list = re.findall(pattern=pattern_inner, string=pattern)
        return label_name_list

    @staticmethod
    def clean_pattern_label(pattern: str):
        pattern_inner = re.compile(r'\?<.*?>')
        result = re.sub(pattern=pattern_inner, repl='', string=pattern)
        return result

    @classmethod
    def valid_period_parse(cls, string: str) -> (int, dict) or (None, dict):
        """cls.pattern1"""
        label_name_list, label_string_list = cls._search_label_list(string, cls.pattern1)
        days, detail = cls._estimate_days(label_name_list, label_string_list)
        return days, detail

    @classmethod
    def _estimate_days(cls, label_name_list, label_string_list) -> (int, dict) or (None, dict):
        """当一个标签都没有识别到时, 两 list 为空. 返回结果为 0. """
        bias = 0
        scale = 1
        main_time = 0

        for label_name, label_string in zip(label_name_list, label_string_list):
            if label_name == cls.l_compare_lt:
                # bias = -1
                pass
            elif label_name == cls.l_compare_gt:
                bias = 1
            elif label_name == cls.l_time:
                main_time += cls.time_convert(label_string)
            elif label_name == cls.l_time_unit:
                scale = cls.time_unit_convert(label_string)
            else:
                pass
        days = main_time * scale + bias
        detail = {
            'main_time': main_time,
            'scale': scale,
            'bias': bias
        }

        if len(label_name_list) == 0 or len(label_string_list) == 0:
            return None, detail

        return days, detail

    @classmethod
    def _search_label_list(cls, string: str, pattern: str) -> (list, list):
        label_name_list = cls.get_pattern_label(pattern)
        pattern = cls.clean_pattern_label(pattern)
        match = re.search(pattern=pattern, string=string)
        if match is None:
            return list(), list()
        label_string_list = match.groups()

        new_label_name_list, new_label_string_list = list(), list()
        for label_name, label_string in zip(label_name_list, label_string_list):
            if label_string is None:
                continue
            new_label_name_list.append(label_name)
            new_label_string_list.append(label_string)

        return new_label_name_list, new_label_string_list

    def __init__(self):
        pass