Spaces:
Runtime error
Runtime error
""" | |
babel.plural
~~~~~~~~~~~~
CLDR Plural support. See UTS #35. | |
:copyright: (c) 2013-2023 by the Babel Team. | |
:license: BSD, see LICENSE for more details. | |
""" | |
from __future__ import annotations | |
import decimal | |
import re | |
from collections.abc import Iterable, Mapping | |
from typing import TYPE_CHECKING, Any, Callable | |
if TYPE_CHECKING: | |
from typing_extensions import Literal | |
# Tag returned when no explicit rule matches a number.
_fallback_tag = 'other'

# All CLDR plural category tags, in canonical order; the fallback is last.
_plural_tags = ('zero', 'one', 'two', 'few', 'many', _fallback_tag)
def extract_operands(source: float | decimal.Decimal) -> tuple[decimal.Decimal | int, int, int, int, int, int, Literal[0], Literal[0]]:
    """Extract operands from a decimal, a float or an int, according to `CLDR rules`_.

    The result is an 8-tuple (n, i, v, w, f, t, c, e), where those symbols are as follows:

    ====== ===============================================================
    Symbol Value
    ------ ---------------------------------------------------------------
    n      absolute value of the source number (integer and decimals).
    i      integer digits of n.
    v      number of visible fraction digits in n, with trailing zeros.
    w      number of visible fraction digits in n, without trailing zeros.
    f      visible fractional digits in n, with trailing zeros.
    t      visible fractional digits in n, without trailing zeros.
    c      compact decimal exponent value: exponent of the power of 10 used in compact decimal formatting.
    e      currently, synonym for 'c'. however, may be redefined in the future.
    ====== ===============================================================

    .. _`CLDR rules`: https://www.unicode.org/reports/tr35/tr35-61/tr35-numbers.html#Operands

    >>> extract_operands(decimal.Decimal('1.30'))
    (Decimal('1.30'), 1, 2, 1, 30, 3, 0, 0)
    >>> extract_operands(decimal.Decimal('0.05'))[2:6]
    (2, 2, 5, 5)

    :param source: A real number
    :type source: int|float|decimal.Decimal
    :return: A n-i-v-w-f-t-c-e tuple
    :rtype: tuple[decimal.Decimal, int, int, int, int, int, int, int]
    """
    n = abs(source)
    i = int(n)
    if isinstance(n, float):
        if i == n:
            # Integral floats carry no visible fraction digits: treat as int.
            n = i
        else:
            # Go through the string representation rather than the lossless
            # float -> Decimal conversion: ``Decimal(0.1)`` expands to the
            # exact binary value (50+ digits), which is almost never what the
            # caller means. Callers who do want that can pass in a
            # pre-converted `Decimal` of the desired accuracy.
            n = decimal.Decimal(str(n))

    if isinstance(n, decimal.Decimal):
        dec_tuple = n.as_tuple()
        exp = dec_tuple.exponent
        if exp < 0:
            # ``digits`` holds no leading zeros (0.05 -> digits=(5,), exp=-2),
            # so the slice can be shorter than the real fraction; left-pad it
            # with zeros to the full ``-exp`` visible fraction digits.
            trailing = ''.join(str(d) for d in dec_tuple.digits[exp:]).rjust(-exp, '0')
        else:
            trailing = ''
        no_trailing = trailing.rstrip('0')
        v = len(trailing)
        w = len(no_trailing)
        f = int(trailing or 0)
        t = int(no_trailing or 0)
    else:
        v = w = f = t = 0
    c = e = 0  # TODO: c and e are not supported
    return n, i, v, w, f, t, c, e
class PluralRule:
    """Represents a set of language pluralization rules.  The constructor
    accepts a list of (tag, expr) tuples or a dict of `CLDR rules`_. The
    resulting object is callable and accepts one parameter with a positive or
    negative number (both integer and float) for the number that indicates the
    plural form for a string and returns the tag for the format:

    >>> rule = PluralRule({'one': 'n is 1'})
    >>> rule(1)
    'one'
    >>> rule(2)
    'other'

    Currently the CLDR defines these tags: zero, one, two, few, many and
    other where other is an implicit default.  Rules should be mutually
    exclusive; for a given numeric value, only one rule should apply (i.e.
    the condition should only be true for one of the plural rule elements).

    .. _`CLDR rules`: https://www.unicode.org/reports/tr35/tr35-33/tr35-numbers.html#Language_Plural_Rules
    """

    # ``abstract`` holds the parsed (tag, ast) pairs; ``_func`` caches the
    # compiled Python evaluator and is only set on the first call.
    __slots__ = ('abstract', '_func')

    def __init__(self, rules: Mapping[str, str] | Iterable[tuple[str, str]]) -> None:
        """Initialize the rule instance.

        :param rules: a list of ``(tag, expr)`` tuples with the rules
                      conforming to UTS #35 or a dict with the tags as keys
                      and expressions as values.
        :raise RuleError: if the expression is malformed
        :raise ValueError: if a tag is unknown or defined more than once
        """
        if isinstance(rules, Mapping):
            rules = rules.items()
        found = set()
        self.abstract: list[tuple[str, Any]] = []
        for key, expr in sorted(rules):
            if key not in _plural_tags:
                raise ValueError(f"unknown tag {key!r}")
            elif key in found:
                raise ValueError(f"tag {key!r} defined twice")
            found.add(key)
            ast = _Parser(expr).ast
            # A rule consisting only of samples parses to no AST; skip it.
            if ast:
                self.abstract.append((key, ast))

    def __repr__(self) -> str:
        rules = self.rules
        args = ", ".join([f"{tag}: {rules[tag]}" for tag in _plural_tags if tag in rules])
        return f"<{type(self).__name__} {args!r}>"

    @classmethod
    def parse(cls, rules: Mapping[str, str] | Iterable[tuple[str, str]] | PluralRule) -> PluralRule:
        """Create a `PluralRule` instance for the given rules.  If the rules
        are a `PluralRule` object, that object is returned.

        :param rules: the rules as list or dict, or a `PluralRule` object
        :raise RuleError: if the expression is malformed
        """
        if isinstance(rules, PluralRule):
            return rules
        return cls(rules)

    @property
    def rules(self) -> Mapping[str, str]:
        """The `PluralRule` as a dict of unicode plural rules.

        >>> rule = PluralRule({'one': 'n is 1'})
        >>> rule.rules
        {'one': 'n is 1'}
        """
        _compile = _UnicodeCompiler().compile
        return {tag: _compile(ast) for tag, ast in self.abstract}

    @property
    def tags(self) -> frozenset[str]:
        """A set of explicitly defined tags in this rule.  The implicit default
        ``'other'`` rules is not part of this set unless there is an explicit
        rule for it.
        """
        return frozenset(i[0] for i in self.abstract)

    def __getstate__(self) -> list[tuple[str, Any]]:
        # Only the parsed rules are pickled; the compiled function is rebuilt
        # lazily on the first call after unpickling.
        return self.abstract

    def __setstate__(self, abstract: list[tuple[str, Any]]) -> None:
        self.abstract = abstract

    def __call__(self, n: float | decimal.Decimal) -> str:
        # ``_func`` is a slot that stays unset until first use, hence hasattr.
        if not hasattr(self, '_func'):
            self._func = to_python(self)
        return self._func(n)
def to_javascript(rule: Mapping[str, str] | Iterable[tuple[str, str]] | PluralRule) -> str:
    """Render a list/dict of rules or a `PluralRule` object as a standalone
    JavaScript function expression.  No external library is needed to run
    the generated code:

    >>> to_javascript({'one': 'n is 1'})
    "(function(n) { return (n == 1) ? 'one' : 'other'; })"

    Implementation detail: sub-expressions used in range checks are emitted
    (and therefore evaluated) several times.  That keeps the output free of
    helper functions at a negligible cost for such small computations.

    :param rule: the rules as list or dict, or a `PluralRule` object
    :raise RuleError: if the expression is malformed
    """
    compile_js = _JavaScriptCompiler().compile
    # One "<condition> ? '<tag>' : " link per rule; the chain of ternaries
    # ends in the fallback tag.
    branches = [
        f"{compile_js(ast)} ? {tag!r} : "
        for tag, ast in PluralRule.parse(rule).abstract
    ]
    return f"(function(n) {{ return {''.join(branches)}{_fallback_tag!r}; }})"
def to_python(rule: Mapping[str, str] | Iterable[tuple[str, str]] | PluralRule) -> Callable[[float | decimal.Decimal], str]:
    """Turn a list/dict of rules or a `PluralRule` object into a plain Python
    function.  Handy when a real callable is needed and the rule object
    itself is of no interest:

    >>> func = to_python({'one': 'n is 1', 'few': 'n in 2..4'})
    >>> func(1)
    'one'
    >>> func(3)
    'few'
    >>> func = to_python({'one': 'n in 1,11', 'few': 'n in 3..10,13..19'})
    >>> func(11)
    'one'
    >>> func(15)
    'few'

    :param rule: the rules as list or dict, or a `PluralRule` object
    :raise RuleError: if the expression is malformed
    """
    compile_expr = _PythonCompiler().compile
    lines = [
        'def evaluate(n):',
        ' n, i, v, w, f, t, c, e = extract_operands(n)',
    ]
    # str(tag) coerces the tag to the native string type; tags come from a
    # small ASCII-only set, so this is always safe.
    lines.extend(
        f" if ({compile_expr(ast)}): return {str(tag)!r}"
        for tag, ast in PluralRule.parse(rule).abstract
    )
    lines.append(f" return {_fallback_tag!r}")

    # Helpers the generated source refers to by name.
    scope = {
        'IN': in_range_list,
        'WITHIN': within_range_list,
        'MOD': cldr_modulo,
        'extract_operands': extract_operands,
    }
    # The executed text is produced by the compiler from a parsed AST, never
    # taken verbatim from the rule string.
    exec(compile('\n'.join(lines), '<rule>', 'exec'), scope)
    return scope['evaluate']
def to_gettext(rule: Mapping[str, str] | Iterable[tuple[str, str]] | PluralRule) -> str:
    """Render the plural rule as a gettext ``Plural-Forms`` expression.
    Gettext expressions only deal with integers and yield indices instead
    of tags.

    >>> to_gettext({'one': 'n is 1', 'two': 'n is 2'})
    'nplurals=3; plural=((n == 1) ? 0 : (n == 2) ? 1 : 2);'

    :param rule: the rules as list or dict, or a `PluralRule` object
    :raise RuleError: if the expression is malformed
    """
    rule = PluralRule.parse(rule)

    used_tags = rule.tags | {_fallback_tag}
    # Indices follow the canonical tag order, restricted to tags in use.
    ordered_tags = [tag for tag in _plural_tags if tag in used_tags]
    compile_expr = _GettextCompiler().compile

    clauses = ''.join(
        f"{compile_expr(ast)} ? {ordered_tags.index(tag)} : "
        for tag, ast in rule.abstract
    )
    fallback_index = ordered_tags.index(_fallback_tag)
    return f"nplurals={len(used_tags)}; plural=({clauses}{fallback_index});"
def in_range_list(num: float | decimal.Decimal, range_list: Iterable[Iterable[float | decimal.Decimal]]) -> bool:
    """Integer range list test.  Backs the "in" operator of the UTS #35
    pluralization rule language: the number must be integral *and* fall
    inside one of the inclusive ``(min, max)`` ranges.

    >>> in_range_list(1, [(1, 3)])
    True
    >>> in_range_list(3, [(1, 3)])
    True
    >>> in_range_list(3, [(1, 3), (5, 8)])
    True
    >>> in_range_list(1.2, [(1, 4)])
    False
    >>> in_range_list(10, [(1, 4)])
    False
    >>> in_range_list(10, [(1, 4), (6, 8)])
    False
    """
    if num != int(num):
        # A fractional part can never satisfy "in".
        return False
    return within_range_list(num, range_list)
def within_range_list(num: float | decimal.Decimal, range_list: Iterable[Iterable[float | decimal.Decimal]]) -> bool:
    """Float range test.  Backs the "within" operator of the UTS #35
    pluralization rule language: true when the number lies inside any of the
    inclusive ``(min, max)`` ranges, integral or not.

    >>> within_range_list(1, [(1, 3)])
    True
    >>> within_range_list(1.0, [(1, 3)])
    True
    >>> within_range_list(1.2, [(1, 4)])
    True
    >>> within_range_list(8.8, [(1, 4), (7, 15)])
    True
    >>> within_range_list(10, [(1, 4)])
    False
    >>> within_range_list(10.5, [(1, 4), (20, 30)])
    False
    """
    for lower, upper in range_list:
        if num >= lower and num <= upper:
            return True
    return False
def cldr_modulo(a: float, b: float) -> float:
    """Java-style modulo.  The result takes the sign of the dividend
    instead of the sign of the divisor as Python's ``%`` does:

    >>> cldr_modulo(-3, 5)
    -3
    >>> cldr_modulo(-3, -5)
    -3
    >>> cldr_modulo(3, 5)
    3
    """
    dividend_negative = a < 0
    # Work on magnitudes, then restore the dividend's sign afterwards.
    dividend = a * -1 if dividend_negative else a
    divisor = b * -1 if b < 0 else b
    remainder = dividend % divisor
    return remainder * -1 if dividend_negative else remainder
class RuleError(Exception):
    """Signals that a pluralization rule expression is malformed."""
# Operand names that may appear in a rule expression.
# (* marks operands describing the visible fraction digits.)
_VARS = {
    'n',  # absolute value of the source number.
    'i',  # integer digits of n.
    'v',  # * number of visible fraction digits in n, with trailing zeros.
    'w',  # * number of visible fraction digits in n, without trailing zeros.
    'f',  # * visible fraction digits in n, with trailing zeros.
    't',  # * visible fraction digits in n, without trailing zeros.
    'c',  # compact decimal exponent value: exponent of the power of 10 used in compact decimal formatting.
    'e',  # currently, synonym for `c`. however, may be redefined in the future.
}

# Tokenizer table, tried in order: (token type, pattern).  A token type of
# ``None`` means the matched text is dropped (whitespace).
_RULES: list[tuple[str | None, re.Pattern[str]]] = [
    (None, re.compile(r'\s+', re.UNICODE)),
    # Keywords plus the single-letter operands; order inside the character
    # class is irrelevant, sorting just keeps the pattern text stable.
    ('word', re.compile(fr'\b(and|or|is|(?:with)?in|not|mod|[{"".join(sorted(_VARS))}])\b')),
    ('value', re.compile(r'\d+')),
    ('symbol', re.compile(r'%|,|!=|=')),
    ('ellipsis', re.compile(r'\.{2,3}|\u2026', re.UNICODE)),  # U+2026: ELLIPSIS
]
def tokenize_rule(s: str) -> list[tuple[str, str]]:
    """Split a rule expression into ``(type, text)`` tokens.

    Everything from the first ``@`` onwards (the sample section) is ignored.
    The tokens are returned in *reverse* order so that consumers can take the
    next token with ``list.pop()``.

    :param s: the rule expression
    :raise RuleError: if a character cannot start any known token
    """
    text = s.partition('@')[0]
    tokens: list[tuple[str, str]] = []
    cursor, length = 0, len(text)
    while cursor < length:
        for kind, pattern in _RULES:
            found = pattern.match(text, cursor)
            if found is None:
                continue
            cursor = found.end()
            # Patterns without a token type (whitespace) are just skipped.
            if kind:
                tokens.append((kind, found.group()))
            break
        else:
            # No pattern matched at the current position.
            raise RuleError(f"malformed CLDR pluralization rule. Got unexpected {text[cursor]!r}")
    tokens.reverse()
    return tokens
def test_next_token(
    tokens: list[tuple[str, str]],
    type_: str,
    value: str | None = None,
) -> list[tuple[str, str]] | bool:
    """Check whether the next token (the last list element) has the given
    type and, if ``value`` is given, the given text.

    An empty token list is returned as-is (falsy) instead of ``False``.
    """
    if not tokens:
        return tokens
    upcoming = tokens[-1]
    if not upcoming[0] == type_:
        return False
    return value is None or upcoming[1] == value
def skip_token(tokens: list[tuple[str, str]], type_: str, value: str | None = None):
    """Consume and return the next token if it matches ``type_`` (and
    ``value`` when given); otherwise leave the list alone and return ``None``.
    """
    matches = test_next_token(tokens, type_, value)
    return tokens.pop() if matches else None
def value_node(value: int) -> tuple[Literal['value'], tuple[int]]:
    """Build the AST node for an integer literal."""
    payload = (value,)
    return 'value', payload
def ident_node(name: str) -> tuple[str, tuple[()]]:
    """Build the AST node for an operand: its name with no arguments."""
    no_args = ()
    return name, no_args
def range_list_node(
    range_list: Iterable[Iterable[float | decimal.Decimal]],
) -> tuple[Literal['range_list'], Iterable[Iterable[float | decimal.Decimal]]]:
    """Build the AST node that carries a list of ranges (passed through
    unchanged as the node's second element).
    """
    node = ('range_list', range_list)
    return node
def negate(rv: tuple[Any, ...]) -> tuple[Literal['not'], tuple[tuple[Any, ...]]]:
    """Wrap an AST node in a ``'not'`` node."""
    operands = (rv,)
    return 'not', operands
class _Parser:
    """Internal parser.  Translates a single rule expression into an abstract
    syntax tree made of nested tuples, following this grammar::

        condition     = and_condition ('or' and_condition)*
                        ('@integer' samples)?
                        ('@decimal' samples)?
        and_condition = relation ('and' relation)*
        relation      = is_relation | in_relation | within_relation
        is_relation   = expr 'is' ('not')? value
        in_relation   = expr (('not')? 'in' | '=' | '!=') range_list
        within_relation = expr ('not')? 'within' range_list
        expr          = operand (('mod' | '%') value)?
        operand       = 'n' | 'i' | 'f' | 't' | 'v' | 'w' | 'c' | 'e'
        range_list    = (range | value) (',' range_list)*
        value         = digit+
        digit         = 0|1|2|3|4|5|6|7|8|9
        range         = value'..'value
        samples       = sampleRange (',' sampleRange)* (',' ('…'|'...'))?
        sampleRange   = decimalValue '~' decimalValue
        decimalValue  = value ('.' value)?

    - Whitespace can occur between or around any of the above tokens.
    - Rules should be mutually exclusive; for a given numeric value, only one
      rule should apply (i.e. the condition should only be true for one of
      the plural rule elements).
    - The in and within relations can take comma-separated lists, such as:
      'n in 3,5,7..15'.
    - Samples are ignored.

    Parsing happens on instantiation; the result is stored in the ``ast``
    attribute (``None`` when the rule has no tokens at all).
    """

    def __init__(self, string):
        self.tokens = tokenize_rule(string)
        # A rule made of samples only produces no tokens and hence no AST.
        self.ast = self.condition() if self.tokens else None
        if self.tokens:
            leftover = self.tokens[-1][1]
            raise RuleError(f"Expected end of rule, got {leftover!r}")

    def expect(self, type_, value=None, term=None):
        """Consume and return the next token, or raise `RuleError` naming what
        was expected (``term``, defaulting to the value or the token type).
        """
        token = skip_token(self.tokens, type_, value)
        if token is not None:
            return token
        if term is None:
            term = repr(type_ if value is None else value)
        if self.tokens:
            raise RuleError(f"expected {term} but got {self.tokens[-1][1]!r}")
        raise RuleError(f"expected {term} but end of rule reached")

    def condition(self):
        """Parse ``and_condition ('or' and_condition)*`` (left-associative)."""
        node = self.and_condition()
        while skip_token(self.tokens, 'word', 'or'):
            node = ('or', (node, self.and_condition()))
        return node

    def and_condition(self):
        """Parse ``relation ('and' relation)*`` (left-associative)."""
        node = self.relation()
        while skip_token(self.tokens, 'word', 'and'):
            node = ('and', (node, self.relation()))
        return node

    def relation(self):
        """Parse one relation: ``is``, ``in``, ``within`` or the operator
        forms handled by `newfangled_relation`.
        """
        left = self.expr()
        if skip_token(self.tokens, 'word', 'is'):
            op = 'isnot' if skip_token(self.tokens, 'word', 'not') else 'is'
            return op, (left, self.value())

        negated = skip_token(self.tokens, 'word', 'not')
        if skip_token(self.tokens, 'word', 'within'):
            method = 'within'
        elif skip_token(self.tokens, 'word', 'in'):
            method = 'in'
        elif negated:
            # "not" may only precede the keyword relations.
            raise RuleError('Cannot negate operator based rules.')
        else:
            return self.newfangled_relation(left)

        node = ('relation', (method, left, self.range_list()))
        return negate(node) if negated else node

    def newfangled_relation(self, left):
        """Parse the ``=`` / ``!=`` operator spelling of the ``in`` relation."""
        for symbol, negated in (('=', False), ('!=', True)):
            if skip_token(self.tokens, 'symbol', symbol):
                break
        else:
            raise RuleError('Expected "=" or "!=" or legacy relation')
        node = ('relation', ('in', left, self.range_list()))
        return negate(node) if negated else node

    def range_or_value(self):
        """Parse ``value`` or ``value..value``; a single value becomes the
        degenerate range ``(value, value)``.
        """
        low = self.value()
        high = self.value() if skip_token(self.tokens, 'ellipsis') else low
        return low, high

    def range_list(self):
        """Parse a comma-separated list of ranges/values."""
        entries = [self.range_or_value()]
        while skip_token(self.tokens, 'symbol', ','):
            entries.append(self.range_or_value())
        return range_list_node(entries)

    def expr(self):
        """Parse an operand, optionally followed by ``mod``/``%`` and a value."""
        word = skip_token(self.tokens, 'word')
        if word is None or word[1] not in _VARS:
            raise RuleError('Expected identifier variable')
        operand = ident_node(word[1])
        has_modulo = (
            skip_token(self.tokens, 'word', 'mod')
            or skip_token(self.tokens, 'symbol', '%')
        )
        if has_modulo:
            return 'mod', (operand, self.value())
        return operand

    def value(self):
        """Parse an integer literal into a value node."""
        token = self.expect('value')
        return value_node(int(token[1]))
def _binary_compiler(tmpl):
    """Compiler factory for the `_Compiler`: returns a method that compiles
    both operands and interpolates them into the two-slot ``tmpl``.
    """
    def compile_binary(self, left, right):
        return tmpl % (self.compile(left), self.compile(right))

    return compile_binary
def _unary_compiler(tmpl):
    """Compiler factory for the `_Compiler`: returns a method that compiles
    its single operand and interpolates it into the one-slot ``tmpl``.
    """
    def compile_unary(self, x):
        return tmpl % self.compile(x)

    return compile_unary
def compile_zero(x):
    """Compile any operand to the constant ``'0'``.  Used as a method by the
    compilers whose target has no fraction support.
    """
    return '0'
class _Compiler:
    """Base class of the compilers, which turn the parsed expressions into
    the various output formats.  The defaults here emit C-like syntax.
    """

    def compile(self, arg):
        """Dispatch an ``(op, args)`` AST node to the ``compile_<op>`` method."""
        op, args = arg
        handler = getattr(self, f"compile_{op}")
        return handler(*args)

    # Operands compile to their own name.
    def compile_n(self):
        return 'n'

    def compile_i(self):
        return 'i'

    def compile_v(self):
        return 'v'

    def compile_w(self):
        return 'w'

    def compile_f(self):
        return 'f'

    def compile_t(self):
        return 't'

    def compile_c(self):
        return 'c'

    def compile_e(self):
        return 'e'

    def compile_value(self, v):
        return str(v)

    # Operators.
    compile_and = _binary_compiler('(%s && %s)')
    compile_or = _binary_compiler('(%s || %s)')
    compile_not = _unary_compiler('(!%s)')
    compile_mod = _binary_compiler('(%s %% %s)')
    compile_is = _binary_compiler('(%s == %s)')
    compile_isnot = _binary_compiler('(%s != %s)')

    def compile_relation(self, method, expr, range_list):
        """Range relations have no generic form; subclasses must provide one."""
        raise NotImplementedError()
class _PythonCompiler(_Compiler):
    """Compiles an expression into Python source."""

    compile_and = _binary_compiler('(%s and %s)')
    compile_or = _binary_compiler('(%s or %s)')
    compile_not = _unary_compiler('(not %s)')
    compile_mod = _binary_compiler('MOD(%s, %s)')

    def compile_relation(self, method, expr, range_list):
        """Emit ``IN(...)`` / ``WITHIN(...)`` with the ranges as a list of
        ``(min, max)`` tuples.
        """
        pairs = []
        for low, high in range_list[1]:
            pairs.append(f"({self.compile(low)}, {self.compile(high)})")
        ranges = ",".join(pairs)
        helper = method.upper()
        return f"{helper}({self.compile(expr)}, [{ranges}])"
class _GettextCompiler(_Compiler):
    """Compiles an expression into a gettext plural expression."""

    # Only ``n`` is used as a variable here: ``i`` is emitted as ``n`` and
    # the fraction operands are emitted as the constant 0.
    compile_i = _Compiler.compile_n
    compile_v = compile_zero
    compile_w = compile_zero
    compile_f = compile_zero
    compile_t = compile_zero

    def compile_relation(self, method, expr, range_list):
        """Emit an ``||`` chain of equality / bounds checks, one per range."""
        subject = self.compile(expr)
        checks = []
        for low, high in range_list[1]:
            if low == high:
                checks.append(f"({subject} == {self.compile(low)})")
                continue
            lower = self.compile(low)
            upper = self.compile(high)
            checks.append(f"({subject} >= {lower} && {subject} <= {upper})")
        return f"({' || '.join(checks)})"
class _JavaScriptCompiler(_GettextCompiler):
    """Compiles the expression into plain JavaScript."""

    # XXX: presently javascript does not support any of the
    # fraction support and basically only deals with integers.
    def compile_i(self):
        return 'parseInt(n, 10)'

    compile_v = compile_zero
    compile_w = compile_zero
    compile_f = compile_zero
    compile_t = compile_zero

    def compile_relation(self, method, expr, range_list):
        """Reuse the gettext range checks; for ``in`` additionally require
        the value to be integral.
        """
        code = super().compile_relation(method, expr, range_list)
        if method != 'in':
            return code
        subject = self.compile(expr)
        return f"(parseInt({subject}, 10) == {subject} && {code})"
class _UnicodeCompiler(_Compiler):
    """Turns the AST back into a unicode pluralization rule."""

    # XXX: this currently spits out the old syntax instead of the new
    # one.  We can change that, but it will break a whole bunch of stuff
    # for users I suppose.

    compile_is = _binary_compiler('%s is %s')
    compile_isnot = _binary_compiler('%s is not %s')
    compile_and = _binary_compiler('%s and %s')
    compile_or = _binary_compiler('%s or %s')
    compile_mod = _binary_compiler('%s mod %s')

    def compile_not(self, relation):
        """Render a negated relation by passing the wrapped relation's
        arguments on with ``negated=True``.
        """
        relation_args = relation[1]
        return self.compile_relation(*relation_args, negated=True)

    def compile_relation(self, method, expr, range_list, negated=False):
        """Render ``<expr> [not] <method> a,b..c`` in the legacy syntax."""
        parts = []
        for low, high in range_list[1]:
            rendered = self.compile(low)
            if low != high:
                rendered = f"{rendered}..{self.compile(high)}"
            parts.append(rendered)
        negation = ' not' if negated else ''
        return f"{self.compile(expr)}{negation} {method} {','.join(parts)}"