Spaces:
Sleeping
Sleeping
import contextlib | |
import re | |
from dataclasses import dataclass | |
from typing import Dict, Iterator, NoReturn, Optional, Tuple, Union | |
from .specifiers import Specifier | |
@dataclass
class Token:
    """A single lexical token produced by the tokenizer.

    The class is a dataclass so that it can be built positionally as
    ``Token(name, text, position)``.
    """

    # Name of the rule that matched.
    name: str
    # Exact text that the rule matched.
    text: str
    # Offset in the source string at which the match started.
    position: int
class ParserSyntaxError(Exception):
    """The provided source text could not be parsed correctly."""

    def __init__(
        self,
        message: str,
        *,
        source: str,
        span: Tuple[int, int],
    ) -> None:
        """Record what went wrong and where.

        :param message: Human-readable description of the problem.
        :param source: The full text that was being parsed.
        :param span: ``(start, end)`` offsets into ``source`` that the error
            refers to.
        """
        self.span = span
        self.message = message
        self.source = source

        # No arguments are forwarded to Exception, so ``args`` stays empty;
        # the rendered text comes from ``__str__`` below.
        super().__init__()

    def __str__(self) -> str:
        """Render the message, the source, and a marker line under the span.

        The marker is padded with ``span[0]`` spaces, followed by one ``~``
        per character of the span and a final ``^``.
        """
        start, end = self.span[0], self.span[1]
        marker = " " * start + "~" * (end - start) + "^"
        # The source and the marker get the same prefix from the separator,
        # so the marker columns line up with the source columns.
        return "\n ".join([self.message, self.source, marker])
# Token rules keyed by token name.  Each value is either a pattern string or
# an already-compiled pattern.  The annotation is quoted so that
# ``re.Pattern[str]`` is not evaluated at import time.
DEFAULT_RULES: "Dict[str, Union[str, re.Pattern[str]]]" = {
    "LEFT_PARENTHESIS": r"\(",
    "RIGHT_PARENTHESIS": r"\)",
    "LEFT_BRACKET": r"\[",
    "RIGHT_BRACKET": r"\]",
    "SEMICOLON": r";",
    "COMMA": r",",
    # A single- or double-quoted string; the body may not contain the quote
    # character that delimits it (there is no escape handling).
    "QUOTED_STRING": re.compile(
        r"""
            (
                ('[^']*')
                |
                ("[^"]*")
            )
        """,
        re.VERBOSE,
    ),
    # Longer operators are listed before their prefixes ("===" before "==",
    # "<=" before "<") because regex alternation takes the first match.
    "OP": r"(===|==|~=|!=|<=|>=|<|>)",
    "BOOLOP": r"\b(or|and)\b",
    "IN": r"\bin\b",
    "NOT": r"\bnot\b",
    # Recognised variable names; several accept either "." or "_" as the
    # separator (e.g. ``os.name`` and ``os_name``).
    "VARIABLE": re.compile(
        r"""
            \b(
                python_version
                |python_full_version
                |os[._]name
                |sys[._]platform
                |platform_(release|system)
                |platform[._](version|machine|python_implementation)
                |python_implementation
                |implementation_(name|version)
                |extra
            )\b
        """,
        re.VERBOSE,
    ),
    # Built from the operator and version pattern strings exposed by
    # Specifier, compiled verbose and case-insensitive.
    "SPECIFIER": re.compile(
        Specifier._operator_regex_str + Specifier._version_regex_str,
        re.VERBOSE | re.IGNORECASE,
    ),
    "AT": r"\@",
    # Any run of characters up to the next space or tab.
    "URL": r"[^ \t]+",
    "IDENTIFIER": r"\b[a-zA-Z0-9][a-zA-Z0-9._-]*\b",
    "VERSION_PREFIX_TRAIL": r"\.\*",
    "VERSION_LOCAL_LABEL_TRAIL": r"\+[a-z0-9]+(?:[-_\.][a-z0-9]+)*",
    "WS": r"[ \t]+",
    "END": r"$",
}
class Tokenizer:
    """Context-sensitive token parsing.

    Provides methods to examine the input stream to check whether the next token
    matches.
    """

    def __init__(
        self,
        source: str,
        *,
        rules: "Dict[str, Union[str, re.Pattern[str]]]",
    ) -> None:
        """Prepare to tokenize ``source`` using the named ``rules``.

        :param source: The text to tokenize.
        :param rules: Mapping of token name to a pattern string or a compiled
            pattern.
        """
        self.source = source
        # re.compile() takes pattern strings and compiled patterns alike, so
        # every rule ends up as a compiled pattern.
        self.rules: Dict[str, re.Pattern[str]] = {
            name: re.compile(pattern) for name, pattern in rules.items()
        }
        # Token loaded by a successful check() and not yet consumed by read().
        self.next_token: Optional[Token] = None
        # Offset into ``source`` at which the next match is attempted.
        self.position = 0

    def consume(self, name: str) -> None:
        """Move beyond provided token name, if at current position."""
        if self.check(name):
            self.read()

    def check(self, name: str, *, peek: bool = False) -> bool:
        """Check whether the next token has the provided name.

        By default, if the check succeeds, the token *must* be read before
        another check. If `peek` is set to `True`, the token is not loaded and
        would need to be checked again.

        :param name: Name of the rule to try at the current position.
        :param peek: When true, only report whether the rule matches.
        :returns: Whether the rule matches at the current position.
        """
        assert (
            self.next_token is None
        ), f"Cannot check for {name!r}, already have {self.next_token!r}"
        assert name in self.rules, f"Unknown token name: {name!r}"

        expression = self.rules[name]

        # Anchored at the current position; text before it is not considered.
        match = expression.match(self.source, self.position)
        if match is None:
            return False
        if not peek:
            self.next_token = Token(name, match[0], self.position)
        return True

    def expect(self, name: str, *, expected: str) -> "Token":
        """Expect a certain token name next, failing with a syntax error otherwise.

        On success the token is read (the position advances past it) and
        returned.

        :param name: Name of the rule that must match at the current position.
        :param expected: Description of what was expected, used in the error
            message.
        :raises ParserSyntaxError: If the rule does not match.
        """
        if not self.check(name):
            self.raise_syntax_error(f"Expected {expected}")
        return self.read()

    def read(self) -> "Token":
        """Consume the next token and return it.

        A token must have been loaded by a preceding successful, non-peek
        ``check``.
        """
        token = self.next_token
        assert token is not None

        self.position += len(token.text)
        self.next_token = None

        return token

    def raise_syntax_error(
        self,
        message: str,
        *,
        span_start: Optional[int] = None,
        span_end: Optional[int] = None,
    ) -> NoReturn:
        """Raise ParserSyntaxError at the given position.

        :param message: Description of the problem.
        :param span_start: Start offset of the offending span; defaults to the
            current position.
        :param span_end: End offset of the offending span; defaults to the
            current position.
        :raises ParserSyntaxError: Always.
        """
        span = (
            self.position if span_start is None else span_start,
            self.position if span_end is None else span_end,
        )
        raise ParserSyntaxError(
            message,
            source=self.source,
            span=span,
        )

    @contextlib.contextmanager
    def enclosing_tokens(
        self, open_token: str, close_token: str, *, around: str
    ) -> Iterator[None]:
        """Context manager for an optional pair of enclosing tokens.

        On entry, ``open_token`` is read if it is present. On normal exit,
        ``close_token`` is required and read only if the opening token was
        present. If the ``with`` body raises, that exception propagates and
        the closing token is not checked.

        :param open_token: Rule name of the opening token.
        :param close_token: Rule name of the closing token.
        :param around: Description of the enclosed content, used in the error
            message.
        :raises ParserSyntaxError: If the opening token was read but the
            closing token does not follow the body.
        """
        if self.check(open_token):
            # Remember where the opener was so the error span can start there.
            open_position = self.position
            self.read()
        else:
            open_position = None

        yield

        if open_position is None:
            return

        if not self.check(close_token):
            self.raise_syntax_error(
                f"Expected matching {close_token} for {open_token}, after {around}",
                span_start=open_position,
            )

        self.read()