@@ -1,8 +1,9 @@
-from types import ModuleType
 from .utils import Serialize
 from .lexer import TerminalDef, Token

 ###{standalone
+from types import ModuleType
 from typing import Any, Callable, Collection, Dict, Optional, TYPE_CHECKING

 if TYPE_CHECKING:
@@ -17,13 +18,13 @@ class LexerConf(Serialize):
     terminals: Collection[TerminalDef]
     re_module: ModuleType
     ignore: Collection[str] = ()
-    postlex: 'PostLex' = None
-    callbacks: Optional[Dict[str, _Callback]] = None
+    postlex: 'Optional[PostLex]' = None
+    callbacks: Dict[str, _Callback] = {}
     g_regex_flags: int = 0
     skip_validation: bool = False
     use_bytes: bool = False

-    def __init__(self, terminals: Collection[TerminalDef], re_module: ModuleType, ignore: Collection[str]=(), postlex: 'PostLex'=None, callbacks: Optional[Dict[str, _Callback]]=None, g_regex_flags: int=0, skip_validation: bool=False, use_bytes: bool=False):
+    def __init__(self, terminals: Collection[TerminalDef], re_module: ModuleType, ignore: Collection[str]=(), postlex: 'Optional[PostLex]'=None, callbacks: Optional[Dict[str, _Callback]]=None, g_regex_flags: int=0, skip_validation: bool=False, use_bytes: bool=False):
         self.terminals = terminals
         self.terminals_by_name = {t.name: t for t in self.terminals}
         assert len(self.terminals) == len(self.terminals_by_name)
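The `postlex` fix above is the pattern this changeset applies throughout: a parameter whose default is `None` must be annotated `Optional[...]` before a strict type checker accepts it. A minimal sketch of the rule (names are illustrative, not Lark's):

```python
from typing import Optional

class Config:
    retries: int = 3                  # fine: the default matches the annotation
    timeout: Optional[float] = None   # required: plain `float` would reject None
```

Note that `callbacks` moves the other way, from `Optional[Dict[...]]` to a plain `Dict` with an empty-dict default, so code reading the attribute can iterate it without a None check.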
@@ -3,7 +3,7 @@ from .utils import logger, NO_VALUE
 ###{standalone

-from typing import Dict, Iterable, Callable, Union, TypeVar, Tuple, Any, List, Set, TYPE_CHECKING
+from typing import Dict, Iterable, Callable, Union, TypeVar, Tuple, Any, List, Set, Optional, TYPE_CHECKING

 if TYPE_CHECKING:
     from .lexer import Token
@@ -73,7 +73,7 @@ class UnexpectedInput(LarkError):
             after = text[pos:end].split(b'\n', 1)[0]
             return (before + after + b'\n' + b' ' * len(before.expandtabs()) + b'^\n').decode("ascii", "backslashreplace")

-    def match_examples(self, parse_fn: 'Callable[[str], Tree]', examples: Union[Dict[T, Iterable[str]], Iterable[Tuple[T, Iterable[str]]]], token_type_match_fallback: bool=False, use_accepts: bool=False) -> T:
+    def match_examples(self, parse_fn: 'Callable[[str], Tree]', examples: Union[Dict[T, Iterable[str]], Iterable[Tuple[T, Iterable[str]]]], token_type_match_fallback: bool=False, use_accepts: bool=False) -> Optional[T]:
         """Allows you to detect what's wrong in the input text by matching
         against example errors.
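The return type changes to `Optional[T]` because `match_examples` returns `None` when no example matches. A sketch of the documented usage, with hypothetical labels and example inputs:

```python
from lark import Lark, UnexpectedInput

parser = Lark(grammar, parser='lalr')  # `grammar` assumed defined elsewhere

def diagnose(text: str):
    try:
        return parser.parse(text)
    except UnexpectedInput as u:
        label = u.match_examples(parser.parse, {
            'unclosed_paren': ['(a', 'f(1, 2'],
            'missing_value': ['x = '],
        })
        if label is None:   # the case the new Optional[T] annotation captures
            raise
        print('error class:', label)
```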
@@ -14,12 +14,12 @@ class DedentError(LarkError):

 class Indenter(PostLex, ABC):
-    paren_level: Optional[int]
-    indent_level: Optional[List[int]]
+    paren_level: int
+    indent_level: List[int]

     def __init__(self) -> None:
-        self.paren_level = None
-        self.indent_level = None
+        self.paren_level = 0
+        self.indent_level = [0]
         assert self.tab_len > 0

     def handle_NL(self, token: Token) -> Iterator[Token]:
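Initializing `paren_level`/`indent_level` to real values (`0` and `[0]`) instead of `None` is what lets the annotations above drop `Optional`. Usage is unchanged; a typical subclass, following the pattern from Lark's docs (terminal names belong to a hypothetical grammar):

```python
from lark.indenter import Indenter

class BlockIndenter(Indenter):
    NL_type = '_NL'
    OPEN_PAREN_types = ['LPAR']
    CLOSE_PAREN_types = ['RPAR']
    INDENT_type = '_INDENT'
    DEDENT_type = '_DEDENT'
    tab_len = 8

# After __init__, paren_level == 0 and indent_level == [0] -- no None checks needed.
```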
@@ -15,7 +15,7 @@ from .grammar import Rule

 import re
 try:
-    import regex
+    import regex  # type: ignore
 except ImportError:
     regex = None
@@ -149,7 +149,7 @@ class LarkOptions(Serialize):
     # - As an attribute of `LarkOptions` above
     # - Potentially in `_LOAD_ALLOWED_OPTIONS` below this class, when the option doesn't change how the grammar is loaded
     # - Potentially in `lark.tools.__init__`, if it makes sense, and it can easily be passed as a cmd argument
-    _defaults = {
+    _defaults: Dict[str, Any] = {
         'debug': False,
         'keep_all_tokens': False,
         'tree_class': None,
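The explicit `Dict[str, Any]` matters because the option values have mixed types. Without it, mypy infers the value type from the dict literal and then rejects entries of other types. A small sketch of the failure mode (not Lark code, and the exact inferred type may vary by mypy version):

```python
from typing import Any, Dict

inferred = {'debug': False, 'tree_class': None}
# mypy infers roughly Dict[str, Optional[bool]], so this line would error:
# inferred['start'] = ['start']

explicit: Dict[str, Any] = {'debug': False, 'tree_class': None}
explicit['start'] = ['start']  # fine: values may be of any type
```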
@@ -414,6 +414,7 @@ class Lark(Serialize):
             if cache_fn:
                 logger.debug('Saving grammar to cache: %s', cache_fn)
                 with FS.open(cache_fn, 'wb') as f:
+                    assert cache_md5 is not None
                     f.write(cache_md5.encode('utf8') + b'\n')
                     pickle.dump(used_files, f)
                     self.save(f)
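The added `assert` exists for the type checker: on this path `cache_md5` is `Optional[str]`, and asserting it is not `None` narrows it to `str` before `.encode()` is called. The general narrowing pattern, as a standalone sketch:

```python
from typing import Optional

def lookup_md5(key: str) -> Optional[str]:
    cache = {'grammar': '5d41402abc4b2a76b9719d911017c592'}
    return cache.get(key)

md5 = lookup_md5('grammar')
assert md5 is not None        # narrows Optional[str] to str for the checker
data = md5.encode('utf8')     # no "item of None has no attribute" error
```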
@@ -574,7 +575,7 @@ class Lark(Serialize):
         """Get information about a terminal"""
         return self._terminals_dict[name]

-    def parse_interactive(self, text: str=None, start: Optional[str]=None) -> 'InteractiveParser':
+    def parse_interactive(self, text: Optional[str]=None, start: Optional[str]=None) -> 'InteractiveParser':
         """Start an interactive parsing session.

         Parameters:
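`text` was already optional at runtime (an interactive session can be fed tokens manually); the signature now says so. A sketch of the documented interactive flow, assuming a LALR parser and some input:

```python
from lark import Lark

parser = Lark(grammar, parser='lalr')   # `grammar` assumed defined

ip = parser.parse_interactive('1 + ')   # text may also be omitted entirely
ip.exhaust_lexer()                      # feed all tokens from the given text
print(ip.accepts())                     # token types the parser can take next
```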
@@ -588,7 +589,7 @@ class Lark(Serialize):
         """
         return self.parser.parse_interactive(text, start=start)

-    def parse(self, text: str, start: Optional[str]=None, on_error: 'Callable[[UnexpectedInput], bool]'=None) -> Tree:
+    def parse(self, text: str, start: Optional[str]=None, on_error: 'Optional[Callable[[UnexpectedInput], bool]]'=None) -> Tree:
         """Parse the given text, according to the options provided.

         Parameters:
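`on_error` likewise always accepted `None`; only the annotation changes. The documented recovery hook, sketched with a trivial handler:

```python
from lark import Lark, UnexpectedInput

parser = Lark(grammar, parser='lalr')   # on_error requires the LALR parser

def keep_going(e: UnexpectedInput) -> bool:
    print('recovering from', type(e).__name__)
    return True   # True means: try to resume parsing

tree = parser.parse(text, on_error=keep_going)   # `text` assumed defined
tree = parser.parse(text)                        # equivalent to on_error=None
```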
@@ -23,10 +23,10 @@ class Pattern(Serialize, ABC):
     value: str
     flags: Collection[str]
-    raw: str = None
-    type: str = None
+    raw: Optional[str] = None
+    type: Optional[str] = None

-    def __init__(self, value: str, flags: Collection[str]=(), raw: str=None) -> None:
+    def __init__(self, value: str, flags: Collection[str]=(), raw: Optional[str]=None) -> None:
         self.value = value
         self.flags = frozenset(flags)
         self.raw = raw
@@ -81,7 +81,10 @@ class PatternStr(Pattern):
     @property
     def min_width(self) -> int:
         return len(self.value)
-    max_width = min_width
+
+    @property
+    def max_width(self) -> int:
+        return len(self.value)


 class PatternRE(Pattern):
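`max_width = min_width` worked at runtime, since it merely aliases the property object, but a checker sees the alias as `property`, not as an `int`-returning accessor, so the replacement spells the second property out. An illustration of the distinction (toy class, not Lark code):

```python
class Probe:
    @property
    def min_width(self) -> int:
        return 3

    max_width = min_width   # same behavior at runtime, but opaque to mypy

p = Probe()
print(p.min_width, p.max_width)   # 3 3 either way
```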
@@ -320,15 +323,36 @@ def _regexp_has_newline(r):
     """
     return '\n' in r or '\\n' in r or '\\s' in r or '[^' in r or ('(?s' in r and '.' in r)


+class LexerState(object):
+    __slots__ = 'text', 'line_ctr', 'last_token'
+
+    def __init__(self, text, line_ctr, last_token=None):
+        self.text = text
+        self.line_ctr = line_ctr
+        self.last_token = last_token
+
+    def __eq__(self, other):
+        if not isinstance(other, LexerState):
+            return NotImplemented
+        return self.text is other.text and self.line_ctr == other.line_ctr and self.last_token == other.last_token
+
+    def __copy__(self):
+        return type(self)(self.text, copy(self.line_ctr), self.last_token)
+
+
 _Callback = Callable[[Token], Token]

 class Lexer(ABC):
     """Lexer interface

     Method Signatures:
-        lex(self, text) -> Iterator[Token]
+        lex(self, lexer_state, parser_state) -> Iterator[Token]
     """

-    lex: Callable[..., Iterator[Token]] = NotImplemented
+    @abstractmethod
+    def lex(self, lexer_state: LexerState, parser_state: Any) -> Iterator[Token]:
+        ...

     def make_lexer_state(self, text):
         line_ctr = LineCounter(b'\n' if isinstance(text, bytes) else '\n')
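`LexerState` is moved above `Lexer` (and deleted from its old spot below) so the new `@abstractmethod` signature can name it. Two details worth noting: `__eq__` compares `text` with `is`, a deliberate shortcut since all states of one lex share the same input string, and `__copy__` copies only the line counter, which makes checkpointing cheap. A sketch of those semantics, using the classes as defined in this file:

```python
from copy import copy

state = LexerState('1 + 2\n', LineCounter('\n'))
snapshot = copy(state)                           # cheap checkpoint
assert snapshot.text is state.text               # input string is shared
assert snapshot.line_ctr is not state.line_ctr   # position is independent
```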
@@ -394,6 +418,7 @@ class TraditionalLexer(Lexer):
     def mres(self) -> List[Tuple[REPattern, Dict[int, str]]]:
         if self._mres is None:
             self._build()
+            assert self._mres is not None
         return self._mres

     def match(self, text: str, pos: int) -> Optional[Tuple[str, str]]:
@@ -402,12 +427,12 @@ class TraditionalLexer(Lexer):
         if m:
             return m.group(0), type_from_index[m.lastindex]

-    def lex(self, state: Any, parser_state: Any) -> Iterator[Token]:
+    def lex(self, state: LexerState, parser_state: Any) -> Iterator[Token]:
         with suppress(EOFError):
             while True:
                 yield self.next_token(state, parser_state)

-    def next_token(self, lex_state: Any, parser_state: Any=None) -> Token:
+    def next_token(self, lex_state: LexerState, parser_state: Any=None) -> Token:
         line_ctr = lex_state.line_ctr
         while line_ctr.char_pos < len(lex_state.text):
             res = self.match(lex_state.text, line_ctr.char_pos)
@@ -443,24 +468,6 @@ class TraditionalLexer(Lexer):
         raise EOFError(self)


-class LexerState(object):
-    __slots__ = 'text', 'line_ctr', 'last_token'
-
-    def __init__(self, text, line_ctr, last_token=None):
-        self.text = text
-        self.line_ctr = line_ctr
-        self.last_token = last_token
-
-    def __eq__(self, other):
-        if not isinstance(other, LexerState):
-            return NotImplemented
-        return self.text is other.text and self.line_ctr == other.line_ctr and self.last_token == other.last_token
-
-    def __copy__(self):
-        return type(self)(self.text, copy(self.line_ctr), self.last_token)
-
-
 class ContextualLexer(Lexer):

     lexers: Dict[str, TraditionalLexer]
@@ -494,7 +501,7 @@ class ContextualLexer(Lexer):
     def make_lexer_state(self, text):
         return self.root_lexer.make_lexer_state(text)

-    def lex(self, lexer_state: Any, parser_state: Any) -> Iterator[Token]:
+    def lex(self, lexer_state: LexerState, parser_state: Any) -> Iterator[Token]:
         try:
             while True:
                 lexer = self.lexers[parser_state.position]
@@ -7,7 +7,7 @@ from .parsers.lalr_parser import LALR_Parser
 from .tree import Tree
 from .common import LexerConf, ParserConf

 try:
-    import regex
+    import regex  # type: ignore
 except ImportError:
     regex = None
 import re
@@ -1,8 +1,9 @@
 try:
-    from future_builtins import filter
+    from future_builtins import filter  # type: ignore
 except ImportError:
     pass

 import sys
 from copy import deepcopy
@@ -49,7 +50,7 @@ class Tree(object):
     data: str
     children: 'List[Union[str, Tree]]'

-    def __init__(self, data: str, children: 'List[Union[str, Tree]]', meta: Meta=None) -> None:
+    def __init__(self, data: str, children: 'List[Union[str, Tree]]', meta: Optional[Meta]=None) -> None:
         self.data = data
         self.children = children
         self._meta = meta
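`meta` already defaulted to `None`; the `Optional[Meta]` annotation just records that. Direct construction is unaffected, as in this small sketch (rule and token names are made up):

```python
from lark import Tree, Token

tree = Tree('add', [Token('NUMBER', '1'), Token('NUMBER', '2')])  # meta=None
print(tree.pretty())
```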
@@ -196,7 +197,7 @@ def pydot__tree_to_graph(tree, rankdir="LR", **kwargs):
     possible attributes, see https://www.graphviz.org/doc/info/attrs.html.
     """

-    import pydot
+    import pydot  # type: ignore
     graph = pydot.Dot(graph_type='digraph', rankdir=rankdir, **kwargs)
     i = [0]
@@ -134,7 +134,7 @@ def smart_decorator(f, create_decorator):

 try:
-    import regex
+    import regex  # type: ignore
 except ImportError:
     regex = None