@@ -1,8 +1,9 @@ | |||
from types import ModuleType | |||
from .utils import Serialize | |||
from .lexer import TerminalDef, Token | |||
###{standalone | |||
from types import ModuleType | |||
from typing import Any, Callable, Collection, Dict, Optional, TYPE_CHECKING | |||
if TYPE_CHECKING: | |||
@@ -17,13 +18,13 @@ class LexerConf(Serialize): | |||
terminals: Collection[TerminalDef] | |||
re_module: ModuleType | |||
ignore: Collection[str] = () | |||
postlex: 'PostLex' = None | |||
callbacks: Optional[Dict[str, _Callback]] = None | |||
postlex: 'Optional[PostLex]' = None | |||
callbacks: Dict[str, _Callback] = {} | |||
g_regex_flags: int = 0 | |||
skip_validation: bool = False | |||
use_bytes: bool = False | |||
def __init__(self, terminals: Collection[TerminalDef], re_module: ModuleType, ignore: Collection[str]=(), postlex: 'PostLex'=None, callbacks: Optional[Dict[str, _Callback]]=None, g_regex_flags: int=0, skip_validation: bool=False, use_bytes: bool=False): | |||
def __init__(self, terminals: Collection[TerminalDef], re_module: ModuleType, ignore: Collection[str]=(), postlex: 'Optional[PostLex]'=None, callbacks: Optional[Dict[str, _Callback]]=None, g_regex_flags: int=0, skip_validation: bool=False, use_bytes: bool=False): | |||
self.terminals = terminals | |||
self.terminals_by_name = {t.name: t for t in self.terminals} | |||
assert len(self.terminals) == len(self.terminals_by_name) | |||
@@ -3,7 +3,7 @@ from .utils import logger, NO_VALUE | |||
###{standalone | |||
from typing import Dict, Iterable, Callable, Union, TypeVar, Tuple, Any, List, Set, TYPE_CHECKING | |||
from typing import Dict, Iterable, Callable, Union, TypeVar, Tuple, Any, List, Set, Optional, TYPE_CHECKING | |||
if TYPE_CHECKING: | |||
from .lexer import Token | |||
@@ -73,7 +73,7 @@ class UnexpectedInput(LarkError): | |||
after = text[pos:end].split(b'\n', 1)[0] | |||
return (before + after + b'\n' + b' ' * len(before.expandtabs()) + b'^\n').decode("ascii", "backslashreplace") | |||
def match_examples(self, parse_fn: 'Callable[[str], Tree]', examples: Union[Dict[T, Iterable[str]], Iterable[Tuple[T, Iterable[str]]]], token_type_match_fallback: bool=False, use_accepts: bool=False) -> T: | |||
def match_examples(self, parse_fn: 'Callable[[str], Tree]', examples: Union[Dict[T, Iterable[str]], Iterable[Tuple[T, Iterable[str]]]], token_type_match_fallback: bool=False, use_accepts: bool=False) -> Optional[T]: | |||
"""Allows you to detect what's wrong in the input text by matching | |||
against example errors. | |||
@@ -14,12 +14,12 @@ class DedentError(LarkError): | |||
class Indenter(PostLex, ABC): | |||
paren_level: Optional[int] | |||
indent_level: Optional[List[int]] | |||
paren_level: int | |||
indent_level: List[int] | |||
def __init__(self) -> None: | |||
self.paren_level = None | |||
self.indent_level = None | |||
self.paren_level = 0 | |||
self.indent_level = [0] | |||
assert self.tab_len > 0 | |||
def handle_NL(self, token: Token) -> Iterator[Token]: | |||
@@ -15,7 +15,7 @@ from .grammar import Rule | |||
import re | |||
try: | |||
import regex | |||
import regex # type: ignore | |||
except ImportError: | |||
regex = None | |||
@@ -149,7 +149,7 @@ class LarkOptions(Serialize): | |||
# - As an attribute of `LarkOptions` above | |||
# - Potentially in `_LOAD_ALLOWED_OPTIONS` below this class, when the option doesn't change how the grammar is loaded | |||
# - Potentially in `lark.tools.__init__`, if it makes sense, and it can easily be passed as a cmd argument | |||
_defaults = { | |||
_defaults: Dict[str, Any] = { | |||
'debug': False, | |||
'keep_all_tokens': False, | |||
'tree_class': None, | |||
@@ -414,6 +414,7 @@ class Lark(Serialize): | |||
if cache_fn: | |||
logger.debug('Saving grammar to cache: %s', cache_fn) | |||
with FS.open(cache_fn, 'wb') as f: | |||
assert cache_md5 is not None | |||
f.write(cache_md5.encode('utf8') + b'\n') | |||
pickle.dump(used_files, f) | |||
self.save(f) | |||
@@ -574,7 +575,7 @@ class Lark(Serialize): | |||
"""Get information about a terminal""" | |||
return self._terminals_dict[name] | |||
def parse_interactive(self, text: str=None, start: Optional[str]=None) -> 'InteractiveParser': | |||
def parse_interactive(self, text: Optional[str]=None, start: Optional[str]=None) -> 'InteractiveParser': | |||
"""Start an interactive parsing session. | |||
Parameters: | |||
@@ -588,7 +589,7 @@ class Lark(Serialize): | |||
""" | |||
return self.parser.parse_interactive(text, start=start) | |||
def parse(self, text: str, start: Optional[str]=None, on_error: 'Callable[[UnexpectedInput], bool]'=None) -> Tree: | |||
def parse(self, text: str, start: Optional[str]=None, on_error: 'Optional[Callable[[UnexpectedInput], bool]]'=None) -> Tree: | |||
"""Parse the given text, according to the options provided. | |||
Parameters: | |||
@@ -23,10 +23,10 @@ class Pattern(Serialize, ABC): | |||
value: str | |||
flags: Collection[str] | |||
raw: str = None | |||
type: str = None | |||
raw: Optional[str] = None | |||
type: Optional[str] = None | |||
def __init__(self, value: str, flags: Collection[str]=(), raw: str=None) -> None: | |||
def __init__(self, value: str, flags: Collection[str]=(), raw: Optional[str]=None) -> None: | |||
self.value = value | |||
self.flags = frozenset(flags) | |||
self.raw = raw | |||
@@ -81,7 +81,10 @@ class PatternStr(Pattern): | |||
@property
def min_width(self) -> int:
    """Length of ``self.value``, in characters."""
    literal = self.value
    return len(literal)
max_width = min_width | |||
@property
def max_width(self) -> int:
    """Length of ``self.value``, in characters."""
    literal = self.value
    return len(literal)
class PatternRE(Pattern): | |||
@@ -320,15 +323,36 @@ def _regexp_has_newline(r): | |||
""" | |||
return '\n' in r or '\\n' in r or '\\s' in r or '[^' in r or ('(?s' in r and '.' in r) | |||
class LexerState(object):
    """Container for the three pieces of state a lexing run carries around:
    ``text``, ``line_ctr`` and ``last_token``.

    Instances use ``__slots__``, so no other attributes can be attached.
    """

    __slots__ = ('text', 'line_ctr', 'last_token')

    def __init__(self, text, line_ctr, last_token=None):
        self.last_token = last_token
        self.line_ctr = line_ctr
        self.text = text

    def __eq__(self, other):
        if isinstance(other, LexerState):
            # ``text`` is compared by identity; the other two fields by equality.
            same_text = self.text is other.text
            return same_text and self.line_ctr == other.line_ctr and self.last_token == other.last_token
        # Unknown type: let Python try the reflected comparison.
        return NotImplemented

    def __copy__(self):
        # ``text`` and ``last_token`` are shared with the copy; only the
        # line counter is duplicated.
        cls = type(self)
        return cls(self.text, copy(self.line_ctr), self.last_token)
_Callback = Callable[[Token], Token] | |||
class Lexer(ABC): | |||
"""Lexer interface | |||
Method Signatures: | |||
lex(self, text) -> Iterator[Token] | |||
lex(self, lexer_state, parser_state) -> Iterator[Token] | |||
""" | |||
lex: Callable[..., Iterator[Token]] = NotImplemented | |||
@abstractmethod | |||
def lex(self, lexer_state: LexerState, parser_state: Any) -> Iterator[Token]: | |||
... | |||
def make_lexer_state(self, text): | |||
line_ctr = LineCounter(b'\n' if isinstance(text, bytes) else '\n') | |||
@@ -394,6 +418,7 @@ class TraditionalLexer(Lexer): | |||
def mres(self) -> List[Tuple[REPattern, Dict[int, str]]]:
    """Return ``self._mres``, calling ``self._build()`` first if it is still ``None``."""
    if self._mres is not None:
        return self._mres
    self._build()
    # ``_build`` is expected to have populated ``_mres``; the assert also
    # narrows the Optional for the type checker.
    assert self._mres is not None
    return self._mres
def match(self, text: str, pos: int) -> Optional[Tuple[str, str]]: | |||
@@ -402,12 +427,12 @@ class TraditionalLexer(Lexer): | |||
if m: | |||
return m.group(0), type_from_index[m.lastindex] | |||
def lex(self, state: LexerState, parser_state: Any) -> Iterator[Token]:
    """Yield tokens for ``state`` until the input is exhausted.

    Repeatedly calls ``self.next_token(state, parser_state)`` and yields each
    result. An ``EOFError`` raised while producing a token is treated as the
    end-of-stream signal: it is swallowed and the generator finishes normally.
    """
    with suppress(EOFError):
        while True:
            yield self.next_token(state, parser_state)
def next_token(self, lex_state: Any, parser_state: Any=None) -> Token: | |||
def next_token(self, lex_state: LexerState, parser_state: Any=None) -> Token: | |||
line_ctr = lex_state.line_ctr | |||
while line_ctr.char_pos < len(lex_state.text): | |||
res = self.match(lex_state.text, line_ctr.char_pos) | |||
@@ -443,24 +468,6 @@ class TraditionalLexer(Lexer): | |||
raise EOFError(self) | |||
class LexerState(object):
    """Slot-only record holding ``text``, ``line_ctr`` and ``last_token``."""

    __slots__ = ('text', 'line_ctr', 'last_token')

    def __init__(self, text, line_ctr, last_token=None):
        self.text, self.line_ctr, self.last_token = text, line_ctr, last_token

    def __eq__(self, other):
        # Comparison with anything that is not a LexerState is left to Python.
        if not isinstance(other, LexerState):
            return NotImplemented
        # Identity check on ``text``, equality on the remaining fields.
        return (
            self.text is other.text
            and self.line_ctr == other.line_ctr
            and self.last_token == other.last_token
        )

    def __copy__(self):
        # Only ``line_ctr`` is copied; ``text`` and ``last_token`` are shared.
        line_ctr_copy = copy(self.line_ctr)
        return type(self)(self.text, line_ctr_copy, self.last_token)
class ContextualLexer(Lexer): | |||
lexers: Dict[str, TraditionalLexer] | |||
@@ -494,7 +501,7 @@ class ContextualLexer(Lexer): | |||
def make_lexer_state(self, text):
    """Delegate state creation for ``text`` to ``self.root_lexer``."""
    root = self.root_lexer
    return root.make_lexer_state(text)
def lex(self, lexer_state: Any, parser_state: Any) -> Iterator[Token]: | |||
def lex(self, lexer_state: LexerState, parser_state: Any) -> Iterator[Token]: | |||
try: | |||
while True: | |||
lexer = self.lexers[parser_state.position] | |||
@@ -7,7 +7,7 @@ from .parsers.lalr_parser import LALR_Parser | |||
from .tree import Tree | |||
from .common import LexerConf, ParserConf | |||
try: | |||
import regex | |||
import regex # type: ignore | |||
except ImportError: | |||
regex = None | |||
import re | |||
@@ -1,8 +1,9 @@ | |||
try: | |||
from future_builtins import filter | |||
from future_builtins import filter # type: ignore | |||
except ImportError: | |||
pass | |||
import sys | |||
from copy import deepcopy | |||
@@ -49,7 +50,7 @@ class Tree(object): | |||
data: str | |||
children: 'List[Union[str, Tree]]' | |||
def __init__(self, data: str, children: 'List[Union[str, Tree]]', meta: Meta=None) -> None: | |||
def __init__(self, data: str, children: 'List[Union[str, Tree]]', meta: Optional[Meta]=None) -> None: | |||
self.data = data | |||
self.children = children | |||
self._meta = meta | |||
@@ -196,7 +197,7 @@ def pydot__tree_to_graph(tree, rankdir="LR", **kwargs): | |||
possible attributes, see https://www.graphviz.org/doc/info/attrs.html. | |||
""" | |||
import pydot | |||
import pydot # type: ignore | |||
graph = pydot.Dot(graph_type='digraph', rankdir=rankdir, **kwargs) | |||
i = [0] | |||
@@ -134,7 +134,7 @@ def smart_decorator(f, create_decorator): | |||
try: | |||
import regex | |||
import regex # type: ignore | |||
except ImportError: | |||
regex = None | |||