@@ -1,8 +1,9 @@
-from types import ModuleType
 from .utils import Serialize
 from .lexer import TerminalDef, Token
 ###{standalone
+from types import ModuleType
 from typing import Any, Callable, Collection, Dict, Optional, TYPE_CHECKING
 if TYPE_CHECKING:
@@ -17,13 +18,13 @@ class LexerConf(Serialize):
     terminals: Collection[TerminalDef]
     re_module: ModuleType
     ignore: Collection[str] = ()
-    postlex: 'PostLex' = None
-    callbacks: Optional[Dict[str, _Callback]] = None
+    postlex: 'Optional[PostLex]' = None
+    callbacks: Dict[str, _Callback] = {}
     g_regex_flags: int = 0
     skip_validation: bool = False
     use_bytes: bool = False
-    def __init__(self, terminals: Collection[TerminalDef], re_module: ModuleType, ignore: Collection[str]=(), postlex: 'PostLex'=None, callbacks: Optional[Dict[str, _Callback]]=None, g_regex_flags: int=0, skip_validation: bool=False, use_bytes: bool=False):
+    def __init__(self, terminals: Collection[TerminalDef], re_module: ModuleType, ignore: Collection[str]=(), postlex: 'Optional[PostLex]'=None, callbacks: Optional[Dict[str, _Callback]]=None, g_regex_flags: int=0, skip_validation: bool=False, use_bytes: bool=False):
         self.terminals = terminals
         self.terminals_by_name = {t.name: t for t in self.terminals}
         assert len(self.terminals) == len(self.terminals_by_name)
@@ -3,7 +3,7 @@ from .utils import logger, NO_VALUE
 ###{standalone
-from typing import Dict, Iterable, Callable, Union, TypeVar, Tuple, Any, List, Set, TYPE_CHECKING
+from typing import Dict, Iterable, Callable, Union, TypeVar, Tuple, Any, List, Set, Optional, TYPE_CHECKING
 if TYPE_CHECKING:
     from .lexer import Token
@@ -73,7 +73,7 @@ class UnexpectedInput(LarkError):
             after = text[pos:end].split(b'\n', 1)[0]
             return (before + after + b'\n' + b' ' * len(before.expandtabs()) + b'^\n').decode("ascii", "backslashreplace")
-    def match_examples(self, parse_fn: 'Callable[[str], Tree]', examples: Union[Dict[T, Iterable[str]], Iterable[Tuple[T, Iterable[str]]]], token_type_match_fallback: bool=False, use_accepts: bool=False) -> T:
+    def match_examples(self, parse_fn: 'Callable[[str], Tree]', examples: Union[Dict[T, Iterable[str]], Iterable[Tuple[T, Iterable[str]]]], token_type_match_fallback: bool=False, use_accepts: bool=False) -> Optional[T]:
        """Allows you to detect what's wrong in the input text by matching
        against example errors.
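Since match_examples can now return None when no example matches, callers should handle that case. A minimal usage sketch, assuming a hypothetical grammar and a hypothetical error class (none of these names come from this diff):

    class MissingSemicolon(SyntaxError):
        pass

    parser = Lark(grammar, parser='lalr')  # `grammar` and `text` are assumed to exist
    try:
        parser.parse(text)
    except UnexpectedInput as u:
        exc_class = u.match_examples(parser.parse, {
            MissingSemicolon: ['let x = 1 let y = 2'],
        })
        if exc_class is None:  # now possible, per the Optional[T] return type
            raise
        raise exc_class(u.get_context(text), u.line, u.column)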
@@ -14,12 +14,12 @@ class DedentError(LarkError):
 class Indenter(PostLex, ABC):
-    paren_level: Optional[int]
-    indent_level: Optional[List[int]]
+    paren_level: int
+    indent_level: List[int]
     def __init__(self) -> None:
-        self.paren_level = None
-        self.indent_level = None
+        self.paren_level = 0
+        self.indent_level = [0]
         assert self.tab_len > 0
     def handle_NL(self, token: Token) -> Iterator[Token]:
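With paren_level and indent_level now given concrete initial values in __init__, a subclass of Indenter only has to provide the configuration attributes. A minimal sketch, assuming an indentation-aware grammar whose terminal names match the strings below (the names are illustrative assumptions):

    class TreeIndenter(Indenter):
        NL_type = '_NEWLINE'
        OPEN_PAREN_types = []
        CLOSE_PAREN_types = []
        INDENT_type = '_INDENT'
        DEDENT_type = '_DEDENT'
        tab_len = 8

    parser = Lark(grammar, parser='lalr', postlex=TreeIndenter())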
@@ -15,7 +15,7 @@ from .grammar import Rule
 import re
 try:
-    import regex
+    import regex  # type: ignore
 except ImportError:
     regex = None
@@ -149,7 +149,7 @@ class LarkOptions(Serialize):
     # - As an attribute of `LarkOptions` above
     # - Potentially in `_LOAD_ALLOWED_OPTIONS` below this class, when the option doesn't change how the grammar is loaded
     # - Potentially in `lark.tools.__init__`, if it makes sense, and it can easily be passed as a cmd argument
-    _defaults = {
+    _defaults: Dict[str, Any] = {
         'debug': False,
         'keep_all_tokens': False,
         'tree_class': None,
@@ -414,6 +414,7 @@ class Lark(Serialize):
         if cache_fn:
             logger.debug('Saving grammar to cache: %s', cache_fn)
             with FS.open(cache_fn, 'wb') as f:
+                assert cache_md5 is not None
                 f.write(cache_md5.encode('utf8') + b'\n')
                 pickle.dump(used_files, f)
                 self.save(f)
@@ -574,7 +575,7 @@ class Lark(Serialize):
         """Get information about a terminal"""
         return self._terminals_dict[name]
-    def parse_interactive(self, text: str=None, start: Optional[str]=None) -> 'InteractiveParser':
+    def parse_interactive(self, text: Optional[str]=None, start: Optional[str]=None) -> 'InteractiveParser':
         """Start an interactive parsing session.
         Parameters:
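A rough sketch of how an interactive session is typically driven, assuming an LALR parser instance named `parser` (the InteractiveParser method names are not part of this diff):

    interactive = parser.parse_interactive('1 + 2')
    interactive.exhaust_lexer()        # feed all tokens from the given text
    tree = interactive.resume_parse()  # finish parsing and return the tree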
@@ -588,7 +589,7 @@ class Lark(Serialize):
         """
         return self.parser.parse_interactive(text, start=start)
-    def parse(self, text: str, start: Optional[str]=None, on_error: 'Callable[[UnexpectedInput], bool]'=None) -> Tree:
+    def parse(self, text: str, start: Optional[str]=None, on_error: 'Optional[Callable[[UnexpectedInput], bool]]'=None) -> Tree:
         """Parse the given text, according to the options provided.
         Parameters:
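The on_error callback, now explicitly Optional, receives each UnexpectedInput and returns a bool telling the parser whether to attempt recovery and keep going. A minimal sketch, assuming an LALR parser instance named `parser` and an input `text` defined elsewhere:

    def keep_going(e: UnexpectedInput) -> bool:
        print('error at line', e.line, 'column', e.column)
        return True  # True means: try to recover and continue parsing

    tree = parser.parse(text, on_error=keep_going)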
@@ -23,10 +23,10 @@ class Pattern(Serialize, ABC):
     value: str
     flags: Collection[str]
-    raw: str = None
-    type: str = None
+    raw: Optional[str] = None
+    type: Optional[str] = None
-    def __init__(self, value: str, flags: Collection[str]=(), raw: str=None) -> None:
+    def __init__(self, value: str, flags: Collection[str]=(), raw: Optional[str]=None) -> None:
         self.value = value
         self.flags = frozenset(flags)
         self.raw = raw
@@ -81,7 +81,10 @@ class PatternStr(Pattern):
     @property
     def min_width(self) -> int:
         return len(self.value)
-    max_width = min_width
+    @property
+    def max_width(self) -> int:
+        return len(self.value)
 class PatternRE(Pattern):
@@ -320,15 +323,36 @@ def _regexp_has_newline(r):
     """
     return '\n' in r or '\\n' in r or '\\s' in r or '[^' in r or ('(?s' in r and '.' in r)
+class LexerState(object):
+    __slots__ = 'text', 'line_ctr', 'last_token'
+    def __init__(self, text, line_ctr, last_token=None):
+        self.text = text
+        self.line_ctr = line_ctr
+        self.last_token = last_token
+    def __eq__(self, other):
+        if not isinstance(other, LexerState):
+            return NotImplemented
+        return self.text is other.text and self.line_ctr == other.line_ctr and self.last_token == other.last_token
+    def __copy__(self):
+        return type(self)(self.text, copy(self.line_ctr), self.last_token)
 _Callback = Callable[[Token], Token]
 class Lexer(ABC):
     """Lexer interface
     Method Signatures:
-        lex(self, text) -> Iterator[Token]
+        lex(self, lexer_state, parser_state) -> Iterator[Token]
     """
-    lex: Callable[..., Iterator[Token]] = NotImplemented
+    @abstractmethod
+    def lex(self, lexer_state: LexerState, parser_state: Any) -> Iterator[Token]:
+        ...
     def make_lexer_state(self, text):
         line_ctr = LineCounter(b'\n' if isinstance(text, bytes) else '\n')
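Because lex is now an abstract method, a custom lexer has to implement it with the (lexer_state, parser_state) signature above. A minimal sketch of a conforming subclass (the terminal name 'WORD' is an illustrative assumption):

    class WhitespaceLexer(Lexer):
        def lex(self, lexer_state: LexerState, parser_state: Any) -> Iterator[Token]:
            # split the input on whitespace and emit each chunk as a WORD token
            for word in lexer_state.text.split():
                yield Token('WORD', word)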
@@ -394,6 +418,7 @@ class TraditionalLexer(Lexer):
     def mres(self) -> List[Tuple[REPattern, Dict[int, str]]]:
         if self._mres is None:
             self._build()
+            assert self._mres is not None
         return self._mres
     def match(self, text: str, pos: int) -> Optional[Tuple[str, str]]:
@@ -402,12 +427,12 @@ class TraditionalLexer(Lexer):
         if m:
             return m.group(0), type_from_index[m.lastindex]
-    def lex(self, state: Any, parser_state: Any) -> Iterator[Token]:
+    def lex(self, state: LexerState, parser_state: Any) -> Iterator[Token]:
         with suppress(EOFError):
             while True:
                 yield self.next_token(state, parser_state)
-    def next_token(self, lex_state: Any, parser_state: Any=None) -> Token:
+    def next_token(self, lex_state: LexerState, parser_state: Any=None) -> Token:
         line_ctr = lex_state.line_ctr
         while line_ctr.char_pos < len(lex_state.text):
             res = self.match(lex_state.text, line_ctr.char_pos)
@@ -443,24 +468,6 @@ class TraditionalLexer(Lexer):
         raise EOFError(self)
-class LexerState(object):
-    __slots__ = 'text', 'line_ctr', 'last_token'
-    def __init__(self, text, line_ctr, last_token=None):
-        self.text = text
-        self.line_ctr = line_ctr
-        self.last_token = last_token
-    def __eq__(self, other):
-        if not isinstance(other, LexerState):
-            return NotImplemented
-        return self.text is other.text and self.line_ctr == other.line_ctr and self.last_token == other.last_token
-    def __copy__(self):
-        return type(self)(self.text, copy(self.line_ctr), self.last_token)
 class ContextualLexer(Lexer):
     lexers: Dict[str, TraditionalLexer]
@@ -494,7 +501,7 @@ class ContextualLexer(Lexer):
     def make_lexer_state(self, text):
         return self.root_lexer.make_lexer_state(text)
-    def lex(self, lexer_state: Any, parser_state: Any) -> Iterator[Token]:
+    def lex(self, lexer_state: LexerState, parser_state: Any) -> Iterator[Token]:
         try:
             while True:
                 lexer = self.lexers[parser_state.position]
@@ -7,7 +7,7 @@ from .parsers.lalr_parser import LALR_Parser
 from .tree import Tree
 from .common import LexerConf, ParserConf
 try:
-    import regex
+    import regex  # type: ignore
 except ImportError:
     regex = None
 import re
@@ -1,8 +1,9 @@
 try:
-    from future_builtins import filter
+    from future_builtins import filter  # type: ignore
 except ImportError:
     pass
+import sys
 from copy import deepcopy
@@ -49,7 +50,7 @@ class Tree(object):
     data: str
     children: 'List[Union[str, Tree]]'
-    def __init__(self, data: str, children: 'List[Union[str, Tree]]', meta: Meta=None) -> None:
+    def __init__(self, data: str, children: 'List[Union[str, Tree]]', meta: Optional[Meta]=None) -> None:
         self.data = data
         self.children = children
         self._meta = meta
@@ -196,7 +197,7 @@ def pydot__tree_to_graph(tree, rankdir="LR", **kwargs):
     possible attributes, see https://www.graphviz.org/doc/info/attrs.html.
     """
-    import pydot
+    import pydot  # type: ignore
     graph = pydot.Dot(graph_type='digraph', rankdir=rankdir, **kwargs)
     i = [0]
@@ -134,7 +134,7 @@ def smart_decorator(f, create_decorator):
 try:
-    import regex
+    import regex  # type: ignore
 except ImportError:
     regex = None