Merge pull request #926 from chanicpanic/v1.0-typing

3 vuotta sitten · 3e2a61aa14
--- a/.github/workflows/mypy.yml
+++ b/.github/workflows/mypy.yml
@@ -16,4 +16,4 @@ jobs:
          python -m pip install --upgrade pip
          pip install mypy
      - name: Lint with mypy
        run: mypy -p lark-stubs || true
        run: mypy -p lark || true
--- a/lark-stubs/init.pyi
+++ b/lark-stubs/init.pyi
@@ -1,12 +0,0 @@
 # -*- coding: utf-8 -*-

 from .tree import *
 from .visitors import *
 from .exceptions import *
 from .lexer import *
 from .load_grammar import *
 from .lark import *
 from logging import Logger as _Logger

 logger: _Logger
 __version__: str = ...
--- a/lark-stubs/ast_utils.pyi
+++ b/lark-stubs/ast_utils.pyi
@@ -1,17 +0,0 @@
 import types
 from typing import Optional

 from .visitors import Transformer

 class Ast(object):
    pass

 class AsList(object):
    pass


 def create_transformer(
        ast_module: types.ModuleType,
        transformer: Optional[Transformer]=None
 ) -> Transformer:
    ...
--- a/lark-stubs/exceptions.pyi
+++ b/lark-stubs/exceptions.pyi
@@ -1,65 +0,0 @@
 # -*- coding: utf-8 -*-

 from typing import Dict, Iterable, Callable, Union, TypeVar, Tuple, Any, List, Set
 from .tree import Tree
 from .lexer import Token
 from .parsers.lalr_interactive_parser import InteractiveParser

 class LarkError(Exception):
    pass


 class ConfigurationError(LarkError, ValueError):
    pass


 class GrammarError(LarkError):
    pass


 class ParseError(LarkError):
    pass


 class LexError(LarkError):
    pass


 T = TypeVar('T')

 class UnexpectedEOF(ParseError):
    expected: List[Token]

 class UnexpectedInput(LarkError):
    line: int
    column: int
    pos_in_stream: int
    state: Any

    def get_context(self, text: str, span: int = ...) -> str:
        ...

    def match_examples(
            self,
            parse_fn: Callable[[str], Tree],
            examples: Union[Dict[T, Iterable[str]], Iterable[Tuple[T, Iterable[str]]]],
            token_type_match_fallback: bool = False,
            use_accepts: bool = False,
    ) -> T:
        ...


 class UnexpectedToken(ParseError, UnexpectedInput):
    expected: Set[str]
    considered_rules: Set[str]
    interactive_parser: InteractiveParser
    accepts: Set[str]

 class UnexpectedCharacters(LexError, UnexpectedInput):
    allowed: Set[str]
    considered_tokens: Set[Any]


 class VisitError(LarkError):
    obj: Union[Tree, Token]
    orig_exc: Exception
--- a/lark-stubs/grammar.pyi
+++ b/lark-stubs/grammar.pyi
@@ -1,14 +0,0 @@
 from typing import Optional, Tuple


 class RuleOptions:
    keep_all_tokens: bool
    expand1: bool
    priority: int
    template_source: Optional[str]
    empty_indices: Tuple[bool, ...]


 class Symbol:
    name: str
    is_term: bool
--- a/lark-stubs/indenter.pyi
+++ b/lark-stubs/indenter.pyi
@@ -1,47 +0,0 @@
 # -*- coding: utf-8 -*-

 from typing import Tuple, List, Iterator, Optional
 from abc import ABC, abstractmethod
 from .lexer import Token
 from .lark import PostLex


 class Indenter(PostLex, ABC):
    paren_level: Optional[int]
    indent_level: Optional[List[int]]

    def __init__(self) -> None:
        ...

    def handle_NL(self, token: Token) -> Iterator[Token]:
        ...

    @property
    @abstractmethod
    def NL_type(self) -> str:
        ...

    @property
    @abstractmethod
    def OPEN_PAREN_types(self) -> List[str]:
        ...

    @property
    @abstractmethod
    def CLOSE_PAREN_types(self) -> List[str]:
        ...

    @property
    @abstractmethod
    def INDENT_type(self) -> str:
        ...

    @property
    @abstractmethod
    def DEDENT_type(self) -> str:
        ...

    @property
    @abstractmethod
    def tab_len(self) -> int:
        ...
--- a/lark-stubs/lark.pyi
+++ b/lark-stubs/lark.pyi
@@ -1,109 +0,0 @@
 # -*- coding: utf-8 -*-

 from typing import (
    TypeVar, Type, List, Dict, IO, Iterator, Callable, Union, Optional,
    Literal, Protocol, Tuple, Iterable,
 )

 from .parsers.lalr_interactive_parser import InteractiveParser
 from .visitors import Transformer
 from .lexer import Token, Lexer, TerminalDef
 from .tree import Tree
 from .exceptions import UnexpectedInput
 from .load_grammar import Grammar

 _T = TypeVar('_T')


 class PostLex(Protocol):

    def process(self, stream: Iterator[Token]) -> Iterator[Token]:
        ...

    always_accept: Iterable[str]


 class LarkOptions:
    start: List[str]
    parser: str
    lexer: str
    transformer: Optional[Transformer]
    postlex: Optional[PostLex]
    ambiguity: str
    regex: bool
    debug: bool
    keep_all_tokens: bool
    propagate_positions: Union[bool, str]
    maybe_placeholders: bool
    lexer_callbacks: Dict[str, Callable[[Token], Token]]
    cache: Union[bool, str]
    g_regex_flags: int
    use_bytes: bool
    import_paths: List[Union[str, Callable[[Union[None, str, PackageResource], str], Tuple[str, str]]]]
    source_path: Optional[str]


 class PackageResource(object):
    pkg_name: str
    path: str

    def __init__(self, pkg_name: str, path: str): ...


 class FromPackageLoader:
    def __init__(self, pkg_name: str, search_paths: Tuple[str, ...] = ...): ...

    def __call__(self, base_path: Union[None, str, PackageResource], grammar_path: str) -> Tuple[PackageResource, str]: ...


 class Lark:
    source_path: str
    source_grammar: str
    grammar: Grammar
    options: LarkOptions
    lexer: Lexer
    terminals: List[TerminalDef]

    def __init__(
        self,
        grammar: Union[Grammar, str, IO[str]],
        *,
        start: Union[None, str, List[str]] = "start",
        parser: Literal["earley", "lalr", "cyk", "auto"] = "auto",
        lexer: Union[Literal["auto", "standard", "contextual", "dynamic", "dynamic_complete"], Type[Lexer]] = "auto",
        transformer: Optional[Transformer] = None,
        postlex: Optional[PostLex] = None,
        ambiguity: Literal["explicit", "resolve"] = "resolve",
        regex: bool = False,
        debug: bool = False,
        keep_all_tokens: bool = False,
        propagate_positions: Union[bool, str] = False,
        maybe_placeholders: bool = False,
        lexer_callbacks: Optional[Dict[str, Callable[[Token], Token]]] = None,
        cache: Union[bool, str] = False,
        g_regex_flags: int = ...,
        use_bytes: bool = False,
        import_paths: List[Union[str, Callable[[Union[None, str, PackageResource], str], Tuple[str, str]]]] = ...,
        source_path: Optional[str]=None,
    ):
        ...

    def parse(self, text: str, start: Optional[str] = None, on_error: Callable[[UnexpectedInput], bool] = None) -> Tree:
        ...

    def parse_interactive(self, text: str = None, start: Optional[str] = None) -> InteractiveParser:
        ...

    @classmethod
    def open(cls: Type[_T], grammar_filename: str, rel_to: Optional[str] = None, **options) -> _T:
        ...

    @classmethod
    def open_from_package(cls: Type[_T], package: str, grammar_path: str, search_paths: Tuple[str, ...] = ..., **options) -> _T:
        ...

    def lex(self, text: str, dont_ignore: bool = False) -> Iterator[Token]:
        ...

    def get_terminal(self, name: str) -> TerminalDef:
        ...
--- a/lark-stubs/lexer.pyi
+++ b/lark-stubs/lexer.pyi
@@ -1,161 +0,0 @@
 # -*- coding: utf-8 -*-
 from types import ModuleType
 from typing import (
    TypeVar, Type, Tuple, List, Dict, Iterator, Collection, Callable, Optional, FrozenSet, Any,
    Pattern as REPattern,
 )
 from abc import abstractmethod, ABC

 _T = TypeVar('_T')


 class Pattern(ABC):
    value: str
    flags: Collection[str]
    raw: str
    type: str

    def __init__(self, value: str, flags: Collection[str] = (), raw: str = None) -> None:
        ...

    @abstractmethod
    def to_regexp(self) -> str:
        ...

    @property
    @abstractmethod
    def min_width(self) -> int:
        ...

    @property
    @abstractmethod
    def max_width(self) -> int:
        ...


 class PatternStr(Pattern):
    type: str = ...

    def to_regexp(self) -> str:
        ...

    @property
    def min_width(self) -> int:
        ...

    @property
    def max_width(self) -> int:
        ...


 class PatternRE(Pattern):
    type: str = ...

    def to_regexp(self) -> str:
        ...

    @property
    def min_width(self) -> int:
        ...

    @property
    def max_width(self) -> int:
        ...


 class TerminalDef:
    name: str
    pattern: Pattern
    priority: int

    def __init__(self, name: str, pattern: Pattern, priority: int = ...) -> None:
        ...

    def user_repr(self) -> str: ...


 class Token(str):
    type: str
    start_pos: int
    value: Any
    line: int
    column: int
    end_line: int
    end_column: int
    end_pos: int

    def __init__(self, type_: str, value: Any, start_pos: int = None, line: int = None, column: int = None, end_line: int = None, end_column: int = None, end_pos: int = None) -> None:
        ...

    def update(self, type_: Optional[str] = None, value: Optional[Any] = None) -> Token:
        ...

    @classmethod
    def new_borrow_pos(cls: Type[_T], type_: str, value: Any, borrow_t: Token) -> _T:
        ...


 _Callback = Callable[[Token], Token]


 class Lexer(ABC):
    lex: Callable[..., Iterator[Token]]


 class LexerConf:
     tokens: Collection[TerminalDef]
     re_module: ModuleType
     ignore: Collection[str] = ()
     postlex: Any =None
     callbacks: Optional[Dict[str, _Callback]] = None
     g_regex_flags: int = 0
     skip_validation: bool = False
     use_bytes: bool = False



 class TraditionalLexer(Lexer):
    terminals: Collection[TerminalDef]
    ignore_types: FrozenSet[str]
    newline_types: FrozenSet[str]
    user_callbacks: Dict[str, _Callback]
    callback: Dict[str, _Callback]
    mres: List[Tuple[REPattern, Dict[int, str]]]
    re: ModuleType

    def __init__(
        self,
        conf: LexerConf
    ) -> None:
        ...

    def build(self) -> None:
        ...

    def match(self, stream: str, pos: int) -> Optional[Tuple[str, str]]:
        ...

    def lex(self, stream: str) -> Iterator[Token]:
        ...

    def next_token(self, lex_state: Any, parser_state: Any = None) -> Token:
        ...

 class ContextualLexer(Lexer):
    lexers: Dict[str, TraditionalLexer]
    root_lexer: TraditionalLexer

    def __init__(
        self,
        terminals: Collection[TerminalDef],
        states: Dict[str, Collection[str]],
        re_: ModuleType,
        ignore: Collection[str] = ...,
        always_accept: Collection[str] = ...,
        user_callbacks: Dict[str, _Callback] = ...,
        g_regex_flags: int = ...
    ) -> None:
        ...

    def lex(self, stream: str, get_parser_state: Callable[[], str]) -> Iterator[Token]:
        ...
--- a/lark-stubs/load_grammar.pyi
+++ b/lark-stubs/load_grammar.pyi
@@ -1,31 +0,0 @@
 from typing import List, Tuple, Union, Callable, Dict, Optional

 from .tree import Tree
 from .grammar import RuleOptions
 from .exceptions import UnexpectedInput


 class Grammar:
    rule_defs: List[Tuple[str, Tuple[str, ...], Tree, RuleOptions]]
    term_defs: List[Tuple[str, Tuple[Tree, int]]]
    ignore: List[str]


 class GrammarBuilder:
    global_keep_all_tokens: bool
    import_paths: List[Union[str, Callable]]
    used_files: Dict[str, str]

    def __init__(self, global_keep_all_tokens: bool = False, import_paths: List[Union[str, Callable]] = None, used_files: Dict[str, str]=None) -> None: ...

    def load_grammar(self, grammar_text: str, grammar_name: str = ..., mangle: Callable[[str], str] = None) -> None: ...

    def do_import(self, dotted_path: Tuple[str, ...], base_path: Optional[str], aliases: Dict[str, str],
                  base_mangle: Callable[[str], str] = None) -> None:  ...

    def validate(self) -> None: ...

    def build(self) -> Grammar: ...


 def find_grammar_errors(text: str, start: str='start') -> List[Tuple[UnexpectedInput, str]]: ...
--- a/lark-stubs/parsers/init.pyi
+++ b/lark-stubs/parsers/init.pyi
--- a/lark-stubs/reconstruct.pyi
+++ b/lark-stubs/reconstruct.pyi
@@ -1,39 +0,0 @@
 # -*- coding: utf-8 -*-

 from typing import List, Dict, Union, Callable, Iterable

 from .grammar import Symbol
 from .lark import Lark
 from .tree import Tree
 from .visitors import Transformer_InPlace
 from .lexer import TerminalDef


 class WriteTokensTransformer(Transformer_InPlace):

    def __init__(self, tokens: Dict[str, TerminalDef], term_subs: Dict[str, Callable[[Symbol], str]] = ...): ...


 class MatchTree(Tree):
    pass


 class MakeMatchTree:
    name: str
    expansion: List[TerminalDef]

    def __init__(self, name: str, expansion: List[TerminalDef]):
        ...

    def __call__(self, args: List[Union[str, Tree]]):
        ...


 class Reconstructor:

    def __init__(self, parser: Lark, term_subs: Dict[str, Callable[[Symbol], str]] = ...):
        ...

    def reconstruct(self, tree: Tree, postproc: Callable[[Iterable[str]], Iterable[str]]=None,
                    insert_spaces: bool = True) -> str:
        ...
--- a/lark-stubs/tree.pyi
+++ b/lark-stubs/tree.pyi
@@ -1,72 +0,0 @@
 # -*- coding: utf-8 -*-

 from typing import List, Callable, Iterator, Union, Optional, Literal, Any
 from .lexer import TerminalDef

 class Meta:
    empty: bool
    line: int
    column: int
    start_pos: int
    end_line: int
    end_column: int
    end_pos: int
    orig_expansion: List[TerminalDef]
    match_tree: bool


 class Tree:
    data: str
    children: List[Union[str, Tree]]
    meta: Meta

    def __init__(
        self,
        data: str,
        children: List[Union[str, Tree]],
        meta: Optional[Meta] = None
    ) -> None:
        ...

    def pretty(self, indent_str: str = ...) -> str:
        ...

    def find_pred(self, pred: Callable[[Tree], bool]) -> Iterator[Tree]:
        ...

    def find_data(self, data: str) -> Iterator[Tree]:
        ...

    def expand_kids_by_index(self, *indices: int) -> None:
        ...

    def scan_values(self, pred: Callable[[Union[str, Tree]], bool]) -> Iterator[str]:
        ...

    def iter_subtrees(self) -> Iterator[Tree]:
        ...

    def iter_subtrees_topdown(self) -> Iterator[Tree]:
        ...

    def copy(self) -> Tree:
        ...

    def set(self, data: str, children: List[Union[str, Tree]]) -> None:
        ...

    def __hash__(self) -> int:
        ...


 class SlottedTree(Tree):
    pass


 def pydot__tree_to_png(
    tree: Tree,
    filename: str,
    rankdir: Literal["TB", "LR", "BT", "RL"] = ...,
    **kwargs
 ) -> None:
    ...
--- a/lark-stubs/visitors.pyi
+++ b/lark-stubs/visitors.pyi
@@ -1,108 +0,0 @@
 # -*- coding: utf-8 -*-

 from typing import TypeVar, Tuple, List, Callable, Generic, Type, Union
 from abc import ABC
 from .tree import Tree

 _T = TypeVar('_T')
 _R = TypeVar('_R')
 _FUNC = Callable[..., _T]
 _DECORATED = Union[_FUNC, type]


 class Transformer(ABC, Generic[_T]):

    def __init__(self, visit_tokens: bool = True) -> None:
        ...

    def transform(self, tree: Tree) -> _T:
        ...

    def __mul__(self, other: Transformer[_T]) -> TransformerChain[_T]:
        ...


 class TransformerChain(Generic[_T]):
    transformers: Tuple[Transformer[_T], ...]

    def __init__(self, *transformers: Transformer[_T]) -> None:
        ...

    def transform(self, tree: Tree) -> _T:
        ...

    def __mul__(self, other: Transformer[_T]) -> TransformerChain[_T]:
        ...


 class Transformer_InPlace(Transformer):
    pass


 class Transformer_NonRecursive(Transformer):
    pass


 class Transformer_InPlaceRecursive(Transformer):
    pass


 class VisitorBase:
    pass


 class Visitor(VisitorBase, ABC, Generic[_T]):

    def visit(self, tree: Tree) -> Tree:
        ...

    def visit_topdown(self, tree: Tree) -> Tree:
        ...


 class Visitor_Recursive(VisitorBase):

    def visit(self, tree: Tree) -> Tree:
        ...

    def visit_topdown(self, tree: Tree) -> Tree:
        ...


 class Interpreter(ABC, Generic[_T]):

    def visit(self, tree: Tree) -> _T:
        ...

    def visit_children(self, tree: Tree) -> List[_T]:
        ...


 _InterMethod = Callable[[Type[Interpreter], _T], _R]


 def v_args(
        inline: bool = False,
        meta: bool = False,
        tree: bool = False,
        wrapper: Callable = None
 ) -> Callable[[_DECORATED], _DECORATED]:
    ...


 def visit_children_decor(func: _InterMethod) -> _InterMethod:
    ...


 class Discard(Exception):
    pass


 # Deprecated
 class InlineTransformer:
    pass


 # Deprecated
 def inline_args(obj: _FUNC) -> _FUNC:
    ...
--- a/lark/init.py
+++ b/lark/init.py
@@ -6,4 +6,4 @@ from .exceptions import (ParseError, LexError, GrammarError, UnexpectedToken,
 from .lexer import Token
 from .lark import Lark

 __version__ = "1.0.0a"
 __version__: str = "1.0.0a"
--- a/lark/ast_utils.py
+++ b/lark/ast_utils.py
@@ -3,6 +3,8 @@
 """

 import inspect, re
 import types
 from typing import Optional

 from lark import Transformer, v_args

@@ -27,7 +29,7 @@ def _call(func, _data, children, _meta):

 inline = v_args(wrapper=_call)

 def create_transformer(ast_module, transformer=None):
 def create_transformer(ast_module: types.ModuleType, transformer: Optional[Transformer]=None) -> Transformer:
    """Collects `Ast` subclasses from the given module, and creates a Lark transformer that builds the AST.

    For each class, we create a corresponding rule in the transformer, with a matching name.
@@ -49,4 +51,4 @@ def create_transformer(ast_module, transformer=None):

                setattr(t, camel_to_snake(name), obj)

    return t
    return t
--- a/lark/common.py
+++ b/lark/common.py
@@ -1,14 +1,30 @@
 from types import ModuleType

 from .utils import Serialize
 from .lexer import TerminalDef
 from .lexer import TerminalDef, Token

 ###{standalone
 from typing import Any, Callable, Collection, Dict, Optional, TYPE_CHECKING

 if TYPE_CHECKING:
    from .lark import PostLex

 _Callback = Callable[[Token], Token]

 class LexerConf(Serialize):
    __serialize_fields__ = 'terminals', 'ignore', 'g_regex_flags', 'use_bytes', 'lexer_type'
    __serialize_namespace__ = TerminalDef,

    def __init__(self, terminals, re_module, ignore=(), postlex=None, callbacks=None, g_regex_flags=0, skip_validation=False, use_bytes=False):
    terminals: Collection[TerminalDef]
    re_module: ModuleType
    ignore: Collection[str]
    postlex: 'Optional[PostLex]'
    callbacks: Dict[str, _Callback]
    g_regex_flags: int
    skip_validation: bool
    use_bytes: bool

    def __init__(self, terminals: Collection[TerminalDef], re_module: ModuleType, ignore: Collection[str]=(), postlex: 'Optional[PostLex]'=None, callbacks: Optional[Dict[str, _Callback]]=None, g_regex_flags: int=0, skip_validation: bool=False, use_bytes: bool=False):
        self.terminals = terminals
        self.terminals_by_name = {t.name: t for t in self.terminals}
        assert len(self.terminals) == len(self.terminals_by_name)
--- a/lark/exceptions.py
+++ b/lark/exceptions.py
@@ -3,6 +3,12 @@ from .utils import logger, NO_VALUE

 ###{standalone

 from typing import Dict, Iterable, Callable, Union, TypeVar, Tuple, Any, List, Set, Optional, TYPE_CHECKING

 if TYPE_CHECKING:
    from .lexer import Token
    from .parsers.lalr_interactive_parser import InteractiveParser
    from .tree import Tree

 class LarkError(Exception):
    pass
@@ -28,6 +34,7 @@ class ParseError(LarkError):
 class LexError(LarkError):
    pass

 T = TypeVar('T')

 class UnexpectedInput(LarkError):
    """UnexpectedInput Error.
@@ -39,10 +46,13 @@ class UnexpectedInput(LarkError):

    After catching one of these exceptions, you may call the following helper methods to create a nicer error message.
    """
    line: int
    column: int
    pos_in_stream = None
    state: Any
    _terminals_by_name = None

    def get_context(self, text, span=40):
    def get_context(self, text: str, span: int=40) -> str:
        """Returns a pretty string pinpointing the error in the text,
        with span amount of context characters around it.

@@ -63,7 +73,7 @@ class UnexpectedInput(LarkError):
            after = text[pos:end].split(b'\n', 1)[0]
            return (before + after + b'\n' + b' ' * len(before.expandtabs()) + b'^\n').decode("ascii", "backslashreplace")

    def match_examples(self, parse_fn, examples, token_type_match_fallback=False, use_accepts=False):
    def match_examples(self, parse_fn: 'Callable[[str], Tree]', examples: Union[Dict[T, Iterable[str]], Iterable[Tuple[T, Iterable[str]]]], token_type_match_fallback: bool=False, use_accepts: bool=False) -> Optional[T]:
        """Allows you to detect what's wrong in the input text by matching
        against example errors.

@@ -126,6 +136,9 @@ class UnexpectedInput(LarkError):


 class UnexpectedEOF(ParseError, UnexpectedInput):

    expected: 'List[Token]'

    def __init__(self, expected, state=None, terminals_by_name=None):
        self.expected = expected
        self.state = state
@@ -145,6 +158,10 @@ class UnexpectedEOF(ParseError, UnexpectedInput):


 class UnexpectedCharacters(LexError, UnexpectedInput):

    allowed: Set[str]
    considered_tokens: Set[Any]

    def __init__(self, seq, lex_pos, line, column, allowed=None, considered_tokens=None, state=None, token_history=None,
                 terminals_by_name=None, considered_rules=None):
        # TODO considered_tokens and allowed can be figured out using state
@@ -187,6 +204,10 @@ class UnexpectedToken(ParseError, UnexpectedInput):
    see: ``InteractiveParser``.
    """

    expected: Set[str]
    considered_rules: Set[str]
    interactive_parser: 'InteractiveParser'

    def __init__(self, token, expected, considered_rules=None, state=None, interactive_parser=None, terminals_by_name=None, token_history=None):
        # TODO considered_rules and expected can be figured out using state
        self.line = getattr(token, 'line', '?')
@@ -205,7 +226,7 @@ class UnexpectedToken(ParseError, UnexpectedInput):
        super(UnexpectedToken, self).__init__()

    @property
    def accepts(self):
    def accepts(self) -> Set[str]:
        if self._accepts is NO_VALUE:
            self._accepts = self.interactive_parser and self.interactive_parser.accepts()
        return self._accepts
@@ -228,6 +249,9 @@ class VisitError(LarkError):
    - orig_exc: the exception that cause it to fail
    """

    obj: 'Union[Tree, Token]'
    orig_exc: Exception

    def __init__(self, rule, obj, orig_exc):
        self.obj = obj
        self.orig_exc = orig_exc
--- a/lark/grammar.py
+++ b/lark/grammar.py
@@ -1,13 +1,17 @@

 from .utils import Serialize

 ###{standalone

 from typing import Optional, Tuple, ClassVar

 class Symbol(Serialize):
    __slots__ = ('name',)

    is_term = NotImplemented
    name: str
    is_term: ClassVar[bool] = NotImplemented

    def __init__(self, name):
    def __init__(self, name: str) -> None:
        self.name = name

    def __eq__(self, other):
@@ -29,7 +33,7 @@ class Symbol(Serialize):
 class Terminal(Symbol):
    __serialize_fields__ = 'name', 'filter_out'

    is_term = True
    is_term: ClassVar[bool] = True

    def __init__(self, name, filter_out=False):
        self.name = name
@@ -43,13 +47,19 @@ class Terminal(Symbol):
 class NonTerminal(Symbol):
    __serialize_fields__ = 'name',

    is_term = False
    is_term: ClassVar[bool] = False


 class RuleOptions(Serialize):
    __serialize_fields__ = 'keep_all_tokens', 'expand1', 'priority', 'template_source', 'empty_indices'

    def __init__(self, keep_all_tokens=False, expand1=False, priority=None, template_source=None, empty_indices=()):
    keep_all_tokens: bool
    expand1: bool
    priority: Optional[int]
    template_source: Optional[str]
    empty_indices: Tuple[bool, ...]

    def __init__(self, keep_all_tokens: bool=False, expand1: bool=False, priority: Optional[int]=None, template_source: Optional[str]=None, empty_indices: Tuple[bool, ...]=()) -> None:
        self.keep_all_tokens = keep_all_tokens
        self.expand1 = expand1
        self.priority = priority
--- a/lark/indenter.py
+++ b/lark/indenter.py
@@ -1,20 +1,28 @@
 "Provides Indentation services for languages with indentation similar to Python"

 from abc import ABC, abstractmethod

 from .exceptions import LarkError
 from .lark import PostLex
 from .lexer import Token

 ###{standalone
 from typing import Tuple, List, Iterator, Optional

 class DedentError(LarkError):
    pass

 class Indenter(PostLex):
    def __init__(self):
        self.paren_level = None
        self.indent_level = None
 class Indenter(PostLex, ABC):

    paren_level: int
    indent_level: List[int]

    def __init__(self) -> None:
        self.paren_level = 0
        self.indent_level = [0]
        assert self.tab_len > 0

    def handle_NL(self, token):
    def handle_NL(self, token: Token) -> Iterator[Token]:
        if self.paren_level > 0:
            return

@@ -64,4 +72,34 @@ class Indenter(PostLex):
    def always_accept(self):
        return (self.NL_type,)

    @property
    @abstractmethod
    def NL_type(self) -> str:
        ...

    @property
    @abstractmethod
    def OPEN_PAREN_types(self) -> List[str]:
        ...

    @property
    @abstractmethod
    def CLOSE_PAREN_types(self) -> List[str]:
        ...

    @property
    @abstractmethod
    def INDENT_type(self) -> str:
        ...

    @property
    @abstractmethod
    def DEDENT_type(self) -> str:
        ...

    @property
    @abstractmethod
    def tab_len(self) -> int:
        ...

 ###}
--- a/lark/lark.py
+++ b/lark/lark.py
@@ -2,31 +2,71 @@ from abc import ABC, abstractmethod
 import sys, os, pickle, hashlib
 import tempfile

 from .exceptions import ConfigurationError, assert_config
 from .exceptions import ConfigurationError, assert_config, UnexpectedInput
 from .utils import Serialize, SerializeMemoizer, FS, isascii, logger
 from .load_grammar import load_grammar, FromPackageLoader, Grammar, verify_used_files
 from .load_grammar import load_grammar, FromPackageLoader, Grammar, verify_used_files, PackageResource
 from .tree import Tree
 from .common import LexerConf, ParserConf

 from .lexer import Lexer, TraditionalLexer, TerminalDef, LexerThread
 from .lexer import Lexer, TraditionalLexer, TerminalDef, LexerThread, Token
 from .parse_tree_builder import ParseTreeBuilder
 from .parser_frontends import get_frontend, _get_lexer_callbacks
 from .grammar import Rule

 import re
 try:
    import regex
    import regex  # type: ignore
 except ImportError:
    regex = None


 ###{standalone
 from typing import (
    TypeVar, Type, List, Dict, Iterator, Callable, Union, Optional,
    Tuple, Iterable, IO, Any, TYPE_CHECKING
 )

 if TYPE_CHECKING:
    from .parsers.lalr_interactive_parser import InteractiveParser
    from .visitors import Transformer
    if sys.version_info >= (3, 8):
        from typing import Literal
    else:
        from typing_extensions import Literal

 class PostLex(ABC):
    @abstractmethod
    def process(self, stream: Iterator[Token]) -> Iterator[Token]:
        return stream

    always_accept: Iterable[str] = ()

 class LarkOptions(Serialize):
    """Specifies the options for Lark

    """

    start: List[str]
    debug: bool
    transformer: 'Optional[Transformer]'
    propagate_positions: Union[bool, str]
    maybe_placeholders: bool
    cache: Union[bool, str]
    regex: bool
    g_regex_flags: int
    keep_all_tokens: bool
    tree_class: Any
    parser: 'Literal["earley", "lalr", "cyk", "auto"]'
    lexer: 'Union[Literal["auto", "standard", "contextual", "dynamic", "dynamic_complete"], Type[Lexer]]'
    ambiguity: 'Literal["auto", "resolve", "explicit", "forest"]'
    postlex: Optional[PostLex]
    priority: 'Optional[Literal["auto", "normal", "invert"]]'
    lexer_callbacks: Dict[str, Callable[[Token], Token]]
    use_bytes: bool
    edit_terminals: Optional[Callable[[TerminalDef], TerminalDef]]
    import_paths: 'List[Union[str, Callable[[Union[None, str, PackageResource], str], Tuple[str, str]]]]'
    source_path: Optional[str]

    OPTIONS_DOC = """
    **===  General Options  ===**

@@ -106,12 +146,10 @@ class LarkOptions(Serialize):
    # Adding a new option needs to be done in multiple places:
    # - In the dictionary below. This is the primary truth of which options `Lark.__init__` accepts
    # - In the docstring above. It is used both for the docstring of `LarkOptions` and `Lark`, and in readthedocs
    # - In `lark-stubs/lark.pyi`:
    #   - As attribute to `LarkOptions`
    #   - As parameter to `Lark.__init__`
    # - As an attribute of `LarkOptions` above
    # - Potentially in `_LOAD_ALLOWED_OPTIONS` below this class, when the option doesn't change how the grammar is loaded
    # - Potentially in `lark.tools.__init__`, if it makes sense, and it can easily be passed as a cmd argument
    _defaults = {
    _defaults: Dict[str, Any] = {
        'debug': False,
        'keep_all_tokens': False,
        'tree_class': None,
@@ -189,13 +227,7 @@ _VALID_PRIORITY_OPTIONS = ('auto', 'normal', 'invert', None)
 _VALID_AMBIGUITY_OPTIONS = ('auto', 'resolve', 'explicit', 'forest')


 class PostLex(ABC):
    @abstractmethod
    def process(self, stream):
        return stream

    always_accept = ()

 _T = TypeVar('_T')

 class Lark(Serialize):
    """Main interface for the library.
@@ -210,7 +242,15 @@ class Lark(Serialize):
        >>> Lark(r'''start: "foo" ''')
        Lark(...)
    """
    def __init__(self, grammar, **options):

    source_path: str
    source_grammar: str
    grammar: 'Grammar'
    options: LarkOptions
    lexer: Lexer
    terminals: List[TerminalDef]

    def __init__(self, grammar: 'Union[Grammar, str, IO[str]]', **options) -> None:
        self.options = LarkOptions(options)

        # Set regex or re module
@@ -374,6 +414,7 @@ class Lark(Serialize):
        if cache_fn:
            logger.debug('Saving grammar to cache: %s', cache_fn)
            with FS.open(cache_fn, 'wb') as f:
                assert cache_md5 is not None
                f.write(cache_md5.encode('utf8') + b'\n')
                pickle.dump(used_files, f)
                self.save(f)
@@ -476,7 +517,7 @@ class Lark(Serialize):
        return inst._load({'data': data, 'memo': memo}, **kwargs)

    @classmethod
    def open(cls, grammar_filename, rel_to=None, **options):
    def open(cls: Type[_T], grammar_filename: str, rel_to: Optional[str]=None, **options) -> _T:
        """Create an instance of Lark with the grammar given by its filename

        If ``rel_to`` is provided, the function will find the grammar filename in relation to it.
@@ -494,7 +535,7 @@ class Lark(Serialize):
            return cls(f, **options)

    @classmethod
    def open_from_package(cls, package, grammar_path, search_paths=("",), **options):
    def open_from_package(cls: Type[_T], package: str, grammar_path: str, search_paths: Tuple[str, ...]=("",), **options) -> _T:
        """Create an instance of Lark with the grammar loaded from within the package `package`.
        This allows grammar loading from zipapps.

@@ -515,7 +556,7 @@ class Lark(Serialize):
        return 'Lark(open(%r), parser=%r, lexer=%r, ...)' % (self.source_path, self.options.parser, self.options.lexer)


    def lex(self, text, dont_ignore=False):
    def lex(self, text: str, dont_ignore: bool=False) -> Iterator[Token]:
        """Only lex (and postlex) the text, without parsing it. Only relevant when lexer='standard'

        When dont_ignore=True, the lexer will return all tokens, even those marked for %ignore.
@@ -530,11 +571,11 @@ class Lark(Serialize):
            return self.options.postlex.process(stream)
        return stream

    def get_terminal(self, name):
    def get_terminal(self, name: str) -> TerminalDef:
        """Get information about a terminal"""
        return self._terminals_dict[name]
    
    def parse_interactive(self, text=None, start=None):
    def parse_interactive(self, text: Optional[str]=None, start: Optional[str]=None) -> 'InteractiveParser':
        """Start an interactive parsing session.

        Parameters:
@@ -548,7 +589,7 @@ class Lark(Serialize):
        """
        return self.parser.parse_interactive(text, start=start)

    def parse(self, text, start=None, on_error=None):
    def parse(self, text: str, start: Optional[str]=None, on_error: 'Optional[Callable[[UnexpectedInput], bool]]'=None) -> Tree:
        """Parse the given text, according to the options provided.

        Parameters:
--- a/lark/lexer.py
+++ b/lark/lexer.py
@@ -1,5 +1,6 @@
 # Lexer Implementation

 from abc import abstractmethod, ABC
 import re
 from contextlib import suppress

@@ -9,12 +10,23 @@ from .exceptions import UnexpectedCharacters, LexError, UnexpectedToken
 ###{standalone
 from copy import copy

 from types import ModuleType
 from typing import (
    TypeVar, Type, Tuple, List, Dict, Iterator, Collection, Callable, Optional, FrozenSet, Any,
    Pattern as REPattern, ClassVar, TYPE_CHECKING
 )

 class Pattern(Serialize):
    raw = None
    type = None
 if TYPE_CHECKING:
    from .common import LexerConf

    def __init__(self, value, flags=(), raw=None):
 class Pattern(Serialize, ABC):

    value: str
    flags: Collection[str]
    raw: Optional[str]
    type: ClassVar[str]

    def __init__(self, value: str, flags: Collection[str]=(), raw: Optional[str]=None) -> None:
        self.value = value
        self.flags = frozenset(flags)
        self.raw = raw
@@ -29,13 +41,18 @@ class Pattern(Serialize):
    def __eq__(self, other):
        return type(self) == type(other) and self.value == other.value and self.flags == other.flags

    # Abstract hook: concrete patterns return their value as a regular
    # expression string (PatternStr escapes its literal, PatternRE uses the
    # value directly). The base implementation only raises.
    def to_regexp(self):
    @abstractmethod
    def to_regexp(self) -> str:
        raise NotImplementedError()

    # Abstract read-only property annotated as int. The concrete patterns in
    # this module return len(self.value) (PatternStr) or the first element of
    # self._get_width() (PatternRE). The base implementation only raises.
    def min_width(self):
    @property
    @abstractmethod
    def min_width(self) -> int:
        raise NotImplementedError()

    # Abstract read-only property annotated as int. The concrete patterns in
    # this module return len(self.value) (PatternStr) or the second element of
    # self._get_width() (PatternRE). The base implementation only raises.
    def max_width(self):
    @property
    @abstractmethod
    def max_width(self) -> int:
        raise NotImplementedError()

    def _get_flags(self, value):
@@ -47,23 +64,26 @@ class Pattern(Serialize):
 # Pattern whose ``value`` is matched as a literal string (it is passed through
 # re.escape before being used as a regexp).
 class PatternStr(Pattern):
    __serialize_fields__ = 'value', 'flags'

    # Short tag for this pattern kind; PatternRE uses "re".
    type = "str"
    type: ClassVar[str] = "str"

    # Escape the literal so it matches verbatim, then hand it to the inherited
    # _get_flags (presumably to apply self.flags -- its body is not visible
    # here, confirm).
    def to_regexp(self):
    def to_regexp(self) -> str:
        return self._get_flags(re.escape(self.value))

    # Both widths are len(self.value): min and max coincide for a literal.
    @property
    def min_width(self):
    def min_width(self) -> int:
        return len(self.value)

    @property
    def max_width(self) -> int:
        return len(self.value)
    max_width = min_width


 class PatternRE(Pattern):
    __serialize_fields__ = 'value', 'flags', '_width'

    type = "re"
    type: ClassVar[str] = "re"

    def to_regexp(self):
    def to_regexp(self) -> str:
        return self._get_flags(self.value)

    _width = None
@@ -73,11 +93,11 @@ class PatternRE(Pattern):
        return self._width

    @property
    def min_width(self):
    def min_width(self) -> int:
        return self._get_width()[0]

    @property
    def max_width(self):
    def max_width(self) -> int:
        return self._get_width()[1]


@@ -85,7 +105,11 @@ class TerminalDef(Serialize):
    __serialize_fields__ = 'name', 'pattern', 'priority'
    __serialize_namespace__ = PatternStr, PatternRE

    def __init__(self, name, pattern, priority=1):
    name: str
    pattern: Pattern
    priority: int

    def __init__(self, name: str, pattern: Pattern, priority: int=1) -> None:
        assert isinstance(pattern, Pattern), pattern
        self.name = name
        self.pattern = pattern
@@ -94,12 +118,13 @@ class TerminalDef(Serialize):
    def __repr__(self):
        return '%s(%r, %r)' % (type(self).__name__, self.name, self.pattern)

    # Return the text used to show this terminal to a user.
    # Names starting with '__' are treated as generated terminals: prefer the
    # pattern's raw text and fall back to the name when ``raw`` is falsy
    # (None or empty). Any other terminal is shown by its name.
    def user_repr(self):
    def user_repr(self) -> str:
        if self.name.startswith('__'): # We represent a generated terminal
            return self.pattern.raw or self.name
        else:
            return self.name

 _T = TypeVar('_T')

 class Token(str):
    """A string with meta-information, that is produced by the lexer.
@@ -122,6 +147,15 @@ class Token(str):
    """
    __slots__ = ('type', 'start_pos', 'value', 'line', 'column', 'end_line', 'end_column', 'end_pos')

    type: str
    start_pos: int
    value: Any
    line: int
    column: int
    end_line: int
    end_column: int
    end_pos: int

    def __new__(cls, type_, value, start_pos=None, line=None, column=None, end_line=None, end_column=None, end_pos=None):
        try:
            self = super(Token, cls).__new__(cls, value)
@@ -139,7 +173,7 @@ class Token(str):
        self.end_pos = end_pos
        return self

    def update(self, type_=None, value=None):
    def update(self, type_: Optional[str]=None, value: Optional[Any]=None) -> 'Token':
        return Token.new_borrow_pos(
            type_ if type_ is not None else self.type,
            value if value is not None else self.value,
@@ -147,7 +181,7 @@ class Token(str):
        )

    # Alternate constructor: build a ``cls`` instance from ``type_`` and
    # ``value`` while copying all six position fields (start_pos, line, column,
    # end_line, end_column, end_pos) from ``borrow_t``.
    @classmethod
    def new_borrow_pos(cls, type_, value, borrow_t):
    def new_borrow_pos(cls: Type[_T], type_: str, value: Any, borrow_t: 'Token') -> _T:
        return cls(type_, value, borrow_t.start_pos, borrow_t.line, borrow_t.column, borrow_t.end_line, borrow_t.end_column, borrow_t.end_pos)

    def __reduce__(self):
@@ -281,13 +315,35 @@ def _regexp_has_newline(r):
    return '\n' in r or '\\n' in r or '\\s' in r or '[^' in r or ('(?s' in r and '.' in r)


 class Lexer(object):
 class LexerState(object):
    """Holds the three pieces of lexing state: ``text``, ``line_ctr`` and ``last_token``.

    ``__slots__`` restricts instances to exactly those attributes.
    """
    __slots__ = 'text', 'line_ctr', 'last_token'

    def __init__(self, text, line_ctr, last_token=None):
        """Store the given values as-is; ``last_token`` defaults to ``None``."""
        self.text, self.line_ctr, self.last_token = text, line_ctr, last_token

    def __eq__(self, other):
        """Compare with another LexerState.

        ``text`` is compared by identity, ``line_ctr`` and ``last_token`` by
        equality. Any operand that is not a LexerState yields ``NotImplemented``.
        """
        if isinstance(other, LexerState):
            same_text = self.text is other.text
            return same_text and self.line_ctr == other.line_ctr and self.last_token == other.last_token
        return NotImplemented

    def __copy__(self):
        """Return a new state of the same class with a copied ``line_ctr``.

        ``text`` and ``last_token`` are shared with the original.
        """
        state_cls = type(self)
        return state_cls(self.text, copy(self.line_ctr), self.last_token)


 _Callback = Callable[[Token], Token]

 class Lexer(ABC):
    """Lexer interface

    Method Signatures:
        lex(self, text) -> Iterator[Token]
        lex(self, lexer_state, parser_state) -> Iterator[Token]
    """
    lex = NotImplemented
    @abstractmethod
    def lex(self, lexer_state: LexerState, parser_state: Any) -> Iterator[Token]:
        ...

    def make_lexer_state(self, text):
        line_ctr = LineCounter(b'\n' if isinstance(text, bytes) else '\n')
@@ -296,7 +352,14 @@ class Lexer(object):

 class TraditionalLexer(Lexer):

    def __init__(self, conf):
    terminals: Collection[TerminalDef]
    ignore_types: FrozenSet[str]
    newline_types: FrozenSet[str]
    user_callbacks: Dict[str, _Callback]
    callback: Dict[str, _Callback]
    re: ModuleType

    def __init__(self, conf: 'LexerConf') -> None:
        terminals = list(conf.terminals)
        assert all(isinstance(t, TerminalDef) for t in terminals), terminals

@@ -329,7 +392,7 @@ class TraditionalLexer(Lexer):

        self._mres = None

    def _build(self):
    def _build(self) -> None:
        terminals, self.callback = _create_unless(self.terminals, self.g_regex_flags, self.re, self.use_bytes)
        assert all(self.callback.values())

@@ -343,23 +406,24 @@ class TraditionalLexer(Lexer):
        self._mres = build_mres(terminals, self.g_regex_flags, self.re, self.use_bytes)

    # Lazily built value: while self._mres is still None, self._build() is
    # called (it assigns self._mres from build_mres) and the result is reused
    # on later accesses. The assert checks that _build really populated the
    # attribute before it is returned.
    @property
    def mres(self):
    def mres(self) -> List[Tuple[REPattern, Dict[int, str]]]:
        if self._mres is None:
            self._build()
            assert self._mres is not None
        return self._mres

    # Try each (compiled pattern, group-index -> type-name map) pair from
    # self.mres in order, matching ``text`` starting at ``pos``.
    # Returns (matched text, terminal type) for the first pattern that matches.
    # When no pattern matches, the loop falls through and the method returns
    # None implicitly.
    def match(self, text, pos):
    def match(self, text: str, pos: int) -> Optional[Tuple[str, str]]:
        for mre, type_from_index in self.mres:
            m = mre.match(text, pos)
            if m:
                return m.group(0), type_from_index[m.lastindex]

    # Generator that keeps yielding self.next_token(state, parser_state).
    # next_token signals the end of input by raising EOFError; suppress()
    # swallows it so the generator simply finishes.
    def lex(self, state, parser_state):
    def lex(self, state: LexerState, parser_state: Any) -> Iterator[Token]:
        with suppress(EOFError):
            while True:
                yield self.next_token(state, parser_state)

    def next_token(self, lex_state, parser_state=None):
    def next_token(self, lex_state: LexerState, parser_state: Any=None) -> Token:
        line_ctr = lex_state.line_ctr
        while line_ctr.char_pos < len(lex_state.text):
            res = self.match(lex_state.text, line_ctr.char_pos)
@@ -395,27 +459,12 @@ class TraditionalLexer(Lexer):
        raise EOFError(self)


 class LexerState(object):
    """Holds the three pieces of lexing state: ``text``, ``line_ctr`` and ``last_token``.

    ``__slots__`` restricts instances to exactly those attributes.
    """
    __slots__ = 'text', 'line_ctr', 'last_token'

    def __init__(self, text, line_ctr, last_token=None):
        """Store the given values as-is; ``last_token`` defaults to ``None``."""
        self.text, self.line_ctr, self.last_token = text, line_ctr, last_token

    def __eq__(self, other):
        """Compare with another LexerState.

        ``text`` is compared by identity, ``line_ctr`` and ``last_token`` by
        equality. Any operand that is not a LexerState yields ``NotImplemented``.
        """
        if isinstance(other, LexerState):
            same_text = self.text is other.text
            return same_text and self.line_ctr == other.line_ctr and self.last_token == other.last_token
        return NotImplemented

    def __copy__(self):
        """Return a new state of the same class with a copied ``line_ctr``.

        ``text`` and ``last_token`` are shared with the original.
        """
        state_cls = type(self)
        return state_cls(self.text, copy(self.line_ctr), self.last_token)


 class ContextualLexer(Lexer):

    def __init__(self, conf, states, always_accept=()):
    lexers: Dict[str, TraditionalLexer]
    root_lexer: TraditionalLexer

    def __init__(self, conf: 'LexerConf', states: Dict[str, Collection[str]], always_accept: Collection[str]=()) -> None:
        terminals = list(conf.terminals)
        terminals_by_name = conf.terminals_by_name

@@ -443,7 +492,7 @@ class ContextualLexer(Lexer):
    def make_lexer_state(self, text):
        return self.root_lexer.make_lexer_state(text)

    def lex(self, lexer_state, parser_state):
    def lex(self, lexer_state: LexerState, parser_state: Any) -> Iterator[Token]:
        try:
            while True:
                lexer = self.lexers[parser_state.position]
--- a/lark/load_grammar.py
+++ b/lark/load_grammar.py
@@ -8,6 +8,7 @@ import pkgutil
 from ast import literal_eval
 from numbers import Integral
 from contextlib import suppress
 from typing import List, Tuple, Union, Callable, Dict, Optional

 from .utils import bfs, logger, classify_bool, is_id_continue, is_id_start, bfs_all_unique
 from .lexer import Token, TerminalDef, PatternStr, PatternRE
@@ -17,7 +18,7 @@ from .parser_frontends import ParsingFrontend
 from .common import LexerConf, ParserConf
 from .grammar import RuleOptions, Rule, Terminal, NonTerminal, Symbol
 from .utils import classify, dedup_list
 from .exceptions import GrammarError, UnexpectedCharacters, UnexpectedToken, ParseError
 from .exceptions import GrammarError, UnexpectedCharacters, UnexpectedToken, ParseError, UnexpectedInput

 from .tree import Tree, SlottedTree as ST
 from .visitors import Transformer, Visitor, v_args, Transformer_InPlace, Transformer_NonRecursive
@@ -540,7 +541,12 @@ def nr_deepcopy_tree(t):


 class Grammar:
    def __init__(self, rule_defs, term_defs, ignore):

    term_defs: List[Tuple[str, Tuple[Tree, int]]]
    rule_defs: List[Tuple[str, Tuple[str, ...], Tree, RuleOptions]]
    ignore: List[str]

    def __init__(self, rule_defs: List[Tuple[str, Tuple[str, ...], Tree, RuleOptions]], term_defs: List[Tuple[str, Tuple[Tree, int]]], ignore: List[str]) -> None:
        self.term_defs = term_defs
        self.rule_defs = rule_defs
        self.ignore = ignore
@@ -679,14 +685,18 @@ class FromPackageLoader(object):
    pkg_name: The name of the package. You can probably provide `__name__` most of the time
    search_paths: All the path that will be search on absolute imports.
    """
    def __init__(self, pkg_name, search_paths=("", )):

    pkg_name: str
    search_paths: Tuple[str, ...]

    def __init__(self, pkg_name: str, search_paths: Tuple[str, ...]=("", )) -> None:
        self.pkg_name = pkg_name
        self.search_paths = search_paths

    def __repr__(self):
        return "%s(%r, %r)" % (type(self).__name__, self.pkg_name, self.search_paths)

    def __call__(self, base_path, grammar_path):
    def __call__(self, base_path: Union[None, str, PackageResource], grammar_path: str) -> Tuple[PackageResource, str]:
        if base_path is None:
            to_try = self.search_paths
        else:
@@ -863,7 +873,7 @@ def _search_interactive_parser(interactive_parser, predicate):
        if predicate(p):
            return path, p

 def find_grammar_errors(text, start='start'):
 def find_grammar_errors(text: str, start: str='start') -> List[Tuple[UnexpectedInput, str]]:
    errors = []
    def on_error(e):
        errors.append((e, _error_repr(e)))
@@ -912,7 +922,12 @@ def _mangle_exp(exp, mangle):


 class GrammarBuilder:
    def __init__(self, global_keep_all_tokens=False, import_paths=None, used_files=None):

    global_keep_all_tokens: bool
    import_paths: List[Union[str, Callable]]
    used_files: Dict[str, str]

    def __init__(self, global_keep_all_tokens: bool=False, import_paths: Optional[List[Union[str, Callable]]]=None, used_files: Optional[Dict[str, str]]=None) -> None:
        self.global_keep_all_tokens = global_keep_all_tokens
        self.import_paths = import_paths or []
        self.used_files = used_files or {}
@@ -1056,7 +1071,7 @@ class GrammarBuilder:
        return name, exp, params, opts


    def load_grammar(self, grammar_text, grammar_name="<?>", mangle=None):
    def load_grammar(self, grammar_text: str, grammar_name: str="<?>", mangle: Optional[Callable[[str], str]]=None) -> None:
        tree = _parse_grammar(grammar_text, grammar_name)

        imports = {}
@@ -1119,7 +1134,7 @@ class GrammarBuilder:
        self._definitions = {k: v for k, v in self._definitions.items() if k in _used}


    def do_import(self, dotted_path, base_path, aliases, base_mangle=None):
    def do_import(self, dotted_path: Tuple[str, ...], base_path: Optional[str], aliases: Dict[str, str], base_mangle: Optional[Callable[[str], str]]=None) -> None:
        assert dotted_path
        mangle = _get_mangle('__'.join(dotted_path), aliases, base_mangle)
        grammar_path = os.path.join(*dotted_path) + EXT
@@ -1155,7 +1170,7 @@ class GrammarBuilder:
            assert False, "Couldn't import grammar %s, but a corresponding file was found at a place where lark doesn't search for it" % (dotted_path,)


    def validate(self):
    def validate(self) -> None:
        for name, (params, exp, _options) in self._definitions.items():
            for i, p in enumerate(params):
                if p in self._definitions:
@@ -1184,7 +1199,7 @@ class GrammarBuilder:
        if not set(self._definitions).issuperset(self._ignore_names):
            raise GrammarError("Terminals %s were marked to ignore but were not defined!" % (set(self._ignore_names) - set(self._definitions)))

    def build(self):
    def build(self) -> Grammar:
        self.validate()
        rule_defs = []
        term_defs = []
--- a/lark/parser_frontends.py
+++ b/lark/parser_frontends.py
@@ -7,7 +7,7 @@ from .parsers.lalr_parser import LALR_Parser
 from .tree import Tree
 from .common import LexerConf, ParserConf
 try:
    import regex
    import regex  # type: ignore
 except ImportError:
    regex = None
 import re
--- a/lark/reconstruct.py
+++ b/lark/reconstruct.py
@@ -1,11 +1,13 @@
 """Reconstruct text from a tree, based on Lark grammar"""

 from typing import List, Dict, Union, Callable, Iterable, Optional
 import unicodedata

 from .lark import Lark
 from .tree import Tree
 from .visitors import Transformer_InPlace
 from .lexer import Token, PatternStr
 from .grammar import Terminal, NonTerminal
 from .lexer import Token, PatternStr, TerminalDef
 from .grammar import Terminal, NonTerminal, Symbol

 from .tree_matcher import TreeMatcher, is_discarded_terminal
 from .utils import is_id_continue
@@ -21,7 +23,10 @@ def is_iter_empty(i):
 class WriteTokensTransformer(Transformer_InPlace):
    "Inserts discarded tokens into their correct place, according to the rules of grammar"

    def __init__(self, tokens, term_subs):
    tokens: Dict[str, TerminalDef]
    term_subs: Dict[str, Callable[[Symbol], str]]

    def __init__(self, tokens: Dict[str, TerminalDef], term_subs: Dict[str, Callable[[Symbol], str]]) -> None:
        self.tokens = tokens
        self.term_subs = term_subs

@@ -70,7 +75,9 @@ class Reconstructor(TreeMatcher):
        term_subs: a dictionary of [Terminal name as str] to [output text as str]
    """

    def __init__(self, parser, term_subs=None):
    write_tokens: WriteTokensTransformer

    def __init__(self, parser: Lark, term_subs: Optional[Dict[str, Callable[[Symbol], str]]]=None) -> None:
        TreeMatcher.__init__(self, parser)

        self.write_tokens = WriteTokensTransformer({t.name:t for t in self.tokens}, term_subs or {})
@@ -87,7 +94,7 @@ class Reconstructor(TreeMatcher):
            else:
                yield item

    def reconstruct(self, tree, postproc=None, insert_spaces=True):
    def reconstruct(self, tree: Tree, postproc: Optional[Callable[[Iterable[str]], Iterable[str]]]=None, insert_spaces: bool=True) -> str:
        x = self._reconstruct(tree)
        if postproc:
            x = postproc(x)
--- a/lark/tree.py
+++ b/lark/tree.py
@@ -1,16 +1,35 @@
 try:
    from future_builtins import filter
    from future_builtins import filter  # type: ignore
 except ImportError:
    pass

 import sys
 from copy import deepcopy


 ###{standalone
 from collections import OrderedDict
 from typing import List, Callable, Iterator, Union, Optional, Any, TYPE_CHECKING

 if TYPE_CHECKING:
    from .lexer import TerminalDef
    if sys.version_info >= (3, 8):
        from typing import Literal
    else:
        from typing_extensions import Literal

 class Meta:

    empty: bool
    line: int
    column: int
    start_pos: int
    end_line: int
    end_column: int
    end_pos: int
    orig_expansion: 'List[TerminalDef]'
    match_tree: bool

    def __init__(self):
        self.empty = True

@@ -27,13 +46,17 @@ class Tree(object):
        meta: Line & Column numbers (if ``propagate_positions`` is enabled).
            meta attributes: line, column, start_pos, end_line, end_column, end_pos
    """
    def __init__(self, data, children, meta=None):

    data: str
    children: 'List[Union[str, Tree]]'

    def __init__(self, data: str, children: 'List[Union[str, Tree]]', meta: Optional[Meta]=None) -> None:
        self.data = data
        self.children = children
        self._meta = meta

    # Lazily created metadata: __init__ stores the ``meta`` argument (None by
    # default) in self._meta, and the first access replaces a None with a
    # fresh Meta() that is then reused.
    @property
    def meta(self):
    def meta(self) -> Meta:
        if self._meta is None:
            self._meta = Meta()
        return self._meta
@@ -57,7 +80,7 @@ class Tree(object):

        return l

    def pretty(self, indent_str='  '):
    def pretty(self, indent_str: str='  ') -> str:
        """Returns an indented string representation of the tree.

        Great for debugging.
@@ -73,10 +96,10 @@ class Tree(object):
    def __ne__(self, other):
        return not (self == other)

    # Hash of (data, tuple(children)); every child must itself be hashable.
    # The value is derived from mutable attributes, so it changes when
    # ``data`` or ``children`` are modified.
    def __hash__(self):
    def __hash__(self) -> int:
        return hash((self.data, tuple(self.children)))

    def iter_subtrees(self):
    def iter_subtrees(self) -> 'Iterator[Tree]':
        """Depth-first iteration.

        Iterates over all the subtrees, never returning to the same node twice (Lark's parse-tree is actually a DAG).
@@ -91,23 +114,23 @@ class Tree(object):
        del queue
        return reversed(list(subtrees.values()))

    # Applies ``filter(pred, ...)`` to the subtrees produced by
    # self.iter_subtrees(), so results come in that method's iteration order.
    def find_pred(self, pred):
    def find_pred(self, pred: 'Callable[[Tree], bool]') -> 'Iterator[Tree]':
        """Returns all nodes of the tree that evaluate pred(node) as true."""
        return filter(pred, self.iter_subtrees())

    # Thin wrapper over find_pred using an equality test on each subtree's
    # ``data`` attribute.
    def find_data(self, data):
    def find_data(self, data: str) -> 'Iterator[Tree]':
        """Returns all nodes of the tree whose data equals the given data."""
        return self.find_pred(lambda t: t.data == data)

 ###}

    # Mutates self.children in place: each child at a given index is replaced
    # by that child's own ``children`` list (spliced in at the same position).
    # The indexed child must therefore have a ``children`` attribute; a plain
    # str leaf would raise AttributeError.
    def expand_kids_by_index(self, *indices):
    def expand_kids_by_index(self, *indices: int) -> None:
        """Expand (inline) children at the given indices"""
        for i in sorted(indices, reverse=True):  # reverse so that changing tail won't affect indices
            kid = self.children[i]
            self.children[i:i+1] = kid.children

    def scan_values(self, pred):
    def scan_values(self, pred: 'Callable[[Union[str, Tree]], bool]') -> Iterator[str]:
        """Return all values in the tree that evaluate pred(value) as true.

        This can be used to find all the tokens in the tree.
@@ -140,10 +163,10 @@ class Tree(object):
    def __deepcopy__(self, memo):
        return type(self)(self.data, deepcopy(self.children, memo), meta=self._meta)

    # Shallow copy: creates a new instance of the same class from ``data`` and
    # the SAME ``children`` list object (the list itself is not duplicated).
    # ``meta`` is not passed, so with Tree.__init__ the new tree starts with
    # _meta = None.
    def copy(self):
    def copy(self) -> 'Tree':
        return type(self)(self.data, self.children)

    # Overwrite ``data`` and ``children`` in place. The given list is stored
    # by reference (no copy) and ``_meta`` is left untouched.
    def set(self, data, children):
    def set(self, data: str, children: 'List[Union[str, Tree]]') -> None:
        self.data = data
        self.children = children

@@ -153,7 +176,7 @@ class SlottedTree(Tree):
    __slots__ = 'data', 'children', 'rule', '_meta'


 # Render ``tree`` to a PNG file. The graph is built by pydot__tree_to_graph
 # (``rankdir`` and any extra keyword arguments are forwarded to it, which in
 # turn passes them to pydot.Dot) and written with graph.write_png(filename).
 def pydot__tree_to_png(tree, filename, rankdir="LR", **kwargs):
 def pydot__tree_to_png(tree: Tree, filename: str, rankdir: 'Literal["TB", "LR", "BT", "RL"]'="LR", **kwargs) -> None:
    graph = pydot__tree_to_graph(tree, rankdir, **kwargs)
    graph.write_png(filename)

@@ -174,7 +197,7 @@ def pydot__tree_to_graph(tree, rankdir="LR", **kwargs):
    possible attributes, see https://www.graphviz.org/doc/info/attrs.html.
    """

    import pydot
    import pydot  # type: ignore
    graph = pydot.Dot(graph_type='digraph', rankdir=rankdir, **kwargs)

    i = [0]
--- a/lark/utils.py
+++ b/lark/utils.py
@@ -6,7 +6,7 @@ from collections import deque
 ###{standalone
 import sys, re
 import logging
 logger = logging.getLogger("lark")
 logger: logging.Logger = logging.getLogger("lark")
 logger.addHandler(logging.StreamHandler())
 # Set to highest level, since we have some warnings amongst the code
 # By default, we should not output any log messages
@@ -132,7 +132,7 @@ def smart_decorator(f, create_decorator):


 try:
    import regex
    import regex  # type: ignore
 except ImportError:
    regex = None

--- a/lark/visitors.py
+++ b/lark/visitors.py
@@ -1,3 +1,4 @@
 from abc import ABC
 from functools import wraps

 from .utils import smart_decorator, combine_alternatives
@@ -7,7 +8,12 @@ from .lexer import Token

 ###{standalone
 from inspect import getmembers, getmro
 from typing import TypeVar, Tuple, List, Callable, Generic, Type, Union, Optional

 _T = TypeVar('_T')
 _R = TypeVar('_R')
 _FUNC = Callable[..., _T]
 _DECORATED = Union[_FUNC, type]

 class Discard(Exception):
    """When raising the Discard exception in a transformer callback,
@@ -46,7 +52,7 @@ class _Decoratable:
        return cls


 class Transformer(_Decoratable):
 class Transformer(_Decoratable, ABC, Generic[_T]):
    """Transformers visit each node of the tree, and run the appropriate method on it according to the node's data.

    Methods are provided by the user via inheritance, and called according to ``tree.data``.
@@ -74,7 +80,7 @@ class Transformer(_Decoratable):
    """
    __visit_tokens__ = True   # For backwards compatibility

    def __init__(self,  visit_tokens=True):
    def __init__(self,  visit_tokens: bool=True) -> None:
        self.__visit_tokens__ = visit_tokens

    def _call_userfunc(self, tree, new_children=None):
@@ -125,11 +131,11 @@ class Transformer(_Decoratable):
        children = list(self._transform_children(tree.children))
        return self._call_userfunc(tree, children)

    # Public entry point; it simply delegates to self._transform_tree(tree)
    # and returns that result.
    def transform(self, tree):
    def transform(self, tree: Tree) -> _T:
        "Transform the given tree, and return the final result"
        return self._transform_tree(tree)

    # ``a * b`` builds TransformerChain(a, b). TransformerChain.transform runs
    # its transformers in the order given, so ``self`` is applied first and
    # ``other`` receives its output.
    def __mul__(self, other):
    def __mul__(self, other: 'Transformer[_T]') -> 'TransformerChain[_T]':
        """Chain two transformers together, returning a new transformer.
        """
        return TransformerChain(self, other)
@@ -149,16 +155,19 @@ class Transformer(_Decoratable):
        return token


 # A sequence of transformers applied one after another.
 class TransformerChain(object):
    def __init__(self, *transformers):
 class TransformerChain(Generic[_T]):

    # The chained transformers in application order (the *args tuple as given).
    transformers: Tuple[Transformer[_T], ...]

    def __init__(self, *transformers: Transformer[_T]) -> None:
        self.transformers = transformers

    # Feed the tree through every transformer in order: each one receives the
    # previous one's output, and the last output is returned. With no
    # transformers the input is returned unchanged.
    def transform(self, tree):
    def transform(self, tree: Tree) -> _T:
        for t in self.transformers:
            tree = t.transform(tree)
        return tree

    # Return a NEW chain with ``other`` appended; this chain is not modified.
    def __mul__(self, other):
    def __mul__(self, other: Transformer[_T]) -> 'TransformerChain[_T]':
        return TransformerChain(*self.transformers + (other,))


@@ -239,19 +248,19 @@ class VisitorBase:
        return cls


 class Visitor(VisitorBase):
 class Visitor(VisitorBase, ABC, Generic[_T]):
    """Tree visitor, non-recursive (can handle huge trees).

    Visiting a node calls its methods (provided by the user via inheritance) according to ``tree.data``
    """

    # Calls self._call_userfunc on every subtree yielded by
    # tree.iter_subtrees(). The callbacks' return values are ignored and the
    # same ``tree`` object that was passed in is returned.
    def visit(self, tree):
    def visit(self, tree: Tree) -> Tree:
        "Visits the tree, starting with the leaves and finally the root (bottom-up)"
        for subtree in tree.iter_subtrees():
            self._call_userfunc(subtree)
        return tree

    def visit_topdown(self,tree):
    def visit_topdown(self, tree: Tree) -> Tree:
        "Visit the tree, starting at the root, and ending at the leaves (top-down)"
        for subtree in tree.iter_subtrees_topdown():
            self._call_userfunc(subtree)
@@ -266,7 +275,7 @@ class Visitor_Recursive(VisitorBase):
    Slightly faster than the non-recursive version.
    """

    def visit(self, tree):
    def visit(self, tree: Tree) -> Tree:
        "Visits the tree, starting with the leaves and finally the root (bottom-up)"
        for child in tree.children:
            if isinstance(child, Tree):
@@ -275,7 +284,7 @@ class Visitor_Recursive(VisitorBase):
        self._call_userfunc(tree)
        return tree

    def visit_topdown(self,tree):
    def visit_topdown(self,tree: Tree) -> Tree:
        "Visit the tree, starting at the root, and ending at the leaves (top-down)"
        self._call_userfunc(tree)

@@ -286,16 +295,7 @@ class Visitor_Recursive(VisitorBase):
        return tree


 def visit_children_decor(func):
    """Decorate a method so it receives visited children instead of the tree.

    See Interpreter. The wrapper calls ``cls.visit_children(tree)`` on its
    first argument and passes the resulting values to ``func`` in place of
    ``tree``.
    """
    @wraps(func)
    def call_with_visited_children(cls, tree):
        return func(cls, cls.visit_children(tree))
    return call_with_visited_children


 class Interpreter(_Decoratable):
 class Interpreter(_Decoratable, ABC, Generic[_T]):
    """Interpreter walks the tree starting at the root.

    Visits the tree, starting with the root and finally the leaves (top-down)
@@ -307,7 +307,7 @@ class Interpreter(_Decoratable):
    This allows the user to implement branching and loops.
    """

    def visit(self, tree):
    def visit(self, tree: Tree) -> _T:
        f = getattr(self, tree.data)
        wrapper = getattr(f, 'visit_wrapper', None)
        if wrapper is not None:
@@ -315,7 +315,7 @@ class Interpreter(_Decoratable):
        else:
            return f(tree)

    # Build a list with one entry per child, in order: Tree children are
    # replaced by the result of self.visit(child), while any non-Tree child
    # is passed through unchanged.
    def visit_children(self, tree):
    def visit_children(self, tree: Tree) -> List[_T]:
        return [self.visit(child) if isinstance(child, Tree) else child
                for child in tree.children]

@@ -326,6 +326,16 @@ class Interpreter(_Decoratable):
        return self.visit_children(tree)


 _InterMethod = Callable[[Type[Interpreter], _T], _R]

 def visit_children_decor(func: _InterMethod) -> _InterMethod:
    """Decorate a method so it receives visited children instead of the tree.

    See Interpreter. The wrapper calls ``cls.visit_children(tree)`` on its
    first argument and passes the resulting values to ``func`` in place of
    ``tree``.
    """
    @wraps(func)
    def call_with_visited_children(cls, tree):
        return func(cls, cls.visit_children(tree))
    return call_with_visited_children

 # Decorators

 def _apply_decorator(obj, decorator, **kwargs):
@@ -380,7 +390,7 @@ def _vargs_tree(f, data, children, meta):
    return f(Tree(data, children, meta))


 def v_args(inline=False, meta=False, tree=False, wrapper=None):
 def v_args(inline: bool=False, meta: bool=False, tree: bool=False, wrapper: Optional[Callable]=None) -> Callable[[_DECORATED], _DECORATED]:
    """A convenience decorator factory for modifying the behavior of user-supplied visitor methods.

    By default, callback methods of transformers/visitors accept one argument - a list of the node's children.