More .pyi merging

exceptions, lark, and tree
3 年前 · 089bc2b523
--- a/lark-stubs/exceptions.pyi
+++ b/lark-stubs/exceptions.pyi
@@ -1,65 +0,0 @@
 # -*- coding: utf-8 -*-

 from typing import Dict, Iterable, Callable, Union, TypeVar, Tuple, Any, List, Set
 from .tree import Tree
 from .lexer import Token
 from .parsers.lalr_interactive_parser import InteractiveParser

 class LarkError(Exception):
    pass


 class ConfigurationError(LarkError, ValueError):
    pass


 class GrammarError(LarkError):
    pass


 class ParseError(LarkError):
    pass


 class LexError(LarkError):
    pass


 T = TypeVar('T')

 class UnexpectedEOF(ParseError):
    # NOTE(review): the implementation in lark/exceptions.py declares
    # `class UnexpectedEOF(ParseError, UnexpectedInput)`; this stub lists only
    # ParseError as a base — confirm and align the two declarations.
    expected: List[Token]

 class UnexpectedInput(LarkError):
    line: int
    column: int
    pos_in_stream: int
    state: Any

    def get_context(self, text: str, span: int = ...) -> str:
        ...

    def match_examples(
            self,
            parse_fn: Callable[[str], Tree],
            examples: Union[Dict[T, Iterable[str]], Iterable[Tuple[T, Iterable[str]]]],
            token_type_match_fallback: bool = False,
            use_accepts: bool = False,
    ) -> T:
        ...


 class UnexpectedToken(ParseError, UnexpectedInput):
    expected: Set[str]
    considered_rules: Set[str]
    interactive_parser: InteractiveParser
    accepts: Set[str]

 class UnexpectedCharacters(LexError, UnexpectedInput):
    allowed: Set[str]
    considered_tokens: Set[Any]


 class VisitError(LarkError):
    obj: Union[Tree, Token]
    orig_exc: Exception
--- a/lark-stubs/lark.pyi
+++ b/lark-stubs/lark.pyi
@@ -1,19 +1,13 @@
 # -*- coding: utf-8 -*-

 from typing import (
    TypeVar, Type, List, Dict, IO, Iterator, Callable, Union, Optional,
    Type, List, Dict, IO, Iterator, Callable, Union, Optional,
    Literal, Protocol, Tuple, Iterable,
 )

 from .parsers.lalr_interactive_parser import InteractiveParser
 from .visitors import Transformer
 from .lexer import Token, Lexer, TerminalDef
 from .tree import Tree
 from .exceptions import UnexpectedInput
 from .load_grammar import Grammar

 _T = TypeVar('_T')

 from .load_grammar import Grammar, PackageResource

 class PostLex(Protocol):

@@ -22,39 +16,8 @@ class PostLex(Protocol):

    always_accept: Iterable[str]


 class LarkOptions:
    start: List[str]
    parser: str
    lexer: str
    transformer: Optional[Transformer]
    postlex: Optional[PostLex]
    ambiguity: str
    regex: bool
    debug: bool
    keep_all_tokens: bool
    propagate_positions: Union[bool, str]
    maybe_placeholders: bool
    lexer_callbacks: Dict[str, Callable[[Token], Token]]
    cache: Union[bool, str]
    g_regex_flags: int
    use_bytes: bool
    import_paths: List[Union[str, Callable[[Union[None, str, PackageResource], str], Tuple[str, str]]]]
    source_path: Optional[str]


 class PackageResource(object):
    pkg_name: str
    path: str

    def __init__(self, pkg_name: str, path: str): ...


 class FromPackageLoader:
    def __init__(self, pkg_name: str, search_paths: Tuple[str, ...] = ...): ...

    def __call__(self, base_path: Union[None, str, PackageResource], grammar_path: str) -> Tuple[PackageResource, str]: ...

    ...

 class Lark:
    source_path: str
@@ -88,22 +51,3 @@ class Lark:
    ):
        ...

    def parse(self, text: str, start: Optional[str] = None, on_error: Optional[Callable[[UnexpectedInput], bool]] = None) -> Tree:
        # `on_error` defaults to None, so it is declared Optional explicitly:
        # type checkers no longer infer Optional from a None default.
        ...

    def parse_interactive(self, text: Optional[str] = None, start: Optional[str] = None) -> InteractiveParser:
        # `text` defaults to None, so it is declared Optional explicitly:
        # type checkers no longer infer Optional from a None default.
        ...

    @classmethod
    def open(cls: Type[_T], grammar_filename: str, rel_to: Optional[str] = None, **options) -> _T:
        ...

    @classmethod
    def open_from_package(cls: Type[_T], package: str, grammar_path: str, search_paths: Tuple[str, ...] = ..., **options) -> _T:
        ...

    def lex(self, text: str, dont_ignore: bool = False) -> Iterator[Token]:
        ...

    def get_terminal(self, name: str) -> TerminalDef:
        ...
--- a/lark-stubs/tree.pyi
+++ b/lark-stubs/tree.pyi
@@ -1,67 +1,9 @@
 # -*- coding: utf-8 -*-

 from typing import List, Callable, Iterator, Union, Optional, Literal, Any
 from .lexer import TerminalDef

 class Meta:
    empty: bool
    line: int
    column: int
    start_pos: int
    end_line: int
    end_column: int
    end_pos: int
    orig_expansion: List[TerminalDef]
    match_tree: bool

 from typing import Literal

 class Tree:
    data: str
    children: List[Union[str, Tree]]
    meta: Meta

    def __init__(
        self,
        data: str,
        children: List[Union[str, Tree]],
        meta: Optional[Meta] = None
    ) -> None:
        ...

    def pretty(self, indent_str: str = ...) -> str:
        ...

    def find_pred(self, pred: Callable[[Tree], bool]) -> Iterator[Tree]:
        ...

    def find_data(self, data: str) -> Iterator[Tree]:
        ...

    def expand_kids_by_index(self, *indices: int) -> None:
        ...

    def scan_values(self, pred: Callable[[Union[str, Tree]], bool]) -> Iterator[str]:
        ...

    def iter_subtrees(self) -> Iterator[Tree]:
        ...

    def iter_subtrees_topdown(self) -> Iterator[Tree]:
        ...

    def copy(self) -> Tree:
        ...

    def set(self, data: str, children: List[Union[str, Tree]]) -> None:
        ...

    def __hash__(self) -> int:
        ...


 class SlottedTree(Tree):
    pass

    ...

 def pydot__tree_to_png(
    tree: Tree,
--- a/lark/exceptions.py
+++ b/lark/exceptions.py
@@ -3,6 +3,12 @@ from .utils import logger, NO_VALUE

 ###{standalone

 from typing import Dict, Iterable, Callable, Union, TypeVar, Tuple, Any, List, Set, TYPE_CHECKING

 if TYPE_CHECKING:
    from .lexer import Token
    from .parsers.lalr_interactive_parser import InteractiveParser
    from .tree import Tree

 class LarkError(Exception):
    pass
@@ -28,6 +34,7 @@ class ParseError(LarkError):
 class LexError(LarkError):
    pass

 T = TypeVar('T')

 class UnexpectedInput(LarkError):
    """UnexpectedInput Error.
@@ -39,10 +46,13 @@ class UnexpectedInput(LarkError):

    After catching one of these exceptions, you may call the following helper methods to create a nicer error message.
    """
    line: int
    column: int
    pos_in_stream = None
    state: Any
    _terminals_by_name = None

    def get_context(self, text, span=40):
    def get_context(self, text: str, span: int=40) -> str:
        """Returns a pretty string pinpointing the error in the text,
        with span amount of context characters around it.

@@ -63,7 +73,7 @@ class UnexpectedInput(LarkError):
            after = text[pos:end].split(b'\n', 1)[0]
            return (before + after + b'\n' + b' ' * len(before.expandtabs()) + b'^\n').decode("ascii", "backslashreplace")

    def match_examples(self, parse_fn, examples, token_type_match_fallback=False, use_accepts=False):
    def match_examples(self, parse_fn: 'Callable[[str], Tree]', examples: Union[Dict[T, Iterable[str]], Iterable[Tuple[T, Iterable[str]]]], token_type_match_fallback: bool=False, use_accepts: bool=False) -> T:
        """Allows you to detect what's wrong in the input text by matching
        against example errors.

@@ -126,6 +136,9 @@ class UnexpectedInput(LarkError):


 class UnexpectedEOF(ParseError, UnexpectedInput):

    expected: 'List[Token]'

    def __init__(self, expected, state=None, terminals_by_name=None):
        self.expected = expected
        self.state = state
@@ -145,6 +158,10 @@ class UnexpectedEOF(ParseError, UnexpectedInput):


 class UnexpectedCharacters(LexError, UnexpectedInput):

    allowed: Set[str]
    considered_tokens: Set[Any]

    def __init__(self, seq, lex_pos, line, column, allowed=None, considered_tokens=None, state=None, token_history=None,
                 terminals_by_name=None, considered_rules=None):
        # TODO considered_tokens and allowed can be figured out using state
@@ -187,6 +204,10 @@ class UnexpectedToken(ParseError, UnexpectedInput):
    see: ``InteractiveParser``.
    """

    expected: Set[str]
    considered_rules: Set[str]
    interactive_parser: 'InteractiveParser'

    def __init__(self, token, expected, considered_rules=None, state=None, interactive_parser=None, terminals_by_name=None, token_history=None):
        # TODO considered_rules and expected can be figured out using state
        self.line = getattr(token, 'line', '?')
@@ -205,7 +226,7 @@ class UnexpectedToken(ParseError, UnexpectedInput):
        super(UnexpectedToken, self).__init__()

    @property
    def accepts(self):
    def accepts(self) -> Set[str]:
        """Return the result of ``interactive_parser.accepts()``, computed on first access and cached.

        The cached value lives in ``self._accepts``; ``NO_VALUE`` marks it as not yet computed.
        """
        if self._accepts is NO_VALUE:
            # `and` short-circuits: when `interactive_parser` is falsy (e.g. None) that falsy
            # value itself is cached and returned instead of a set.
            # NOTE(review): the `Set[str]` return annotation does not cover that case — confirm.
            self._accepts = self.interactive_parser and self.interactive_parser.accepts()
        return self._accepts
@@ -228,6 +249,9 @@ class VisitError(LarkError):
    - orig_exc: the exception that cause it to fail
    """

    obj: 'Union[Tree, Token]'
    orig_exc: Exception

    def __init__(self, rule, obj, orig_exc):
        self.obj = obj
        self.orig_exc = orig_exc
--- a/lark/lark.py
+++ b/lark/lark.py
@@ -1,6 +1,10 @@
 from abc import ABC, abstractmethod
 import sys, os, pickle, hashlib
 import tempfile
 from typing import (
    TypeVar, Type, List, Dict, Iterator, Callable, Union, Optional,
    Tuple, Iterable, TYPE_CHECKING
 )

 from .exceptions import ConfigurationError, assert_config
 from .utils import Serialize, SerializeMemoizer, FS, isascii, logger
@@ -8,7 +12,7 @@ from .load_grammar import load_grammar, FromPackageLoader, Grammar, verify_used_
 from .tree import Tree
 from .common import LexerConf, ParserConf

 from .lexer import Lexer, TraditionalLexer, TerminalDef, LexerThread
 from .lexer import Lexer, TraditionalLexer, TerminalDef, LexerThread, Token
 from .parse_tree_builder import ParseTreeBuilder
 from .parser_frontends import get_frontend, _get_lexer_callbacks
 from .grammar import Rule
@@ -19,14 +23,44 @@ try:
 except ImportError:
    regex = None

 if TYPE_CHECKING:
    from .load_grammar import PackageResource
    from .exceptions import UnexpectedInput
    from .parsers.lalr_interactive_parser import InteractiveParser
    from .visitors import Transformer

 ###{standalone

 class PostLex(ABC):
    """Abstract base class for post-lexing hooks.

    ``Lark.lex`` passes the lexer's token stream to ``process`` and returns
    whatever ``process`` returns.
    """

    @abstractmethod
    def process(self, stream):
        # The base implementation is a pass-through; because the method is
        # abstract, concrete subclasses must still override it.
        return stream

    # Empty by default. The stub declared this attribute as Iterable[str].
    # NOTE(review): presumably terminal names the lexer must always accept — confirm.
    always_accept = ()

 class LarkOptions(Serialize):
    """Specifies the options for Lark

    """

    start: List[str]
    parser: str
    lexer: str
    transformer: 'Optional[Transformer]'
    postlex: Optional[PostLex]
    ambiguity: str
    regex: bool
    debug: bool
    keep_all_tokens: bool
    propagate_positions: Union[bool, str]
    maybe_placeholders: bool
    lexer_callbacks: Dict[str, Callable[[Token], Token]]
    cache: Union[bool, str]
    g_regex_flags: int
    use_bytes: bool
    import_paths: 'List[Union[str, Callable[[Union[None, str, PackageResource], str], Tuple[str, str]]]]'
    source_path: Optional[str]

    OPTIONS_DOC = """
    **===  General Options  ===**

@@ -189,13 +223,7 @@ _VALID_PRIORITY_OPTIONS = ('auto', 'normal', 'invert', None)
 _VALID_AMBIGUITY_OPTIONS = ('auto', 'resolve', 'explicit', 'forest')


 class PostLex(ABC):
    @abstractmethod
    def process(self, stream):
        return stream

    always_accept = ()

 _T = TypeVar('_T')

 class Lark(Serialize):
    """Main interface for the library.
@@ -476,7 +504,7 @@ class Lark(Serialize):
        return inst._load({'data': data, 'memo': memo}, **kwargs)

    @classmethod
    def open(cls, grammar_filename, rel_to=None, **options):
    def open(cls: Type[_T], grammar_filename: str, rel_to: Optional[str]=None, **options) -> _T:
        """Create an instance of Lark with the grammar given by its filename

        If ``rel_to`` is provided, the function will find the grammar filename in relation to it.
@@ -494,7 +522,7 @@ class Lark(Serialize):
            return cls(f, **options)

    @classmethod
    def open_from_package(cls, package, grammar_path, search_paths=("",), **options):
    def open_from_package(cls: Type[_T], package: str, grammar_path: str, search_paths: Tuple[str, ...]=("",), **options) -> _T:
        """Create an instance of Lark with the grammar loaded from within the package `package`.
        This allows grammar loading from zipapps.

@@ -515,7 +543,7 @@ class Lark(Serialize):
        return 'Lark(open(%r), parser=%r, lexer=%r, ...)' % (self.source_path, self.options.parser, self.options.lexer)


    def lex(self, text, dont_ignore=False):
    def lex(self, text: str, dont_ignore: bool=False) -> Iterator[Token]:
        """Only lex (and postlex) the text, without parsing it. Only relevant when lexer='standard'

        When dont_ignore=True, the lexer will return all tokens, even those marked for %ignore.
@@ -530,11 +558,11 @@ class Lark(Serialize):
            return self.options.postlex.process(stream)
        return stream

    def get_terminal(self, name):
    def get_terminal(self, name: str) -> TerminalDef:
        """Look up the terminal definition registered under ``name``.

        A missing name propagates the lookup error raised by
        ``self._terminals_dict`` (``KeyError`` for a plain dict).
        """
        terminals = self._terminals_dict
        return terminals[name]
    
    def parse_interactive(self, text=None, start=None):
    def parse_interactive(self, text: str=None, start: Optional[str]=None) -> 'InteractiveParser':
        """Start an interactive parsing session.

        Parameters:
@@ -548,7 +576,7 @@ class Lark(Serialize):
        """
        return self.parser.parse_interactive(text, start=start)

    def parse(self, text, start=None, on_error=None):
    def parse(self, text: str, start: Optional[str]=None, on_error: 'Callable[[UnexpectedInput], bool]'=None) -> Tree:
        """Parse the given text, according to the options provided.

        Parameters:
--- a/lark/load_grammar.py
+++ b/lark/load_grammar.py
@@ -691,14 +691,18 @@ class FromPackageLoader(object):
    pkg_name: The name of the package. You can probably provide `__name__` most of the time
    search_paths: All the path that will be search on absolute imports.
    """
    def __init__(self, pkg_name, search_paths=("", )):

    pkg_name: str
    search_paths: Tuple[str, ...]

    def __init__(self, pkg_name: str, search_paths: Tuple[str, ...]=("", )) -> None:
        self.pkg_name = pkg_name
        self.search_paths = search_paths

    def __repr__(self):
        return "%s(%r, %r)" % (type(self).__name__, self.pkg_name, self.search_paths)

    def __call__(self, base_path, grammar_path):
    def __call__(self, base_path: Union[None, str, PackageResource], grammar_path: str) -> Tuple[PackageResource, str]:
        if base_path is None:
            to_try = self.search_paths
        else:
--- a/lark/tree.py
+++ b/lark/tree.py
@@ -8,9 +8,23 @@ from copy import deepcopy

 ###{standalone
 from collections import OrderedDict
 from typing import List, Callable, Iterator, Union, Optional, Any, TYPE_CHECKING

 if TYPE_CHECKING:
    from .lexer import TerminalDef

 class Meta:

    empty: bool
    line: int
    column: int
    start_pos: int
    end_line: int
    end_column: int
    end_pos: int
    orig_expansion: 'List[TerminalDef]'
    match_tree: bool

    def __init__(self):
        self.empty = True

@@ -27,13 +41,17 @@ class Tree(object):
        meta: Line & Column numbers (if ``propagate_positions`` is enabled).
            meta attributes: line, column, start_pos, end_line, end_column, end_pos
    """
    def __init__(self, data, children, meta=None):

    data: str
    children: 'List[Union[str, Tree]]'

    def __init__(self, data: str, children: 'List[Union[str, Tree]]', meta: Meta=None) -> None:
        self.data = data
        self.children = children
        self._meta = meta

    @property
    def meta(self):
    def meta(self) -> Meta:
        """Return this node's ``Meta``, creating and caching an empty one on first access."""
        current = self._meta
        if current is not None:
            return current
        # Nothing stored yet: build a fresh Meta and keep it for later calls.
        self._meta = Meta()
        return self._meta
@@ -57,7 +75,7 @@ class Tree(object):

        return l

    def pretty(self, indent_str='  '):
    def pretty(self, indent_str: str='  ') -> str:
        """Returns an indented string representation of the tree.

        Great for debugging.
@@ -73,10 +91,10 @@ class Tree(object):
    def __ne__(self, other):
        return not (self == other)

    def __hash__(self):
    def __hash__(self) -> int:
        return hash((self.data, tuple(self.children)))

    def iter_subtrees(self):
    def iter_subtrees(self) -> 'Iterator[Tree]':
        """Depth-first iteration.

        Iterates over all the subtrees, never returning to the same node twice (Lark's parse-tree is actually a DAG).
@@ -91,23 +109,23 @@ class Tree(object):
        del queue
        return reversed(list(subtrees.values()))

    def find_pred(self, pred):
    def find_pred(self, pred: 'Callable[[Tree], bool]') -> 'Iterator[Tree]':
        """Return the subtrees from ``iter_subtrees()`` for which ``pred(subtree)`` is true."""
        subtrees = self.iter_subtrees()
        return filter(pred, subtrees)

    def find_data(self, data):
    def find_data(self, data: str) -> 'Iterator[Tree]':
        """Return the subtrees whose ``data`` attribute equals ``data``."""
        def has_matching_data(subtree):
            return subtree.data == data

        return self.find_pred(has_matching_data)

 ###}

    def expand_kids_by_index(self, *indices):
    def expand_kids_by_index(self, *indices: int) -> None:
        """Inline the children at the given indices: each is replaced, in place, by its own children."""
        # Go from the highest index down so that splicing near the tail does not
        # shift the positions that are still waiting to be processed.
        descending = sorted(indices, reverse=True)
        for idx in descending:
            grandchildren = self.children[idx].children
            self.children[idx:idx + 1] = grandchildren

    def scan_values(self, pred):
    def scan_values(self, pred: 'Callable[[Union[str, Tree]], bool]') -> Iterator[str]:
        """Return all values in the tree that evaluate pred(value) as true.

        This can be used to find all the tokens in the tree.
@@ -140,10 +158,10 @@ class Tree(object):
    def __deepcopy__(self, memo):
        return type(self)(self.data, deepcopy(self.children, memo), meta=self._meta)

    def copy(self):
    def copy(self) -> 'Tree':
        """Return a new node of the same class built from this node's ``data`` and ``children``.

        The children list object is passed through as-is (not duplicated), and no
        ``meta`` argument is given to the constructor.
        """
        node_cls = type(self)
        return node_cls(self.data, self.children)

    def set(self, data, children):
    def set(self, data: str, children: 'List[Union[str, Tree]]') -> None:
        """Overwrite this node's ``data`` and ``children`` in place."""
        self.data, self.children = data, children