exceptions, lark, and treegm/2021-09-23T00Z/github.com--lark-parser-lark/1.0b
| @@ -1,65 +0,0 @@ | |||
| # -*- coding: utf-8 -*- | |||
| from typing import Dict, Iterable, Callable, Union, TypeVar, Tuple, Any, List, Set | |||
| from .tree import Tree | |||
| from .lexer import Token | |||
| from .parsers.lalr_interactive_parser import InteractiveParser | |||
| class LarkError(Exception): | |||
| pass | |||
| class ConfigurationError(LarkError, ValueError): | |||
| pass | |||
| class GrammarError(LarkError): | |||
| pass | |||
| class ParseError(LarkError): | |||
| pass | |||
| class LexError(LarkError): | |||
| pass | |||
| T = TypeVar('T') | |||
| class UnexpectedEOF(ParseError): | |||
| expected: List[Token] | |||
| class UnexpectedInput(LarkError): | |||
| line: int | |||
| column: int | |||
| pos_in_stream: int | |||
| state: Any | |||
| def get_context(self, text: str, span: int = ...) -> str: | |||
| ... | |||
| def match_examples( | |||
| self, | |||
| parse_fn: Callable[[str], Tree], | |||
| examples: Union[Dict[T, Iterable[str]], Iterable[Tuple[T, Iterable[str]]]], | |||
| token_type_match_fallback: bool = False, | |||
| use_accepts: bool = False, | |||
| ) -> T: | |||
| ... | |||
| class UnexpectedToken(ParseError, UnexpectedInput): | |||
| expected: Set[str] | |||
| considered_rules: Set[str] | |||
| interactive_parser: InteractiveParser | |||
| accepts: Set[str] | |||
| class UnexpectedCharacters(LexError, UnexpectedInput): | |||
| allowed: Set[str] | |||
| considered_tokens: Set[Any] | |||
| class VisitError(LarkError): | |||
| obj: Union[Tree, Token] | |||
| orig_exc: Exception | |||
| @@ -1,19 +1,13 @@ | |||
| # -*- coding: utf-8 -*- | |||
| from typing import ( | |||
| TypeVar, Type, List, Dict, IO, Iterator, Callable, Union, Optional, | |||
| Type, List, Dict, IO, Iterator, Callable, Union, Optional, | |||
| Literal, Protocol, Tuple, Iterable, | |||
| ) | |||
| from .parsers.lalr_interactive_parser import InteractiveParser | |||
| from .visitors import Transformer | |||
| from .lexer import Token, Lexer, TerminalDef | |||
| from .tree import Tree | |||
| from .exceptions import UnexpectedInput | |||
| from .load_grammar import Grammar | |||
| _T = TypeVar('_T') | |||
| from .load_grammar import Grammar, PackageResource | |||
| class PostLex(Protocol): | |||
| @@ -22,39 +16,8 @@ class PostLex(Protocol): | |||
| always_accept: Iterable[str] | |||
| class LarkOptions: | |||
| start: List[str] | |||
| parser: str | |||
| lexer: str | |||
| transformer: Optional[Transformer] | |||
| postlex: Optional[PostLex] | |||
| ambiguity: str | |||
| regex: bool | |||
| debug: bool | |||
| keep_all_tokens: bool | |||
| propagate_positions: Union[bool, str] | |||
| maybe_placeholders: bool | |||
| lexer_callbacks: Dict[str, Callable[[Token], Token]] | |||
| cache: Union[bool, str] | |||
| g_regex_flags: int | |||
| use_bytes: bool | |||
| import_paths: List[Union[str, Callable[[Union[None, str, PackageResource], str], Tuple[str, str]]]] | |||
| source_path: Optional[str] | |||
| class PackageResource(object): | |||
| pkg_name: str | |||
| path: str | |||
| def __init__(self, pkg_name: str, path: str): ... | |||
| class FromPackageLoader: | |||
| def __init__(self, pkg_name: str, search_paths: Tuple[str, ...] = ...): ... | |||
| def __call__(self, base_path: Union[None, str, PackageResource], grammar_path: str) -> Tuple[PackageResource, str]: ... | |||
| ... | |||
| class Lark: | |||
| source_path: str | |||
| @@ -88,22 +51,3 @@ class Lark: | |||
| ): | |||
| ... | |||
| def parse(self, text: str, start: Optional[str] = None, on_error: Callable[[UnexpectedInput], bool] = None) -> Tree: | |||
| ... | |||
| def parse_interactive(self, text: str = None, start: Optional[str] = None) -> InteractiveParser: | |||
| ... | |||
| @classmethod | |||
| def open(cls: Type[_T], grammar_filename: str, rel_to: Optional[str] = None, **options) -> _T: | |||
| ... | |||
| @classmethod | |||
| def open_from_package(cls: Type[_T], package: str, grammar_path: str, search_paths: Tuple[str, ...] = ..., **options) -> _T: | |||
| ... | |||
| def lex(self, text: str, dont_ignore: bool = False) -> Iterator[Token]: | |||
| ... | |||
| def get_terminal(self, name: str) -> TerminalDef: | |||
| ... | |||
| @@ -1,67 +1,9 @@ | |||
| # -*- coding: utf-8 -*- | |||
| from typing import List, Callable, Iterator, Union, Optional, Literal, Any | |||
| from .lexer import TerminalDef | |||
| class Meta: | |||
| empty: bool | |||
| line: int | |||
| column: int | |||
| start_pos: int | |||
| end_line: int | |||
| end_column: int | |||
| end_pos: int | |||
| orig_expansion: List[TerminalDef] | |||
| match_tree: bool | |||
| from typing import Literal | |||
| class Tree: | |||
| data: str | |||
| children: List[Union[str, Tree]] | |||
| meta: Meta | |||
| def __init__( | |||
| self, | |||
| data: str, | |||
| children: List[Union[str, Tree]], | |||
| meta: Optional[Meta] = None | |||
| ) -> None: | |||
| ... | |||
| def pretty(self, indent_str: str = ...) -> str: | |||
| ... | |||
| def find_pred(self, pred: Callable[[Tree], bool]) -> Iterator[Tree]: | |||
| ... | |||
| def find_data(self, data: str) -> Iterator[Tree]: | |||
| ... | |||
| def expand_kids_by_index(self, *indices: int) -> None: | |||
| ... | |||
| def scan_values(self, pred: Callable[[Union[str, Tree]], bool]) -> Iterator[str]: | |||
| ... | |||
| def iter_subtrees(self) -> Iterator[Tree]: | |||
| ... | |||
| def iter_subtrees_topdown(self) -> Iterator[Tree]: | |||
| ... | |||
| def copy(self) -> Tree: | |||
| ... | |||
| def set(self, data: str, children: List[Union[str, Tree]]) -> None: | |||
| ... | |||
| def __hash__(self) -> int: | |||
| ... | |||
| class SlottedTree(Tree): | |||
| pass | |||
| ... | |||
| def pydot__tree_to_png( | |||
| tree: Tree, | |||
| @@ -3,6 +3,12 @@ from .utils import logger, NO_VALUE | |||
| ###{standalone | |||
| from typing import Dict, Iterable, Callable, Union, TypeVar, Tuple, Any, List, Set, TYPE_CHECKING | |||
| if TYPE_CHECKING: | |||
| from .lexer import Token | |||
| from .parsers.lalr_interactive_parser import InteractiveParser | |||
| from .tree import Tree | |||
| class LarkError(Exception): | |||
| pass | |||
| @@ -28,6 +34,7 @@ class ParseError(LarkError): | |||
| class LexError(LarkError): | |||
| pass | |||
| T = TypeVar('T') | |||
| class UnexpectedInput(LarkError): | |||
| """UnexpectedInput Error. | |||
| @@ -39,10 +46,13 @@ class UnexpectedInput(LarkError): | |||
| After catching one of these exceptions, you may call the following helper methods to create a nicer error message. | |||
| """ | |||
| line: int | |||
| column: int | |||
| pos_in_stream = None | |||
| state: Any | |||
| _terminals_by_name = None | |||
| def get_context(self, text, span=40): | |||
| def get_context(self, text: str, span: int=40) -> str: | |||
| """Returns a pretty string pinpointing the error in the text, | |||
| with span amount of context characters around it. | |||
| @@ -63,7 +73,7 @@ class UnexpectedInput(LarkError): | |||
| after = text[pos:end].split(b'\n', 1)[0] | |||
| return (before + after + b'\n' + b' ' * len(before.expandtabs()) + b'^\n').decode("ascii", "backslashreplace") | |||
| def match_examples(self, parse_fn, examples, token_type_match_fallback=False, use_accepts=False): | |||
| def match_examples(self, parse_fn: 'Callable[[str], Tree]', examples: Union[Dict[T, Iterable[str]], Iterable[Tuple[T, Iterable[str]]]], token_type_match_fallback: bool=False, use_accepts: bool=False) -> T: | |||
| """Allows you to detect what's wrong in the input text by matching | |||
| against example errors. | |||
| @@ -126,6 +136,9 @@ class UnexpectedInput(LarkError): | |||
| class UnexpectedEOF(ParseError, UnexpectedInput): | |||
| expected: 'List[Token]' | |||
| def __init__(self, expected, state=None, terminals_by_name=None): | |||
| self.expected = expected | |||
| self.state = state | |||
| @@ -145,6 +158,10 @@ class UnexpectedEOF(ParseError, UnexpectedInput): | |||
| class UnexpectedCharacters(LexError, UnexpectedInput): | |||
| allowed: Set[str] | |||
| considered_tokens: Set[Any] | |||
| def __init__(self, seq, lex_pos, line, column, allowed=None, considered_tokens=None, state=None, token_history=None, | |||
| terminals_by_name=None, considered_rules=None): | |||
| # TODO considered_tokens and allowed can be figured out using state | |||
| @@ -187,6 +204,10 @@ class UnexpectedToken(ParseError, UnexpectedInput): | |||
| see: ``InteractiveParser``. | |||
| """ | |||
| expected: Set[str] | |||
| considered_rules: Set[str] | |||
| interactive_parser: 'InteractiveParser' | |||
| def __init__(self, token, expected, considered_rules=None, state=None, interactive_parser=None, terminals_by_name=None, token_history=None): | |||
| # TODO considered_rules and expected can be figured out using state | |||
| self.line = getattr(token, 'line', '?') | |||
| @@ -205,7 +226,7 @@ class UnexpectedToken(ParseError, UnexpectedInput): | |||
| super(UnexpectedToken, self).__init__() | |||
| @property | |||
| def accepts(self): | |||
| def accepts(self) -> Set[str]: | |||
| if self._accepts is NO_VALUE: | |||
| self._accepts = self.interactive_parser and self.interactive_parser.accepts() | |||
| return self._accepts | |||
| @@ -228,6 +249,9 @@ class VisitError(LarkError): | |||
| - orig_exc: the exception that caused it to fail | |||
| """ | |||
| obj: 'Union[Tree, Token]' | |||
| orig_exc: Exception | |||
| def __init__(self, rule, obj, orig_exc): | |||
| self.obj = obj | |||
| self.orig_exc = orig_exc | |||
| @@ -1,6 +1,10 @@ | |||
| from abc import ABC, abstractmethod | |||
| import sys, os, pickle, hashlib | |||
| import tempfile | |||
| from typing import ( | |||
| TypeVar, Type, List, Dict, Iterator, Callable, Union, Optional, | |||
| Tuple, Iterable, TYPE_CHECKING | |||
| ) | |||
| from .exceptions import ConfigurationError, assert_config | |||
| from .utils import Serialize, SerializeMemoizer, FS, isascii, logger | |||
| @@ -8,7 +12,7 @@ from .load_grammar import load_grammar, FromPackageLoader, Grammar, verify_used_ | |||
| from .tree import Tree | |||
| from .common import LexerConf, ParserConf | |||
| from .lexer import Lexer, TraditionalLexer, TerminalDef, LexerThread | |||
| from .lexer import Lexer, TraditionalLexer, TerminalDef, LexerThread, Token | |||
| from .parse_tree_builder import ParseTreeBuilder | |||
| from .parser_frontends import get_frontend, _get_lexer_callbacks | |||
| from .grammar import Rule | |||
| @@ -19,14 +23,44 @@ try: | |||
| except ImportError: | |||
| regex = None | |||
| if TYPE_CHECKING: | |||
| from .load_grammar import PackageResource | |||
| from .exceptions import UnexpectedInput | |||
| from .parsers.lalr_interactive_parser import InteractiveParser | |||
| from .visitors import Transformer | |||
| ###{standalone | |||
| class PostLex(ABC): | |||
| @abstractmethod | |||
| def process(self, stream): | |||
| return stream | |||
| always_accept = () | |||
| class LarkOptions(Serialize): | |||
| """Specifies the options for Lark | |||
| """ | |||
| start: List[str] | |||
| parser: str | |||
| lexer: str | |||
| transformer: 'Optional[Transformer]' | |||
| postlex: Optional[PostLex] | |||
| ambiguity: str | |||
| regex: bool | |||
| debug: bool | |||
| keep_all_tokens: bool | |||
| propagate_positions: Union[bool, str] | |||
| maybe_placeholders: bool | |||
| lexer_callbacks: Dict[str, Callable[[Token], Token]] | |||
| cache: Union[bool, str] | |||
| g_regex_flags: int | |||
| use_bytes: bool | |||
| import_paths: 'List[Union[str, Callable[[Union[None, str, PackageResource], str], Tuple[str, str]]]]' | |||
| source_path: Optional[str] | |||
| OPTIONS_DOC = """ | |||
| **=== General Options ===** | |||
| @@ -189,13 +223,7 @@ _VALID_PRIORITY_OPTIONS = ('auto', 'normal', 'invert', None) | |||
| _VALID_AMBIGUITY_OPTIONS = ('auto', 'resolve', 'explicit', 'forest') | |||
| class PostLex(ABC): | |||
| @abstractmethod | |||
| def process(self, stream): | |||
| return stream | |||
| always_accept = () | |||
| _T = TypeVar('_T') | |||
| class Lark(Serialize): | |||
| """Main interface for the library. | |||
| @@ -476,7 +504,7 @@ class Lark(Serialize): | |||
| return inst._load({'data': data, 'memo': memo}, **kwargs) | |||
| @classmethod | |||
| def open(cls, grammar_filename, rel_to=None, **options): | |||
| def open(cls: Type[_T], grammar_filename: str, rel_to: Optional[str]=None, **options) -> _T: | |||
| """Create an instance of Lark with the grammar given by its filename | |||
| If ``rel_to`` is provided, the function will find the grammar filename in relation to it. | |||
| @@ -494,7 +522,7 @@ class Lark(Serialize): | |||
| return cls(f, **options) | |||
| @classmethod | |||
| def open_from_package(cls, package, grammar_path, search_paths=("",), **options): | |||
| def open_from_package(cls: Type[_T], package: str, grammar_path: str, search_paths: Tuple[str, ...]=("",), **options) -> _T: | |||
| """Create an instance of Lark with the grammar loaded from within the package `package`. | |||
| This allows grammar loading from zipapps. | |||
| @@ -515,7 +543,7 @@ class Lark(Serialize): | |||
| return 'Lark(open(%r), parser=%r, lexer=%r, ...)' % (self.source_path, self.options.parser, self.options.lexer) | |||
| def lex(self, text, dont_ignore=False): | |||
| def lex(self, text: str, dont_ignore: bool=False) -> Iterator[Token]: | |||
| """Only lex (and postlex) the text, without parsing it. Only relevant when lexer='standard' | |||
| When dont_ignore=True, the lexer will return all tokens, even those marked for %ignore. | |||
| @@ -530,11 +558,11 @@ class Lark(Serialize): | |||
| return self.options.postlex.process(stream) | |||
| return stream | |||
| def get_terminal(self, name): | |||
| def get_terminal(self, name: str) -> TerminalDef: | |||
| """Get information about a terminal""" | |||
| return self._terminals_dict[name] | |||
| def parse_interactive(self, text=None, start=None): | |||
| def parse_interactive(self, text: str=None, start: Optional[str]=None) -> 'InteractiveParser': | |||
| """Start an interactive parsing session. | |||
| Parameters: | |||
| @@ -548,7 +576,7 @@ class Lark(Serialize): | |||
| """ | |||
| return self.parser.parse_interactive(text, start=start) | |||
| def parse(self, text, start=None, on_error=None): | |||
| def parse(self, text: str, start: Optional[str]=None, on_error: 'Callable[[UnexpectedInput], bool]'=None) -> Tree: | |||
| """Parse the given text, according to the options provided. | |||
| Parameters: | |||
| @@ -691,14 +691,18 @@ class FromPackageLoader(object): | |||
| pkg_name: The name of the package. You can probably provide `__name__` most of the time | |||
| search_paths: All the paths that will be searched on absolute imports. | |||
| """ | |||
| def __init__(self, pkg_name, search_paths=("", )): | |||
| pkg_name: str | |||
| search_paths: Tuple[str, ...] | |||
| def __init__(self, pkg_name: str, search_paths: Tuple[str, ...]=("", )) -> None: | |||
| self.pkg_name = pkg_name | |||
| self.search_paths = search_paths | |||
| def __repr__(self): | |||
| return "%s(%r, %r)" % (type(self).__name__, self.pkg_name, self.search_paths) | |||
| def __call__(self, base_path, grammar_path): | |||
| def __call__(self, base_path: Union[None, str, PackageResource], grammar_path: str) -> Tuple[PackageResource, str]: | |||
| if base_path is None: | |||
| to_try = self.search_paths | |||
| else: | |||
| @@ -8,9 +8,23 @@ from copy import deepcopy | |||
| ###{standalone | |||
| from collections import OrderedDict | |||
| from typing import List, Callable, Iterator, Union, Optional, Any, TYPE_CHECKING | |||
| if TYPE_CHECKING: | |||
| from .lexer import TerminalDef | |||
| class Meta: | |||
| empty: bool | |||
| line: int | |||
| column: int | |||
| start_pos: int | |||
| end_line: int | |||
| end_column: int | |||
| end_pos: int | |||
| orig_expansion: 'List[TerminalDef]' | |||
| match_tree: bool | |||
| def __init__(self): | |||
| self.empty = True | |||
| @@ -27,13 +41,17 @@ class Tree(object): | |||
| meta: Line & Column numbers (if ``propagate_positions`` is enabled). | |||
| meta attributes: line, column, start_pos, end_line, end_column, end_pos | |||
| """ | |||
| def __init__(self, data, children, meta=None): | |||
| data: str | |||
| children: 'List[Union[str, Tree]]' | |||
| def __init__(self, data: str, children: 'List[Union[str, Tree]]', meta: Meta=None) -> None: | |||
| self.data = data | |||
| self.children = children | |||
| self._meta = meta | |||
| @property | |||
| def meta(self): | |||
| def meta(self) -> Meta: | |||
| if self._meta is None: | |||
| self._meta = Meta() | |||
| return self._meta | |||
| @@ -57,7 +75,7 @@ class Tree(object): | |||
| return l | |||
| def pretty(self, indent_str=' '): | |||
| def pretty(self, indent_str: str=' ') -> str: | |||
| """Returns an indented string representation of the tree. | |||
| Great for debugging. | |||
| @@ -73,10 +91,10 @@ class Tree(object): | |||
| def __ne__(self, other): | |||
| return not (self == other) | |||
| def __hash__(self): | |||
| def __hash__(self) -> int: | |||
| return hash((self.data, tuple(self.children))) | |||
| def iter_subtrees(self): | |||
| def iter_subtrees(self) -> 'Iterator[Tree]': | |||
| """Depth-first iteration. | |||
| Iterates over all the subtrees, never returning to the same node twice (Lark's parse-tree is actually a DAG). | |||
| @@ -91,23 +109,23 @@ class Tree(object): | |||
| del queue | |||
| return reversed(list(subtrees.values())) | |||
| def find_pred(self, pred): | |||
| def find_pred(self, pred: 'Callable[[Tree], bool]') -> 'Iterator[Tree]': | |||
| """Returns all nodes of the tree that evaluate pred(node) as true.""" | |||
| return filter(pred, self.iter_subtrees()) | |||
| def find_data(self, data): | |||
| def find_data(self, data: str) -> 'Iterator[Tree]': | |||
| """Returns all nodes of the tree whose data equals the given data.""" | |||
| return self.find_pred(lambda t: t.data == data) | |||
| ###} | |||
| def expand_kids_by_index(self, *indices): | |||
| def expand_kids_by_index(self, *indices: int) -> None: | |||
| """Expand (inline) children at the given indices""" | |||
| for i in sorted(indices, reverse=True): # reverse so that changing tail won't affect indices | |||
| kid = self.children[i] | |||
| self.children[i:i+1] = kid.children | |||
| def scan_values(self, pred): | |||
| def scan_values(self, pred: 'Callable[[Union[str, Tree]], bool]') -> Iterator[str]: | |||
| """Return all values in the tree that evaluate pred(value) as true. | |||
| This can be used to find all the tokens in the tree. | |||
| @@ -140,10 +158,10 @@ class Tree(object): | |||
| def __deepcopy__(self, memo): | |||
| return type(self)(self.data, deepcopy(self.children, memo), meta=self._meta) | |||
| def copy(self): | |||
| def copy(self) -> 'Tree': | |||
| return type(self)(self.data, self.children) | |||
| def set(self, data, children): | |||
| def set(self, data: str, children: 'List[Union[str, Tree]]') -> None: | |||
| self.data = data | |||
| self.children = children | |||