exceptions, lark, and tree gm/2021-09-23T00Z/github.com--lark-parser-lark/1.0b
@@ -1,65 +0,0 @@ | |||
# -*- coding: utf-8 -*- | |||
from typing import Dict, Iterable, Callable, Union, TypeVar, Tuple, Any, List, Set | |||
from .tree import Tree | |||
from .lexer import Token | |||
from .parsers.lalr_interactive_parser import InteractiveParser | |||
class LarkError(Exception): | |||
pass | |||
class ConfigurationError(LarkError, ValueError): | |||
pass | |||
class GrammarError(LarkError): | |||
pass | |||
class ParseError(LarkError): | |||
pass | |||
class LexError(LarkError): | |||
pass | |||
T = TypeVar('T') | |||
class UnexpectedEOF(ParseError): | |||
expected: List[Token] | |||
class UnexpectedInput(LarkError): | |||
line: int | |||
column: int | |||
pos_in_stream: int | |||
state: Any | |||
def get_context(self, text: str, span: int = ...) -> str: | |||
... | |||
def match_examples( | |||
self, | |||
parse_fn: Callable[[str], Tree], | |||
examples: Union[Dict[T, Iterable[str]], Iterable[Tuple[T, Iterable[str]]]], | |||
token_type_match_fallback: bool = False, | |||
use_accepts: bool = False, | |||
) -> T: | |||
... | |||
class UnexpectedToken(ParseError, UnexpectedInput): | |||
expected: Set[str] | |||
considered_rules: Set[str] | |||
interactive_parser: InteractiveParser | |||
accepts: Set[str] | |||
class UnexpectedCharacters(LexError, UnexpectedInput): | |||
allowed: Set[str] | |||
considered_tokens: Set[Any] | |||
class VisitError(LarkError): | |||
obj: Union[Tree, Token] | |||
orig_exc: Exception |
@@ -1,19 +1,13 @@ | |||
# -*- coding: utf-8 -*- | |||
from typing import ( | |||
TypeVar, Type, List, Dict, IO, Iterator, Callable, Union, Optional, | |||
Type, List, Dict, IO, Iterator, Callable, Union, Optional, | |||
Literal, Protocol, Tuple, Iterable, | |||
) | |||
from .parsers.lalr_interactive_parser import InteractiveParser | |||
from .visitors import Transformer | |||
from .lexer import Token, Lexer, TerminalDef | |||
from .tree import Tree | |||
from .exceptions import UnexpectedInput | |||
from .load_grammar import Grammar | |||
_T = TypeVar('_T') | |||
from .load_grammar import Grammar, PackageResource | |||
class PostLex(Protocol): | |||
@@ -22,39 +16,8 @@ class PostLex(Protocol): | |||
always_accept: Iterable[str] | |||
class LarkOptions: | |||
start: List[str] | |||
parser: str | |||
lexer: str | |||
transformer: Optional[Transformer] | |||
postlex: Optional[PostLex] | |||
ambiguity: str | |||
regex: bool | |||
debug: bool | |||
keep_all_tokens: bool | |||
propagate_positions: Union[bool, str] | |||
maybe_placeholders: bool | |||
lexer_callbacks: Dict[str, Callable[[Token], Token]] | |||
cache: Union[bool, str] | |||
g_regex_flags: int | |||
use_bytes: bool | |||
import_paths: List[Union[str, Callable[[Union[None, str, PackageResource], str], Tuple[str, str]]]] | |||
source_path: Optional[str] | |||
class PackageResource(object): | |||
pkg_name: str | |||
path: str | |||
def __init__(self, pkg_name: str, path: str): ... | |||
class FromPackageLoader: | |||
def __init__(self, pkg_name: str, search_paths: Tuple[str, ...] = ...): ... | |||
def __call__(self, base_path: Union[None, str, PackageResource], grammar_path: str) -> Tuple[PackageResource, str]: ... | |||
... | |||
class Lark: | |||
source_path: str | |||
@@ -88,22 +51,3 @@ class Lark: | |||
): | |||
... | |||
def parse(self, text: str, start: Optional[str] = None, on_error: Callable[[UnexpectedInput], bool] = None) -> Tree: | |||
... | |||
def parse_interactive(self, text: str = None, start: Optional[str] = None) -> InteractiveParser: | |||
... | |||
@classmethod | |||
def open(cls: Type[_T], grammar_filename: str, rel_to: Optional[str] = None, **options) -> _T: | |||
... | |||
@classmethod | |||
def open_from_package(cls: Type[_T], package: str, grammar_path: str, search_paths: Tuple[str, ...] = ..., **options) -> _T: | |||
... | |||
def lex(self, text: str, dont_ignore: bool = False) -> Iterator[Token]: | |||
... | |||
def get_terminal(self, name: str) -> TerminalDef: | |||
... |
@@ -1,67 +1,9 @@ | |||
# -*- coding: utf-8 -*- | |||
from typing import List, Callable, Iterator, Union, Optional, Literal, Any | |||
from .lexer import TerminalDef | |||
class Meta: | |||
empty: bool | |||
line: int | |||
column: int | |||
start_pos: int | |||
end_line: int | |||
end_column: int | |||
end_pos: int | |||
orig_expansion: List[TerminalDef] | |||
match_tree: bool | |||
from typing import Literal | |||
class Tree: | |||
data: str | |||
children: List[Union[str, Tree]] | |||
meta: Meta | |||
def __init__( | |||
self, | |||
data: str, | |||
children: List[Union[str, Tree]], | |||
meta: Optional[Meta] = None | |||
) -> None: | |||
... | |||
def pretty(self, indent_str: str = ...) -> str: | |||
... | |||
def find_pred(self, pred: Callable[[Tree], bool]) -> Iterator[Tree]: | |||
... | |||
def find_data(self, data: str) -> Iterator[Tree]: | |||
... | |||
def expand_kids_by_index(self, *indices: int) -> None: | |||
... | |||
def scan_values(self, pred: Callable[[Union[str, Tree]], bool]) -> Iterator[str]: | |||
... | |||
def iter_subtrees(self) -> Iterator[Tree]: | |||
... | |||
def iter_subtrees_topdown(self) -> Iterator[Tree]: | |||
... | |||
def copy(self) -> Tree: | |||
... | |||
def set(self, data: str, children: List[Union[str, Tree]]) -> None: | |||
... | |||
def __hash__(self) -> int: | |||
... | |||
class SlottedTree(Tree): | |||
pass | |||
... | |||
def pydot__tree_to_png( | |||
tree: Tree, | |||
@@ -3,6 +3,12 @@ from .utils import logger, NO_VALUE | |||
###{standalone | |||
from typing import Dict, Iterable, Callable, Union, TypeVar, Tuple, Any, List, Set, TYPE_CHECKING | |||
if TYPE_CHECKING: | |||
from .lexer import Token | |||
from .parsers.lalr_interactive_parser import InteractiveParser | |||
from .tree import Tree | |||
class LarkError(Exception): | |||
pass | |||
@@ -28,6 +34,7 @@ class ParseError(LarkError): | |||
class LexError(LarkError): | |||
pass | |||
T = TypeVar('T') | |||
class UnexpectedInput(LarkError): | |||
"""UnexpectedInput Error. | |||
@@ -39,10 +46,13 @@ class UnexpectedInput(LarkError): | |||
After catching one of these exceptions, you may call the following helper methods to create a nicer error message. | |||
""" | |||
line: int | |||
column: int | |||
pos_in_stream = None | |||
state: Any | |||
_terminals_by_name = None | |||
def get_context(self, text, span=40): | |||
def get_context(self, text: str, span: int=40) -> str: | |||
"""Returns a pretty string pinpointing the error in the text, | |||
with ``span`` characters of context around it. | |||
@@ -63,7 +73,7 @@ class UnexpectedInput(LarkError): | |||
after = text[pos:end].split(b'\n', 1)[0] | |||
return (before + after + b'\n' + b' ' * len(before.expandtabs()) + b'^\n').decode("ascii", "backslashreplace") | |||
def match_examples(self, parse_fn, examples, token_type_match_fallback=False, use_accepts=False): | |||
def match_examples(self, parse_fn: 'Callable[[str], Tree]', examples: Union[Dict[T, Iterable[str]], Iterable[Tuple[T, Iterable[str]]]], token_type_match_fallback: bool=False, use_accepts: bool=False) -> T: | |||
"""Allows you to detect what's wrong in the input text by matching | |||
against example errors. | |||
@@ -126,6 +136,9 @@ class UnexpectedInput(LarkError): | |||
class UnexpectedEOF(ParseError, UnexpectedInput): | |||
expected: 'List[Token]' | |||
def __init__(self, expected, state=None, terminals_by_name=None): | |||
self.expected = expected | |||
self.state = state | |||
@@ -145,6 +158,10 @@ class UnexpectedEOF(ParseError, UnexpectedInput): | |||
class UnexpectedCharacters(LexError, UnexpectedInput): | |||
allowed: Set[str] | |||
considered_tokens: Set[Any] | |||
def __init__(self, seq, lex_pos, line, column, allowed=None, considered_tokens=None, state=None, token_history=None, | |||
terminals_by_name=None, considered_rules=None): | |||
# TODO considered_tokens and allowed can be figured out using state | |||
@@ -187,6 +204,10 @@ class UnexpectedToken(ParseError, UnexpectedInput): | |||
see: ``InteractiveParser``. | |||
""" | |||
expected: Set[str] | |||
considered_rules: Set[str] | |||
interactive_parser: 'InteractiveParser' | |||
def __init__(self, token, expected, considered_rules=None, state=None, interactive_parser=None, terminals_by_name=None, token_history=None): | |||
# TODO considered_rules and expected can be figured out using state | |||
self.line = getattr(token, 'line', '?') | |||
@@ -205,7 +226,7 @@ class UnexpectedToken(ParseError, UnexpectedInput): | |||
super(UnexpectedToken, self).__init__() | |||
@property | |||
def accepts(self): | |||
def accepts(self) -> Set[str]: | |||
if self._accepts is NO_VALUE: | |||
self._accepts = self.interactive_parser and self.interactive_parser.accepts() | |||
return self._accepts | |||
@@ -228,6 +249,9 @@ class VisitError(LarkError): | |||
- orig_exc: the exception that caused it to fail | |||
""" | |||
obj: 'Union[Tree, Token]' | |||
orig_exc: Exception | |||
def __init__(self, rule, obj, orig_exc): | |||
self.obj = obj | |||
self.orig_exc = orig_exc | |||
@@ -1,6 +1,10 @@ | |||
from abc import ABC, abstractmethod | |||
import sys, os, pickle, hashlib | |||
import tempfile | |||
from typing import ( | |||
TypeVar, Type, List, Dict, Iterator, Callable, Union, Optional, | |||
Tuple, Iterable, TYPE_CHECKING | |||
) | |||
from .exceptions import ConfigurationError, assert_config | |||
from .utils import Serialize, SerializeMemoizer, FS, isascii, logger | |||
@@ -8,7 +12,7 @@ from .load_grammar import load_grammar, FromPackageLoader, Grammar, verify_used_ | |||
from .tree import Tree | |||
from .common import LexerConf, ParserConf | |||
from .lexer import Lexer, TraditionalLexer, TerminalDef, LexerThread | |||
from .lexer import Lexer, TraditionalLexer, TerminalDef, LexerThread, Token | |||
from .parse_tree_builder import ParseTreeBuilder | |||
from .parser_frontends import get_frontend, _get_lexer_callbacks | |||
from .grammar import Rule | |||
@@ -19,14 +23,44 @@ try: | |||
except ImportError: | |||
regex = None | |||
if TYPE_CHECKING: | |||
from .load_grammar import PackageResource | |||
from .exceptions import UnexpectedInput | |||
from .parsers.lalr_interactive_parser import InteractiveParser | |||
from .visitors import Transformer | |||
###{standalone | |||
class PostLex(ABC): | |||
@abstractmethod | |||
def process(self, stream): | |||
return stream | |||
always_accept = () | |||
class LarkOptions(Serialize): | |||
"""Specifies the options for Lark | |||
""" | |||
start: List[str] | |||
parser: str | |||
lexer: str | |||
transformer: 'Optional[Transformer]' | |||
postlex: Optional[PostLex] | |||
ambiguity: str | |||
regex: bool | |||
debug: bool | |||
keep_all_tokens: bool | |||
propagate_positions: Union[bool, str] | |||
maybe_placeholders: bool | |||
lexer_callbacks: Dict[str, Callable[[Token], Token]] | |||
cache: Union[bool, str] | |||
g_regex_flags: int | |||
use_bytes: bool | |||
import_paths: 'List[Union[str, Callable[[Union[None, str, PackageResource], str], Tuple[str, str]]]]' | |||
source_path: Optional[str] | |||
OPTIONS_DOC = """ | |||
**=== General Options ===** | |||
@@ -189,13 +223,7 @@ _VALID_PRIORITY_OPTIONS = ('auto', 'normal', 'invert', None) | |||
_VALID_AMBIGUITY_OPTIONS = ('auto', 'resolve', 'explicit', 'forest') | |||
class PostLex(ABC): | |||
@abstractmethod | |||
def process(self, stream): | |||
return stream | |||
always_accept = () | |||
_T = TypeVar('_T') | |||
class Lark(Serialize): | |||
"""Main interface for the library. | |||
@@ -476,7 +504,7 @@ class Lark(Serialize): | |||
return inst._load({'data': data, 'memo': memo}, **kwargs) | |||
@classmethod | |||
def open(cls, grammar_filename, rel_to=None, **options): | |||
def open(cls: Type[_T], grammar_filename: str, rel_to: Optional[str]=None, **options) -> _T: | |||
"""Create an instance of Lark with the grammar given by its filename | |||
If ``rel_to`` is provided, the function will find the grammar filename in relation to it. | |||
@@ -494,7 +522,7 @@ class Lark(Serialize): | |||
return cls(f, **options) | |||
@classmethod | |||
def open_from_package(cls, package, grammar_path, search_paths=("",), **options): | |||
def open_from_package(cls: Type[_T], package: str, grammar_path: str, search_paths: Tuple[str, ...]=("",), **options) -> _T: | |||
"""Create an instance of Lark with the grammar loaded from within the package `package`. | |||
This allows grammar loading from zipapps. | |||
@@ -515,7 +543,7 @@ class Lark(Serialize): | |||
return 'Lark(open(%r), parser=%r, lexer=%r, ...)' % (self.source_path, self.options.parser, self.options.lexer) | |||
def lex(self, text, dont_ignore=False): | |||
def lex(self, text: str, dont_ignore: bool=False) -> Iterator[Token]: | |||
"""Only lex (and postlex) the text, without parsing it. Only relevant when lexer='standard' | |||
When dont_ignore=True, the lexer will return all tokens, even those marked for %ignore. | |||
@@ -530,11 +558,11 @@ class Lark(Serialize): | |||
return self.options.postlex.process(stream) | |||
return stream | |||
def get_terminal(self, name): | |||
def get_terminal(self, name: str) -> TerminalDef: | |||
"""Get information about a terminal""" | |||
return self._terminals_dict[name] | |||
def parse_interactive(self, text=None, start=None): | |||
def parse_interactive(self, text: str=None, start: Optional[str]=None) -> 'InteractiveParser': | |||
"""Start an interactive parsing session. | |||
Parameters: | |||
@@ -548,7 +576,7 @@ class Lark(Serialize): | |||
""" | |||
return self.parser.parse_interactive(text, start=start) | |||
def parse(self, text, start=None, on_error=None): | |||
def parse(self, text: str, start: Optional[str]=None, on_error: 'Callable[[UnexpectedInput], bool]'=None) -> Tree: | |||
"""Parse the given text, according to the options provided. | |||
Parameters: | |||
@@ -691,14 +691,18 @@ class FromPackageLoader(object): | |||
pkg_name: The name of the package. You can probably provide `__name__` most of the time | |||
search_paths: All the paths that will be searched on absolute imports. | |||
""" | |||
def __init__(self, pkg_name, search_paths=("", )): | |||
pkg_name: str | |||
search_paths: Tuple[str, ...] | |||
def __init__(self, pkg_name: str, search_paths: Tuple[str, ...]=("", )) -> None: | |||
self.pkg_name = pkg_name | |||
self.search_paths = search_paths | |||
def __repr__(self): | |||
return "%s(%r, %r)" % (type(self).__name__, self.pkg_name, self.search_paths) | |||
def __call__(self, base_path, grammar_path): | |||
def __call__(self, base_path: Union[None, str, PackageResource], grammar_path: str) -> Tuple[PackageResource, str]: | |||
if base_path is None: | |||
to_try = self.search_paths | |||
else: | |||
@@ -8,9 +8,23 @@ from copy import deepcopy | |||
###{standalone | |||
from collections import OrderedDict | |||
from typing import List, Callable, Iterator, Union, Optional, Any, TYPE_CHECKING | |||
if TYPE_CHECKING: | |||
from .lexer import TerminalDef | |||
class Meta: | |||
empty: bool | |||
line: int | |||
column: int | |||
start_pos: int | |||
end_line: int | |||
end_column: int | |||
end_pos: int | |||
orig_expansion: 'List[TerminalDef]' | |||
match_tree: bool | |||
def __init__(self): | |||
self.empty = True | |||
@@ -27,13 +41,17 @@ class Tree(object): | |||
meta: Line & Column numbers (if ``propagate_positions`` is enabled). | |||
meta attributes: line, column, start_pos, end_line, end_column, end_pos | |||
""" | |||
def __init__(self, data, children, meta=None): | |||
data: str | |||
children: 'List[Union[str, Tree]]' | |||
def __init__(self, data: str, children: 'List[Union[str, Tree]]', meta: Meta=None) -> None: | |||
self.data = data | |||
self.children = children | |||
self._meta = meta | |||
@property | |||
def meta(self): | |||
def meta(self) -> Meta: | |||
if self._meta is None: | |||
self._meta = Meta() | |||
return self._meta | |||
@@ -57,7 +75,7 @@ class Tree(object): | |||
return l | |||
def pretty(self, indent_str=' '): | |||
def pretty(self, indent_str: str=' ') -> str: | |||
"""Returns an indented string representation of the tree. | |||
Great for debugging. | |||
@@ -73,10 +91,10 @@ class Tree(object): | |||
def __ne__(self, other): | |||
return not (self == other) | |||
def __hash__(self): | |||
def __hash__(self) -> int: | |||
return hash((self.data, tuple(self.children))) | |||
def iter_subtrees(self): | |||
def iter_subtrees(self) -> 'Iterator[Tree]': | |||
"""Depth-first iteration. | |||
Iterates over all the subtrees, never returning to the same node twice (Lark's parse-tree is actually a DAG). | |||
@@ -91,23 +109,23 @@ class Tree(object): | |||
del queue | |||
return reversed(list(subtrees.values())) | |||
def find_pred(self, pred): | |||
def find_pred(self, pred: 'Callable[[Tree], bool]') -> 'Iterator[Tree]': | |||
"""Returns all nodes of the tree that evaluate pred(node) as true.""" | |||
return filter(pred, self.iter_subtrees()) | |||
def find_data(self, data): | |||
def find_data(self, data: str) -> 'Iterator[Tree]': | |||
"""Returns all nodes of the tree whose data equals the given data.""" | |||
return self.find_pred(lambda t: t.data == data) | |||
###} | |||
def expand_kids_by_index(self, *indices): | |||
def expand_kids_by_index(self, *indices: int) -> None: | |||
"""Expand (inline) children at the given indices""" | |||
for i in sorted(indices, reverse=True): # reverse so that changing tail won't affect indices | |||
kid = self.children[i] | |||
self.children[i:i+1] = kid.children | |||
def scan_values(self, pred): | |||
def scan_values(self, pred: 'Callable[[Union[str, Tree]], bool]') -> Iterator[str]: | |||
"""Return all values in the tree that evaluate pred(value) as true. | |||
This can be used to find all the tokens in the tree. | |||
@@ -140,10 +158,10 @@ class Tree(object): | |||
def __deepcopy__(self, memo): | |||
return type(self)(self.data, deepcopy(self.children, memo), meta=self._meta) | |||
def copy(self): | |||
def copy(self) -> 'Tree': | |||
return type(self)(self.data, self.children) | |||
def set(self, data, children): | |||
def set(self, data: str, children: 'List[Union[str, Tree]]') -> None: | |||
self.data = data | |||
self.children = children | |||