@@ -16,4 +16,4 @@ jobs:
          python -m pip install --upgrade pip
          pip install mypy
      - name: Lint with mypy
        run: mypy -p lark-stubs || true
        run: mypy -p lark || true

@@ -1,12 +0,0 @@
# -*- coding: utf-8 -*-

from .tree import *
from .visitors import *
from .exceptions import *
from .lexer import *
from .load_grammar import *
from .lark import *
from logging import Logger as _Logger

logger: _Logger
__version__: str = ...

@@ -1,17 +0,0 @@
import types
from typing import Optional

from .visitors import Transformer

class Ast(object):
    pass

class AsList(object):
    pass

def create_transformer(
    ast_module: types.ModuleType,
    transformer: Optional[Transformer]=None
) -> Transformer:
    ...

@@ -1,65 +0,0 @@
# -*- coding: utf-8 -*-

from typing import Dict, Iterable, Callable, Union, TypeVar, Tuple, Any, List, Set
from .tree import Tree
from .lexer import Token
from .parsers.lalr_interactive_parser import InteractiveParser

class LarkError(Exception):
    pass

class ConfigurationError(LarkError, ValueError):
    pass

class GrammarError(LarkError):
    pass

class ParseError(LarkError):
    pass

class LexError(LarkError):
    pass

T = TypeVar('T')

class UnexpectedEOF(ParseError):
    expected: List[Token]

class UnexpectedInput(LarkError):
    line: int
    column: int
    pos_in_stream: int
    state: Any

    def get_context(self, text: str, span: int = ...) -> str:
        ...

    def match_examples(
        self,
        parse_fn: Callable[[str], Tree],
        examples: Union[Dict[T, Iterable[str]], Iterable[Tuple[T, Iterable[str]]]],
        token_type_match_fallback: bool = False,
        use_accepts: bool = False,
    ) -> T:
        ...

class UnexpectedToken(ParseError, UnexpectedInput):
    expected: Set[str]
    considered_rules: Set[str]
    interactive_parser: InteractiveParser
    accepts: Set[str]

class UnexpectedCharacters(LexError, UnexpectedInput):
    allowed: Set[str]
    considered_tokens: Set[Any]

class VisitError(LarkError):
    obj: Union[Tree, Token]
    orig_exc: Exception

@@ -1,14 +0,0 @@
from typing import Optional, Tuple

class RuleOptions:
    keep_all_tokens: bool
    expand1: bool
    priority: int
    template_source: Optional[str]
    empty_indices: Tuple[bool, ...]

class Symbol:
    name: str
    is_term: bool

@@ -1,47 +0,0 @@
# -*- coding: utf-8 -*-

from typing import Tuple, List, Iterator, Optional
from abc import ABC, abstractmethod
from .lexer import Token
from .lark import PostLex

class Indenter(PostLex, ABC):
    paren_level: Optional[int]
    indent_level: Optional[List[int]]

    def __init__(self) -> None:
        ...

    def handle_NL(self, token: Token) -> Iterator[Token]:
        ...

    @property
    @abstractmethod
    def NL_type(self) -> str:
        ...

    @property
    @abstractmethod
    def OPEN_PAREN_types(self) -> List[str]:
        ...

    @property
    @abstractmethod
    def CLOSE_PAREN_types(self) -> List[str]:
        ...

    @property
    @abstractmethod
    def INDENT_type(self) -> str:
        ...

    @property
    @abstractmethod
    def DEDENT_type(self) -> str:
        ...

    @property
    @abstractmethod
    def tab_len(self) -> int:
        ...

@@ -1,109 +0,0 @@
# -*- coding: utf-8 -*-

from typing import (
    TypeVar, Type, List, Dict, IO, Iterator, Callable, Union, Optional,
    Literal, Protocol, Tuple, Iterable,
)

from .parsers.lalr_interactive_parser import InteractiveParser
from .visitors import Transformer
from .lexer import Token, Lexer, TerminalDef
from .tree import Tree
from .exceptions import UnexpectedInput
from .load_grammar import Grammar

_T = TypeVar('_T')

class PostLex(Protocol):

    def process(self, stream: Iterator[Token]) -> Iterator[Token]:
        ...

    always_accept: Iterable[str]

class LarkOptions:
    start: List[str]
    parser: str
    lexer: str
    transformer: Optional[Transformer]
    postlex: Optional[PostLex]
    ambiguity: str
    regex: bool
    debug: bool
    keep_all_tokens: bool
    propagate_positions: Union[bool, Callable]
    maybe_placeholders: bool
    lexer_callbacks: Dict[str, Callable[[Token], Token]]
    cache: Union[bool, str]
    g_regex_flags: int
    use_bytes: bool
    import_paths: List[Union[str, Callable[[Union[None, str, PackageResource], str], Tuple[str, str]]]]
    source_path: Optional[str]

class PackageResource(object):
    pkg_name: str
    path: str

    def __init__(self, pkg_name: str, path: str): ...

class FromPackageLoader:
    def __init__(self, pkg_name: str, search_paths: Tuple[str, ...] = ...): ...

    def __call__(self, base_path: Union[None, str, PackageResource], grammar_path: str) -> Tuple[PackageResource, str]: ...

class Lark:
    source_path: str
    source_grammar: str
    grammar: Grammar
    options: LarkOptions
    lexer: Lexer
    terminals: List[TerminalDef]

    def __init__(
        self,
        grammar: Union[Grammar, str, IO[str]],
        *,
        start: Union[None, str, List[str]] = "start",
        parser: Literal["earley", "lalr", "cyk", "auto"] = "auto",
        lexer: Union[Literal["auto", "standard", "contextual", "dynamic", "dynamic_complete"], Type[Lexer]] = "auto",
        transformer: Optional[Transformer] = None,
        postlex: Optional[PostLex] = None,
        ambiguity: Literal["explicit", "resolve"] = "resolve",
        regex: bool = False,
        debug: bool = False,
        keep_all_tokens: bool = False,
        propagate_positions: Union[bool, Callable] = False,
        maybe_placeholders: bool = False,
        lexer_callbacks: Optional[Dict[str, Callable[[Token], Token]]] = None,
        cache: Union[bool, str] = False,
        g_regex_flags: int = ...,
        use_bytes: bool = False,
        import_paths: List[Union[str, Callable[[Union[None, str, PackageResource], str], Tuple[str, str]]]] = ...,
        source_path: Optional[str]=None,
    ):
        ...

    def parse(self, text: str, start: Optional[str] = None, on_error: Callable[[UnexpectedInput], bool] = None) -> Tree:
        ...

    def parse_interactive(self, text: str = None, start: Optional[str] = None) -> InteractiveParser:
        ...

    @classmethod
    def open(cls: Type[_T], grammar_filename: str, rel_to: Optional[str] = None, **options) -> _T:
        ...

    @classmethod
    def open_from_package(cls: Type[_T], package: str, grammar_path: str, search_paths: Tuple[str, ...] = ..., **options) -> _T:
        ...

    def lex(self, text: str, dont_ignore: bool = False) -> Iterator[Token]:
        ...

    def get_terminal(self, name: str) -> TerminalDef:
        ...

@@ -1,161 +0,0 @@
# -*- coding: utf-8 -*-

from types import ModuleType
from typing import (
    TypeVar, Type, Tuple, List, Dict, Iterator, Collection, Callable, Optional, FrozenSet, Any,
    Pattern as REPattern,
)
from abc import abstractmethod, ABC

_T = TypeVar('_T')

class Pattern(ABC):
    value: str
    flags: Collection[str]
    raw: str
    type: str

    def __init__(self, value: str, flags: Collection[str] = (), raw: str = None) -> None:
        ...

    @abstractmethod
    def to_regexp(self) -> str:
        ...

    @property
    @abstractmethod
    def min_width(self) -> int:
        ...

    @property
    @abstractmethod
    def max_width(self) -> int:
        ...

class PatternStr(Pattern):
    type: str = ...

    def to_regexp(self) -> str:
        ...

    @property
    def min_width(self) -> int:
        ...

    @property
    def max_width(self) -> int:
        ...

class PatternRE(Pattern):
    type: str = ...

    def to_regexp(self) -> str:
        ...

    @property
    def min_width(self) -> int:
        ...

    @property
    def max_width(self) -> int:
        ...

class TerminalDef:
    name: str
    pattern: Pattern
    priority: int

    def __init__(self, name: str, pattern: Pattern, priority: int = ...) -> None:
        ...

    def user_repr(self) -> str: ...

class Token(str):
    type: str
    start_pos: int
    value: Any
    line: int
    column: int
    end_line: int
    end_column: int
    end_pos: int

    def __init__(self, type_: str, value: Any, start_pos: int = None, line: int = None, column: int = None, end_line: int = None, end_column: int = None, end_pos: int = None) -> None:
        ...

    def update(self, type_: Optional[str] = None, value: Optional[Any] = None) -> Token:
        ...

    @classmethod
    def new_borrow_pos(cls: Type[_T], type_: str, value: Any, borrow_t: Token) -> _T:
        ...

_Callback = Callable[[Token], Token]

class Lexer(ABC):
    lex: Callable[..., Iterator[Token]]

class LexerConf:
    tokens: Collection[TerminalDef]
    re_module: ModuleType
    ignore: Collection[str] = ()
    postlex: Any = None
    callbacks: Optional[Dict[str, _Callback]] = None
    g_regex_flags: int = 0
    skip_validation: bool = False
    use_bytes: bool = False

class TraditionalLexer(Lexer):
    terminals: Collection[TerminalDef]
    ignore_types: FrozenSet[str]
    newline_types: FrozenSet[str]
    user_callbacks: Dict[str, _Callback]
    callback: Dict[str, _Callback]
    mres: List[Tuple[REPattern, Dict[int, str]]]
    re: ModuleType

    def __init__(
        self,
        conf: LexerConf
    ) -> None:
        ...

    def build(self) -> None:
        ...

    def match(self, stream: str, pos: int) -> Optional[Tuple[str, str]]:
        ...

    def lex(self, stream: str) -> Iterator[Token]:
        ...

    def next_token(self, lex_state: Any, parser_state: Any = None) -> Token:
        ...

class ContextualLexer(Lexer):
    lexers: Dict[str, TraditionalLexer]
    root_lexer: TraditionalLexer

    def __init__(
        self,
        terminals: Collection[TerminalDef],
        states: Dict[str, Collection[str]],
        re_: ModuleType,
        ignore: Collection[str] = ...,
        always_accept: Collection[str] = ...,
        user_callbacks: Dict[str, _Callback] = ...,
        g_regex_flags: int = ...
    ) -> None:
        ...

    def lex(self, stream: str, get_parser_state: Callable[[], str]) -> Iterator[Token]:
        ...

@@ -1,31 +0,0 @@
from typing import List, Tuple, Union, Callable, Dict, Optional

from .tree import Tree
from .grammar import RuleOptions
from .exceptions import UnexpectedInput

class Grammar:
    rule_defs: List[Tuple[str, Tuple[str, ...], Tree, RuleOptions]]
    term_defs: List[Tuple[str, Tuple[Tree, int]]]
    ignore: List[str]

class GrammarBuilder:
    global_keep_all_tokens: bool
    import_paths: List[Union[str, Callable]]
    used_files: Dict[str, str]

    def __init__(self, global_keep_all_tokens: bool = False, import_paths: List[Union[str, Callable]] = None, used_files: Dict[str, str]=None) -> None: ...

    def load_grammar(self, grammar_text: str, grammar_name: str = ..., mangle: Callable[[str], str] = None) -> None: ...

    def do_import(self, dotted_path: Tuple[str, ...], base_path: Optional[str], aliases: Dict[str, str],
                  base_mangle: Callable[[str], str] = None) -> None: ...

    def validate(self) -> None: ...

    def build(self) -> Grammar: ...

def find_grammar_errors(text: str, start: str='start') -> List[Tuple[UnexpectedInput, str]]: ...

@@ -1,39 +0,0 @@
# -*- coding: utf-8 -*-

from typing import List, Dict, Union, Callable, Iterable

from .grammar import Symbol
from .lark import Lark
from .tree import Tree
from .visitors import Transformer_InPlace
from .lexer import TerminalDef

class WriteTokensTransformer(Transformer_InPlace):

    def __init__(self, tokens: Dict[str, TerminalDef], term_subs: Dict[str, Callable[[Symbol], str]] = ...): ...

class MatchTree(Tree):
    pass

class MakeMatchTree:
    name: str
    expansion: List[TerminalDef]

    def __init__(self, name: str, expansion: List[TerminalDef]):
        ...

    def __call__(self, args: List[Union[str, Tree]]):
        ...

class Reconstructor:

    def __init__(self, parser: Lark, term_subs: Dict[str, Callable[[Symbol], str]] = ...):
        ...

    def reconstruct(self, tree: Tree, postproc: Callable[[Iterable[str]], Iterable[str]]=None,
                    insert_spaces: bool = True) -> str:
        ...

@@ -1,72 +0,0 @@
# -*- coding: utf-8 -*-

from typing import List, Callable, Iterator, Union, Optional, Literal, Any
from .lexer import TerminalDef

class Meta:
    empty: bool
    line: int
    column: int
    start_pos: int
    end_line: int
    end_column: int
    end_pos: int
    orig_expansion: List[TerminalDef]
    match_tree: bool

class Tree:
    data: str
    children: List[Union[str, Tree]]
    meta: Meta

    def __init__(
        self,
        data: str,
        children: List[Union[str, Tree]],
        meta: Optional[Meta] = None
    ) -> None:
        ...

    def pretty(self, indent_str: str = ...) -> str:
        ...

    def find_pred(self, pred: Callable[[Tree], bool]) -> Iterator[Tree]:
        ...

    def find_data(self, data: str) -> Iterator[Tree]:
        ...

    def expand_kids_by_index(self, *indices: int) -> None:
        ...

    def scan_values(self, pred: Callable[[Union[str, Tree]], bool]) -> Iterator[str]:
        ...

    def iter_subtrees(self) -> Iterator[Tree]:
        ...

    def iter_subtrees_topdown(self) -> Iterator[Tree]:
        ...

    def copy(self) -> Tree:
        ...

    def set(self, data: str, children: List[Union[str, Tree]]) -> None:
        ...

    def __hash__(self) -> int:
        ...

class SlottedTree(Tree):
    pass

def pydot__tree_to_png(
    tree: Tree,
    filename: str,
    rankdir: Literal["TB", "LR", "BT", "RL"] = ...,
    **kwargs
) -> None:
    ...

@@ -1,108 +0,0 @@
# -*- coding: utf-8 -*-

from typing import TypeVar, Tuple, List, Callable, Generic, Type, Union
from abc import ABC
from .tree import Tree

_T = TypeVar('_T')
_R = TypeVar('_R')
_FUNC = Callable[..., _T]
_DECORATED = Union[_FUNC, type]

class Transformer(ABC, Generic[_T]):

    def __init__(self, visit_tokens: bool = True) -> None:
        ...

    def transform(self, tree: Tree) -> _T:
        ...

    def __mul__(self, other: Transformer[_T]) -> TransformerChain[_T]:
        ...

class TransformerChain(Generic[_T]):
    transformers: Tuple[Transformer[_T], ...]

    def __init__(self, *transformers: Transformer[_T]) -> None:
        ...

    def transform(self, tree: Tree) -> _T:
        ...

    def __mul__(self, other: Transformer[_T]) -> TransformerChain[_T]:
        ...

class Transformer_InPlace(Transformer):
    pass

class Transformer_NonRecursive(Transformer):
    pass

class Transformer_InPlaceRecursive(Transformer):
    pass

class VisitorBase:
    pass

class Visitor(VisitorBase, ABC, Generic[_T]):

    def visit(self, tree: Tree) -> Tree:
        ...

    def visit_topdown(self, tree: Tree) -> Tree:
        ...

class Visitor_Recursive(VisitorBase):

    def visit(self, tree: Tree) -> Tree:
        ...

    def visit_topdown(self, tree: Tree) -> Tree:
        ...

class Interpreter(ABC, Generic[_T]):

    def visit(self, tree: Tree) -> _T:
        ...

    def visit_children(self, tree: Tree) -> List[_T]:
        ...

_InterMethod = Callable[[Type[Interpreter], _T], _R]

def v_args(
    inline: bool = False,
    meta: bool = False,
    tree: bool = False,
    wrapper: Callable = None
) -> Callable[[_DECORATED], _DECORATED]:
    ...

def visit_children_decor(func: _InterMethod) -> _InterMethod:
    ...

class Discard(Exception):
    pass

# Deprecated
class InlineTransformer:
    pass

# Deprecated
def inline_args(obj: _FUNC) -> _FUNC:
    ...

@@ -6,4 +6,4 @@ from .exceptions import (ParseError, LexError, GrammarError, UnexpectedToken,
from .lexer import Token
from .lark import Lark

__version__ = "1.0.0a"
__version__: str = "1.0.0a"

@@ -3,6 +3,8 @@
"""

import inspect, re
import types
from typing import Optional

from lark import Transformer, v_args

@@ -27,7 +29,7 @@ def _call(func, _data, children, _meta):

inline = v_args(wrapper=_call)

def create_transformer(ast_module, transformer=None):
def create_transformer(ast_module: types.ModuleType, transformer: Optional[Transformer]=None) -> Transformer:
    """Collects `Ast` subclasses from the given module, and creates a Lark transformer that builds the AST.

    For each class, we create a corresponding rule in the transformer, with a matching name.

@@ -49,4 +51,4 @@ def create_transformer(ast_module, transformer=None):
            setattr(t, camel_to_snake(name), obj)

    return t
    return t
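
The now-typed `create_transformer` collects `Ast` subclasses from a module into transformer callbacks. A minimal sketch of the intended usage; the `Assign` class and the matching grammar rule name are illustrative, not part of this diff:

```python
import sys
from dataclasses import dataclass

from lark import ast_utils, Transformer

this_module = sys.modules[__name__]


class _Ast(ast_utils.Ast):
    # Marker base: Ast subclasses defined in this module become transformer
    # callbacks, named by snake_casing the class name (underscore-prefixed
    # helpers like this one are skipped).
    pass


@dataclass
class Assign(_Ast):
    # Would handle a hypothetical rule `assign: NAME "=" value`
    name: str
    value: str


# Returns a Transformer whose `assign` callback constructs Assign(*children)
to_ast = ast_utils.create_transformer(this_module, Transformer())
```
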
@@ -1,16 +1,31 @@
from copy import deepcopy
from types import ModuleType

from .utils import Serialize
from .lexer import TerminalDef
from .lexer import TerminalDef, Token

###{standalone
from typing import Any, Callable, Collection, Dict, Optional, TYPE_CHECKING

if TYPE_CHECKING:
    from .lark import PostLex

_Callback = Callable[[Token], Token]

class LexerConf(Serialize):
    __serialize_fields__ = 'terminals', 'ignore', 'g_regex_flags', 'use_bytes', 'lexer_type'
    __serialize_namespace__ = TerminalDef,

    def __init__(self, terminals, re_module, ignore=(), postlex=None, callbacks=None, g_regex_flags=0, skip_validation=False, use_bytes=False):
    terminals: Collection[TerminalDef]
    re_module: ModuleType
    ignore: Collection[str]
    postlex: 'Optional[PostLex]'
    callbacks: Dict[str, _Callback]
    g_regex_flags: int
    skip_validation: bool
    use_bytes: bool

    def __init__(self, terminals: Collection[TerminalDef], re_module: ModuleType, ignore: Collection[str]=(), postlex: 'Optional[PostLex]'=None, callbacks: Optional[Dict[str, _Callback]]=None, g_regex_flags: int=0, skip_validation: bool=False, use_bytes: bool=False):
        self.terminals = terminals
        self.terminals_by_name = {t.name: t for t in self.terminals}
        assert len(self.terminals) == len(self.terminals_by_name)

@@ -4,6 +4,12 @@ from .utils import logger, NO_VALUE

###{standalone
from collections.abc import Sequence
from typing import Dict, Iterable, Callable, Union, TypeVar, Tuple, Any, List, Set, Optional, TYPE_CHECKING

if TYPE_CHECKING:
    from .lexer import Token
    from .parsers.lalr_interactive_parser import InteractiveParser
    from .tree import Tree

class LarkError(Exception):
    pass

@@ -29,6 +35,7 @@ class ParseError(LarkError):
class LexError(LarkError):
    pass

T = TypeVar('T')

class UnexpectedInput(LarkError):
    """UnexpectedInput Error.

@@ -40,10 +47,13 @@ class UnexpectedInput(LarkError):
    After catching one of these exceptions, you may call the following helper methods to create a nicer error message.
    """
    line: int
    column: int
    pos_in_stream = None
    state: Any
    _terminals_by_name = None

    def get_context(self, text, span=40):
    def get_context(self, text: str, span: int=40) -> str:
        """Returns a pretty string pinpointing the error in the text,
        with span amount of context characters around it.

@@ -64,7 +74,7 @@ class UnexpectedInput(LarkError):
            after = text[pos:end].split(b'\n', 1)[0]
            return (before + after + b'\n' + b' ' * len(before.expandtabs()) + b'^\n').decode("ascii", "backslashreplace")

    def match_examples(self, parse_fn, examples, token_type_match_fallback=False, use_accepts=False):
    def match_examples(self, parse_fn: 'Callable[[str], Tree]', examples: Union[Dict[T, Iterable[str]], Iterable[Tuple[T, Iterable[str]]]], token_type_match_fallback: bool=False, use_accepts: bool=False) -> Optional[T]:
        """Allows you to detect what's wrong in the input text by matching
        against example errors.

@@ -127,6 +137,9 @@ class UnexpectedInput(LarkError):

class UnexpectedEOF(ParseError, UnexpectedInput):
    expected: 'List[Token]'

    def __init__(self, expected, state=None, terminals_by_name=None):
        super(UnexpectedEOF, self).__init__()

@@ -147,6 +160,10 @@ class UnexpectedEOF(ParseError, UnexpectedInput):

class UnexpectedCharacters(LexError, UnexpectedInput):
    allowed: Set[str]
    considered_tokens: Set[Any]

    def __init__(self, seq, lex_pos, line, column, allowed=None, considered_tokens=None, state=None, token_history=None,
                 terminals_by_name=None, considered_rules=None):
        super(UnexpectedCharacters, self).__init__()

@@ -190,6 +207,10 @@ class UnexpectedToken(ParseError, UnexpectedInput):
    see: ``InteractiveParser``.
    """
    expected: Set[str]
    considered_rules: Set[str]
    interactive_parser: 'InteractiveParser'

    def __init__(self, token, expected, considered_rules=None, state=None, interactive_parser=None, terminals_by_name=None, token_history=None):
        super(UnexpectedToken, self).__init__()

@@ -209,7 +230,7 @@ class UnexpectedToken(ParseError, UnexpectedInput):

    @property
    def accepts(self):
    def accepts(self) -> Set[str]:
        if self._accepts is NO_VALUE:
            self._accepts = self.interactive_parser and self.interactive_parser.accepts()
        return self._accepts

@@ -232,6 +253,9 @@ class VisitError(LarkError):

    - orig_exc: the exception that caused it to fail
    """
    obj: 'Union[Tree, Token]'
    orig_exc: Exception

    def __init__(self, rule, obj, orig_exc):
        message = 'Error trying to process rule "%s":\n\n%s' % (rule, orig_exc)
        super(VisitError, self).__init__(message)
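
A sketch of how the newly annotated `match_examples` and `get_context` are typically combined; the grammar and the custom error class below are illustrative:

```python
from lark import Lark
from lark.exceptions import UnexpectedInput

parser = Lark('start: "a" "b"', parser='lalr')  # illustrative grammar


class MissingB(SyntaxError):
    pass


def parse_with_nice_errors(text: str):
    try:
        return parser.parse(text)
    except UnexpectedInput as u:
        # Replays known-bad example inputs through parse_fn and returns the
        # key (here: an exception class) whose example fails in the same
        # parser state as `u`; returns None when nothing matches.
        exc_class = u.match_examples(parser.parse, {MissingB: ['a']})
        if exc_class is None:
            raise
        raise exc_class(u.get_context(text)) from u
```
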
@@ -1,13 +1,17 @@
from .utils import Serialize

###{standalone
from typing import Optional, Tuple, ClassVar

class Symbol(Serialize):
    __slots__ = ('name',)

    is_term = NotImplemented
    name: str
    is_term: ClassVar[bool] = NotImplemented

    def __init__(self, name):
    def __init__(self, name: str) -> None:
        self.name = name

    def __eq__(self, other):

@@ -29,7 +33,7 @@ class Symbol(Serialize):

class Terminal(Symbol):
    __serialize_fields__ = 'name', 'filter_out'

    is_term = True
    is_term: ClassVar[bool] = True

    def __init__(self, name, filter_out=False):
        self.name = name

@@ -43,13 +47,19 @@ class Terminal(Symbol):

class NonTerminal(Symbol):
    __serialize_fields__ = 'name',

    is_term = False
    is_term: ClassVar[bool] = False

class RuleOptions(Serialize):
    __serialize_fields__ = 'keep_all_tokens', 'expand1', 'priority', 'template_source', 'empty_indices'

    def __init__(self, keep_all_tokens=False, expand1=False, priority=None, template_source=None, empty_indices=()):
    keep_all_tokens: bool
    expand1: bool
    priority: Optional[int]
    template_source: Optional[str]
    empty_indices: Tuple[bool, ...]

    def __init__(self, keep_all_tokens: bool=False, expand1: bool=False, priority: Optional[int]=None, template_source: Optional[str]=None, empty_indices: Tuple[bool, ...]=()) -> None:
        self.keep_all_tokens = keep_all_tokens
        self.expand1 = expand1
        self.priority = priority

@@ -1,24 +1,28 @@
"Provides Indentation services for languages with indentation similar to Python"

from abc import ABC, abstractmethod

from .exceptions import LarkError
from .lark import PostLex
from .lexer import Token

###{standalone
from typing import List, Iterator

class DedentError(LarkError):
    pass

class Indenter(PostLex):
    OPEN_PAREN_types: list
    CLOSE_PAREN_types: list
    DEDENT_type: str

class Indenter(PostLex, ABC):
    paren_level: int
    indent_level: List[int]

    def __init__(self):
        self.paren_level = None
        self.indent_level = None
    def __init__(self) -> None:
        self.paren_level = 0
        self.indent_level = [0]
        assert self.tab_len > 0

    def handle_NL(self, token):
    def handle_NL(self, token: Token) -> Iterator[Token]:
        if self.paren_level > 0:
            return

@@ -68,4 +72,34 @@ class Indenter(PostLex):
    def always_accept(self):
        return (self.NL_type,)

    @property
    @abstractmethod
    def NL_type(self) -> str:
        ...

    @property
    @abstractmethod
    def OPEN_PAREN_types(self) -> List[str]:
        ...

    @property
    @abstractmethod
    def CLOSE_PAREN_types(self) -> List[str]:
        ...

    @property
    @abstractmethod
    def INDENT_type(self) -> str:
        ...

    @property
    @abstractmethod
    def DEDENT_type(self) -> str:
        ...

    @property
    @abstractmethod
    def tab_len(self) -> int:
        ...

###}
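
The properties made abstract here are exactly what concrete indenters already supplied as class attributes, so existing subclasses keep working unchanged. A typical subclass, in the style of lark's Python-like examples (the terminal names assume a grammar that declares them):

```python
from lark.indenter import Indenter


class PythonIndenter(Indenter):
    NL_type = '_NEWLINE'
    OPEN_PAREN_types = ['LPAR', 'LSQB', 'LBRACE']
    CLOSE_PAREN_types = ['RPAR', 'RSQB', 'RBRACE']
    INDENT_type = '_INDENT'
    DEDENT_type = '_DEDENT'
    tab_len = 8


# The postlexer turns indentation changes after each _NEWLINE into
# _INDENT/_DEDENT tokens, so the grammar can treat them like brackets:
# parser = Lark(grammar, parser='lalr', postlex=PythonIndenter())
```
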
@@ -2,31 +2,71 @@ from abc import ABC, abstractmethod
import sys, os, pickle, hashlib
import tempfile

from .exceptions import ConfigurationError, assert_config
from .exceptions import ConfigurationError, assert_config, UnexpectedInput
from .utils import Serialize, SerializeMemoizer, FS, isascii, logger
from .load_grammar import load_grammar, FromPackageLoader, Grammar, verify_used_files
from .load_grammar import load_grammar, FromPackageLoader, Grammar, verify_used_files, PackageResource
from .tree import Tree
from .common import LexerConf, ParserConf
from .lexer import Lexer, TraditionalLexer, TerminalDef, LexerThread
from .lexer import Lexer, TraditionalLexer, TerminalDef, LexerThread, Token
from .parse_tree_builder import ParseTreeBuilder
from .parser_frontends import get_frontend, _get_lexer_callbacks
from .grammar import Rule

import re
try:
    import regex
    import regex  # type: ignore
except ImportError:
    regex = None

###{standalone
from typing import (
    TypeVar, Type, List, Dict, Iterator, Callable, Union, Optional,
    Tuple, Iterable, IO, Any, TYPE_CHECKING
)

if TYPE_CHECKING:
    from .parsers.lalr_interactive_parser import InteractiveParser
    from .visitors import Transformer
    if sys.version_info >= (3, 8):
        from typing import Literal
    else:
        from typing_extensions import Literal

class PostLex(ABC):
    @abstractmethod
    def process(self, stream: Iterator[Token]) -> Iterator[Token]:
        return stream

    always_accept: Iterable[str] = ()

class LarkOptions(Serialize):
    """Specifies the options for Lark

    """

    start: List[str]
    debug: bool
    transformer: 'Optional[Transformer]'
    propagate_positions: Union[bool, str]
    maybe_placeholders: bool
    cache: Union[bool, str]
    regex: bool
    g_regex_flags: int
    keep_all_tokens: bool
    tree_class: Any
    parser: 'Literal["earley", "lalr", "cyk", "auto"]'
    lexer: 'Union[Literal["auto", "standard", "contextual", "dynamic", "dynamic_complete"], Type[Lexer]]'
    ambiguity: 'Literal["auto", "resolve", "explicit", "forest"]'
    postlex: Optional[PostLex]
    priority: 'Optional[Literal["auto", "normal", "invert"]]'
    lexer_callbacks: Dict[str, Callable[[Token], Token]]
    use_bytes: bool
    edit_terminals: Optional[Callable[[TerminalDef], TerminalDef]]
    import_paths: 'List[Union[str, Callable[[Union[None, str, PackageResource], str], Tuple[str, str]]]]'
    source_path: Optional[str]

    OPTIONS_DOC = """
    **=== General Options ===**

@@ -106,12 +146,10 @@ class LarkOptions(Serialize):
    # Adding a new option needs to be done in multiple places:
    # - In the dictionary below. This is the primary truth of which options `Lark.__init__` accepts
    # - In the docstring above. It is used both for the docstring of `LarkOptions` and `Lark`, and in readthedocs
    # - In `lark-stubs/lark.pyi`:
    #   - As attribute to `LarkOptions`
    #   - As parameter to `Lark.__init__`
    # - As an attribute of `LarkOptions` above
    # - Potentially in `_LOAD_ALLOWED_OPTIONS` below this class, when the option doesn't change how the grammar is loaded
    # - Potentially in `lark.tools.__init__`, if it makes sense, and it can easily be passed as a cmd argument

    _defaults = {
    _defaults: Dict[str, Any] = {
        'debug': False,
        'keep_all_tokens': False,
        'tree_class': None,

@@ -189,13 +227,7 @@ _VALID_PRIORITY_OPTIONS = ('auto', 'normal', 'invert', None)
_VALID_AMBIGUITY_OPTIONS = ('auto', 'resolve', 'explicit', 'forest')

class PostLex(ABC):
    @abstractmethod
    def process(self, stream):
        return stream

    always_accept = ()

_T = TypeVar('_T')

class Lark(Serialize):
    """Main interface for the library.

@@ -210,7 +242,15 @@ class Lark(Serialize):
        >>> Lark(r'''start: "foo" ''')
        Lark(...)
    """

    def __init__(self, grammar, **options):
    source_path: str
    source_grammar: str
    grammar: 'Grammar'
    options: LarkOptions
    lexer: Lexer
    terminals: List[TerminalDef]

    def __init__(self, grammar: 'Union[Grammar, str, IO[str]]', **options) -> None:
        self.options = LarkOptions(options)

        # Set regex or re module

@@ -374,6 +414,7 @@ class Lark(Serialize):
            if cache_fn:
                logger.debug('Saving grammar to cache: %s', cache_fn)
                with FS.open(cache_fn, 'wb') as f:
                    assert cache_md5 is not None
                    f.write(cache_md5.encode('utf8') + b'\n')
                    pickle.dump(used_files, f)
                    self.save(f)

@@ -476,7 +517,7 @@ class Lark(Serialize):
        return inst._load({'data': data, 'memo': memo}, **kwargs)

    @classmethod
    def open(cls, grammar_filename, rel_to=None, **options):
    def open(cls: Type[_T], grammar_filename: str, rel_to: Optional[str]=None, **options) -> _T:
        """Create an instance of Lark with the grammar given by its filename

        If ``rel_to`` is provided, the function will find the grammar filename in relation to it.

@@ -494,7 +535,7 @@ class Lark(Serialize):
            return cls(f, **options)

    @classmethod
    def open_from_package(cls, package, grammar_path, search_paths=("",), **options):
    def open_from_package(cls: Type[_T], package: str, grammar_path: str, search_paths: Tuple[str, ...]=("",), **options) -> _T:
        """Create an instance of Lark with the grammar loaded from within the package `package`.

        This allows grammar loading from zipapps.

@@ -515,7 +556,7 @@ class Lark(Serialize):
        return 'Lark(open(%r), parser=%r, lexer=%r, ...)' % (self.source_path, self.options.parser, self.options.lexer)

    def lex(self, text, dont_ignore=False):
    def lex(self, text: str, dont_ignore: bool=False) -> Iterator[Token]:
        """Only lex (and postlex) the text, without parsing it. Only relevant when lexer='standard'

        When dont_ignore=True, the lexer will return all tokens, even those marked for %ignore.

@@ -530,11 +571,11 @@ class Lark(Serialize):
            return self.options.postlex.process(stream)
        return stream

    def get_terminal(self, name):
    def get_terminal(self, name: str) -> TerminalDef:
        """Get information about a terminal"""
        return self._terminals_dict[name]

    def parse_interactive(self, text=None, start=None):
    def parse_interactive(self, text: Optional[str]=None, start: Optional[str]=None) -> 'InteractiveParser':
        """Start an interactive parsing session.

        Parameters:

@@ -548,7 +589,7 @@ class Lark(Serialize):
        """
        return self.parser.parse_interactive(text, start=start)

    def parse(self, text, start=None, on_error=None):
    def parse(self, text: str, start: Optional[str]=None, on_error: 'Optional[Callable[[UnexpectedInput], bool]]'=None) -> Tree:
        """Parse the given text, according to the options provided.

        Parameters:
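
The newly annotated `parse_interactive` returns an `InteractiveParser` whose `exhaust_lexer` and `accepts` methods drive step-by-step parsing; a small sketch with an illustrative grammar:

```python
from lark import Lark

parser = Lark('start: "a" "b"', parser='lalr')

# Interactive parsing: feed tokens incrementally and inspect parser state.
ip = parser.parse_interactive('a')
ip.exhaust_lexer()      # feed every token lexed from the given text
print(ip.accepts())     # terminal names that may legally come next, e.g. {'B'}
```

Relatedly, the `on_error` callback annotated on `parse` receives the `UnexpectedInput` instance and returns True to attempt recovery and continue parsing.
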
@@ -1,5 +1,6 @@
# Lexer Implementation

from abc import abstractmethod, ABC
import re
from contextlib import suppress

@@ -9,12 +10,23 @@ from .exceptions import UnexpectedCharacters, LexError, UnexpectedToken

###{standalone
from copy import copy
from types import ModuleType
from typing import (
    TypeVar, Type, Tuple, List, Dict, Iterator, Collection, Callable, Optional, FrozenSet, Any,
    Pattern as REPattern, ClassVar, TYPE_CHECKING
)

class Pattern(Serialize):
    raw = None
    type = None

if TYPE_CHECKING:
    from .common import LexerConf

    def __init__(self, value, flags=(), raw=None):

class Pattern(Serialize, ABC):
    value: str
    flags: Collection[str]
    raw: Optional[str]
    type: ClassVar[str]

    def __init__(self, value: str, flags: Collection[str]=(), raw: Optional[str]=None) -> None:
        self.value = value
        self.flags = frozenset(flags)
        self.raw = raw

@@ -29,13 +41,18 @@ class Pattern(Serialize):
    def __eq__(self, other):
        return type(self) == type(other) and self.value == other.value and self.flags == other.flags

    def to_regexp(self):
    @abstractmethod
    def to_regexp(self) -> str:
        raise NotImplementedError()

    def min_width(self):
    @property
    @abstractmethod
    def min_width(self) -> int:
        raise NotImplementedError()

    def max_width(self):
    @property
    @abstractmethod
    def max_width(self) -> int:
        raise NotImplementedError()

    def _get_flags(self, value):

@@ -47,23 +64,26 @@ class Pattern(Serialize):

class PatternStr(Pattern):
    __serialize_fields__ = 'value', 'flags'

    type = "str"
    type: ClassVar[str] = "str"

    def to_regexp(self):
    def to_regexp(self) -> str:
        return self._get_flags(re.escape(self.value))

    @property
    def min_width(self):
    def min_width(self) -> int:
        return len(self.value)

    @property
    def max_width(self) -> int:
        return len(self.value)
    max_width = min_width

class PatternRE(Pattern):
    __serialize_fields__ = 'value', 'flags', '_width'

    type = "re"
    type: ClassVar[str] = "re"

    def to_regexp(self):
    def to_regexp(self) -> str:
        return self._get_flags(self.value)

    _width = None

@@ -73,11 +93,11 @@ class PatternRE(Pattern):
        return self._width

    @property
    def min_width(self):
    def min_width(self) -> int:
        return self._get_width()[0]

    @property
    def max_width(self):
    def max_width(self) -> int:
        return self._get_width()[1]

@@ -85,7 +105,11 @@ class TerminalDef(Serialize):
    __serialize_fields__ = 'name', 'pattern', 'priority'
    __serialize_namespace__ = PatternStr, PatternRE

    def __init__(self, name, pattern, priority=1):
    name: str
    pattern: Pattern
    priority: int

    def __init__(self, name: str, pattern: Pattern, priority: int=1) -> None:
        assert isinstance(pattern, Pattern), pattern
        self.name = name
        self.pattern = pattern

@@ -94,12 +118,13 @@ class TerminalDef(Serialize):
    def __repr__(self):
        return '%s(%r, %r)' % (type(self).__name__, self.name, self.pattern)

    def user_repr(self):
    def user_repr(self) -> str:
        if self.name.startswith('__'):  # We represent a generated terminal
            return self.pattern.raw or self.name
        else:
            return self.name

_T = TypeVar('_T')

class Token(str):
    """A string with meta-information, that is produced by the lexer.

@@ -122,6 +147,15 @@ class Token(str):
    """
    __slots__ = ('type', 'start_pos', 'value', 'line', 'column', 'end_line', 'end_column', 'end_pos')

    type: str
    start_pos: int
    value: Any
    line: int
    column: int
    end_line: int
    end_column: int
    end_pos: int

    def __new__(cls, type_, value, start_pos=None, line=None, column=None, end_line=None, end_column=None, end_pos=None):
        try:
            inst = super(Token, cls).__new__(cls, value)

@@ -139,7 +173,7 @@ class Token(str):
        inst.end_pos = end_pos
        return inst

    def update(self, type_=None, value=None):
    def update(self, type_: Optional[str]=None, value: Optional[Any]=None) -> 'Token':
        return Token.new_borrow_pos(
            type_ if type_ is not None else self.type,
            value if value is not None else self.value,

@@ -147,7 +181,7 @@ class Token(str):
        )

    @classmethod
    def new_borrow_pos(cls, type_, value, borrow_t):
    def new_borrow_pos(cls: Type[_T], type_: str, value: Any, borrow_t: 'Token') -> _T:
        return cls(type_, value, borrow_t.start_pos, borrow_t.line, borrow_t.column, borrow_t.end_line, borrow_t.end_column, borrow_t.end_pos)

    def __reduce__(self):

@@ -298,13 +332,35 @@ def _regexp_has_newline(r: str):
    return '\n' in r or '\\n' in r or '\\s' in r or '[^' in r or ('(?s' in r and '.' in r)

class Lexer(object):
class LexerState(object):
    __slots__ = 'text', 'line_ctr', 'last_token'

    def __init__(self, text, line_ctr, last_token=None):
        self.text = text
        self.line_ctr = line_ctr
        self.last_token = last_token

    def __eq__(self, other):
        if not isinstance(other, LexerState):
            return NotImplemented

        return self.text is other.text and self.line_ctr == other.line_ctr and self.last_token == other.last_token

    def __copy__(self):
        return type(self)(self.text, copy(self.line_ctr), self.last_token)

_Callback = Callable[[Token], Token]

class Lexer(ABC):
    """Lexer interface

    Method Signatures:
        lex(self, text) -> Iterator[Token]
        lex(self, lexer_state, parser_state) -> Iterator[Token]
    """
    lex = NotImplemented

    @abstractmethod
    def lex(self, lexer_state: LexerState, parser_state: Any) -> Iterator[Token]:
        ...

    def make_lexer_state(self, text):
        line_ctr = LineCounter(b'\n' if isinstance(text, bytes) else '\n')

@@ -313,7 +369,14 @@ class Lexer(object):

class TraditionalLexer(Lexer):

    def __init__(self, conf):
    terminals: Collection[TerminalDef]
    ignore_types: FrozenSet[str]
    newline_types: FrozenSet[str]
    user_callbacks: Dict[str, _Callback]
    callback: Dict[str, _Callback]
    re: ModuleType

    def __init__(self, conf: 'LexerConf') -> None:
        terminals = list(conf.terminals)
        assert all(isinstance(t, TerminalDef) for t in terminals), terminals

@@ -368,12 +431,12 @@ class TraditionalLexer(Lexer):
    def match(self, text, pos):
        return self.scanner.match(text, pos)

    def lex(self, state, parser_state):
    def lex(self, state: LexerState, parser_state: Any) -> Iterator[Token]:
        with suppress(EOFError):
            while True:
                yield self.next_token(state, parser_state)

    def next_token(self, lex_state, parser_state=None):
    def next_token(self, lex_state: LexerState, parser_state: Any=None) -> Token:
        line_ctr = lex_state.line_ctr
        while line_ctr.char_pos < len(lex_state.text):
            res = self.match(lex_state.text, line_ctr.char_pos)

@@ -409,27 +472,12 @@ class TraditionalLexer(Lexer):
        raise EOFError(self)

class LexerState(object):
    __slots__ = 'text', 'line_ctr', 'last_token'

    def __init__(self, text, line_ctr, last_token=None):
        self.text = text
        self.line_ctr = line_ctr
        self.last_token = last_token

    def __eq__(self, other):
        if not isinstance(other, LexerState):
            return NotImplemented

        return self.text is other.text and self.line_ctr == other.line_ctr and self.last_token == other.last_token

    def __copy__(self):
        return type(self)(self.text, copy(self.line_ctr), self.last_token)

class ContextualLexer(Lexer):

    def __init__(self, conf, states, always_accept=()):
    lexers: Dict[str, TraditionalLexer]
    root_lexer: TraditionalLexer

    def __init__(self, conf: 'LexerConf', states: Dict[str, Collection[str]], always_accept: Collection[str]=()) -> None:
        terminals = list(conf.terminals)
        terminals_by_name = conf.terminals_by_name

@@ -457,7 +505,7 @@ class ContextualLexer(Lexer):
    def make_lexer_state(self, text):
        return self.root_lexer.make_lexer_state(text)

    def lex(self, lexer_state, parser_state):
    def lex(self, lexer_state: LexerState, parser_state: Any) -> Iterator[Token]:
        try:
            while True:
                lexer = self.lexers[parser_state.position]
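
Since `Token` subclasses `str`, most of the annotations added above describe metadata that rides along with an ordinary string value. A quick illustration using `Lark.lex`, which requires `lexer='standard'`; the grammar is illustrative:

```python
from lark import Lark

parser = Lark('''
    start: WORD "," WORD
    %import common.WORD
    %ignore " "
''', parser='lalr', lexer='standard')

for tok in parser.lex('hello , world'):
    # A Token compares equal to a plain str, but also carries its
    # terminal name and position information.
    print(repr(tok), tok.type, tok.line, tok.column)
```
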
@@ -8,6 +8,7 @@ import pkgutil
from ast import literal_eval
from numbers import Integral
from contextlib import suppress
from typing import List, Tuple, Union, Callable, Dict, Optional

from .utils import bfs, logger, classify_bool, is_id_continue, is_id_start, bfs_all_unique
from .lexer import Token, TerminalDef, PatternStr, PatternRE

@@ -17,7 +18,7 @@ from .parser_frontends import ParsingFrontend
from .common import LexerConf, ParserConf
from .grammar import RuleOptions, Rule, Terminal, NonTerminal, Symbol
from .utils import classify, dedup_list
from .exceptions import GrammarError, UnexpectedCharacters, UnexpectedToken, ParseError
from .exceptions import GrammarError, UnexpectedCharacters, UnexpectedToken, ParseError, UnexpectedInput

from .tree import Tree, SlottedTree as ST
from .visitors import Transformer, Visitor, v_args, Transformer_InPlace, Transformer_NonRecursive

@@ -540,7 +541,12 @@ def nr_deepcopy_tree(t):

class Grammar:

    def __init__(self, rule_defs, term_defs, ignore):
    term_defs: List[Tuple[str, Tuple[Tree, int]]]
    rule_defs: List[Tuple[str, Tuple[str, ...], Tree, RuleOptions]]
    ignore: List[str]

    def __init__(self, rule_defs: List[Tuple[str, Tuple[str, ...], Tree, RuleOptions]], term_defs: List[Tuple[str, Tuple[Tree, int]]], ignore: List[str]) -> None:
        self.term_defs = term_defs
        self.rule_defs = rule_defs
        self.ignore = ignore

@@ -679,14 +685,18 @@ class FromPackageLoader(object):
        pkg_name: The name of the package. You can probably provide `__name__` most of the time
        search_paths: All the paths that will be searched on absolute imports.
    """

    def __init__(self, pkg_name, search_paths=("", )):
    pkg_name: str
    search_paths: Tuple[str, ...]

    def __init__(self, pkg_name: str, search_paths: Tuple[str, ...]=("", )) -> None:
        self.pkg_name = pkg_name
        self.search_paths = search_paths

    def __repr__(self):
        return "%s(%r, %r)" % (type(self).__name__, self.pkg_name, self.search_paths)

    def __call__(self, base_path, grammar_path):
    def __call__(self, base_path: Union[None, str, PackageResource], grammar_path: str) -> Tuple[PackageResource, str]:
        if base_path is None:
            to_try = self.search_paths
        else:

@@ -863,7 +873,7 @@ def _search_interactive_parser(interactive_parser, predicate):
        if predicate(p):
            return path, p

def find_grammar_errors(text, start='start'):
def find_grammar_errors(text: str, start: str='start') -> List[Tuple[UnexpectedInput, str]]:
    errors = []
    def on_error(e):
        errors.append((e, _error_repr(e)))

@@ -912,7 +922,12 @@ def _mangle_exp(exp, mangle):

class GrammarBuilder:

    def __init__(self, global_keep_all_tokens=False, import_paths=None, used_files=None):
    global_keep_all_tokens: bool
    import_paths: List[Union[str, Callable]]
    used_files: Dict[str, str]

    def __init__(self, global_keep_all_tokens: bool=False, import_paths: Optional[List[Union[str, Callable]]]=None, used_files: Optional[Dict[str, str]]=None) -> None:
        self.global_keep_all_tokens = global_keep_all_tokens
        self.import_paths = import_paths or []
        self.used_files = used_files or {}

@@ -1056,7 +1071,7 @@ class GrammarBuilder:

        return name, exp, params, opts

    def load_grammar(self, grammar_text, grammar_name="<?>", mangle=None):
    def load_grammar(self, grammar_text: str, grammar_name: str="<?>", mangle: Optional[Callable[[str], str]]=None) -> None:
        tree = _parse_grammar(grammar_text, grammar_name)

        imports = {}

@@ -1119,7 +1134,7 @@ class GrammarBuilder:

        self._definitions = {k: v for k, v in self._definitions.items() if k in _used}

    def do_import(self, dotted_path, base_path, aliases, base_mangle=None):
    def do_import(self, dotted_path: Tuple[str, ...], base_path: Optional[str], aliases: Dict[str, str], base_mangle: Optional[Callable[[str], str]]=None) -> None:
        assert dotted_path
        mangle = _get_mangle('__'.join(dotted_path), aliases, base_mangle)
        grammar_path = os.path.join(*dotted_path) + EXT

@@ -1155,7 +1170,7 @@ class GrammarBuilder:

        assert False, "Couldn't import grammar %s, but a corresponding file was found at a place where lark doesn't search for it" % (dotted_path,)

    def validate(self):
    def validate(self) -> None:
        for name, (params, exp, _options) in self._definitions.items():
            for i, p in enumerate(params):
                if p in self._definitions:

@@ -1184,7 +1199,7 @@ class GrammarBuilder:
        if not set(self._definitions).issuperset(self._ignore_names):
            raise GrammarError("Terminals %s were marked to ignore but were not defined!" % (set(self._ignore_names) - set(self._definitions)))

    def build(self):
    def build(self) -> Grammar:
        self.validate()
        rule_defs = []
        term_defs = []
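
A sketch of the `FromPackageLoader` call signature annotated above, assuming a hypothetical package `my_pkg` that ships a `grammars/base.lark` resource:

```python
from lark import Lark
from lark.load_grammar import FromPackageLoader

# Resolve %import statements against data files inside my_pkg/grammars/
custom_loader = FromPackageLoader('my_pkg', ('grammars',))

parser = Lark('''
    start: rule_from_base
    %import base.rule_from_base
''', import_paths=[custom_loader])
```
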
@@ -7,7 +7,7 @@ from .parsers.lalr_parser import LALR_Parser
from .tree import Tree
from .common import LexerConf, ParserConf
try:
    import regex
    import regex  # type: ignore
except ImportError:
    regex = None
import re

@@ -1,11 +1,13 @@
"""Reconstruct text from a tree, based on Lark grammar"""

from typing import List, Dict, Union, Callable, Iterable, Optional
import unicodedata

from .lark import Lark
from .tree import Tree
from .visitors import Transformer_InPlace
from .lexer import Token, PatternStr
from .grammar import Terminal, NonTerminal
from .lexer import Token, PatternStr, TerminalDef
from .grammar import Terminal, NonTerminal, Symbol

from .tree_matcher import TreeMatcher, is_discarded_terminal
from .utils import is_id_continue

@@ -21,7 +23,10 @@ def is_iter_empty(i):

class WriteTokensTransformer(Transformer_InPlace):
    "Inserts discarded tokens into their correct place, according to the rules of grammar"

    def __init__(self, tokens, term_subs):
    tokens: Dict[str, TerminalDef]
    term_subs: Dict[str, Callable[[Symbol], str]]

    def __init__(self, tokens: Dict[str, TerminalDef], term_subs: Dict[str, Callable[[Symbol], str]]) -> None:
        self.tokens = tokens
        self.term_subs = term_subs

@@ -70,7 +75,9 @@ class Reconstructor(TreeMatcher):
        term_subs: a dictionary of [Terminal name as str] to [output text as str]
    """

    def __init__(self, parser, term_subs=None):
    write_tokens: WriteTokensTransformer

    def __init__(self, parser: Lark, term_subs: Optional[Dict[str, Callable[[Symbol], str]]]=None) -> None:
        TreeMatcher.__init__(self, parser)

        self.write_tokens = WriteTokensTransformer({t.name:t for t in self.tokens}, term_subs or {})

@@ -87,7 +94,7 @@ class Reconstructor(TreeMatcher):
        else:
            yield item

    def reconstruct(self, tree, postproc=None, insert_spaces=True):
    def reconstruct(self, tree: Tree, postproc: Optional[Callable[[Iterable[str]], Iterable[str]]]=None, insert_spaces: bool=True) -> str:
        x = self._reconstruct(tree)
        if postproc:
            x = postproc(x)
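
The `Reconstructor` annotations line up with its usual round-trip usage. A minimal sketch with an illustrative grammar; reconstruction generally expects the parser to be built with `maybe_placeholders=False`:

```python
from lark import Lark
from lark.reconstruct import Reconstructor

parser = Lark('''
    start: (WORD ";")+
    %import common.WORD
    %ignore " "
''', parser='lalr', maybe_placeholders=False)

tree = parser.parse('alpha; beta;')
# Re-emits the discarded ";" tokens around the kept WORD tokens.
print(Reconstructor(parser).reconstruct(tree))   # e.g. 'alpha;beta;'
```
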
@@ -1,16 +1,35 @@
try:
    from future_builtins import filter
    from future_builtins import filter  # type: ignore
except ImportError:
    pass

import sys
from copy import deepcopy

###{standalone
from collections import OrderedDict
from typing import List, Callable, Iterator, Union, Optional, Any, TYPE_CHECKING

if TYPE_CHECKING:
    from .lexer import TerminalDef
    if sys.version_info >= (3, 8):
        from typing import Literal
    else:
        from typing_extensions import Literal

class Meta:

    empty: bool
    line: int
    column: int
    start_pos: int
    end_line: int
    end_column: int
    end_pos: int
    orig_expansion: 'List[TerminalDef]'
    match_tree: bool

    def __init__(self):
        self.empty = True

@@ -27,13 +46,17 @@ class Tree(object):
        meta: Line & Column numbers (if ``propagate_positions`` is enabled).
            meta attributes: line, column, start_pos, end_line, end_column, end_pos
    """

    def __init__(self, data, children, meta=None):
    data: str
    children: 'List[Union[str, Tree]]'

    def __init__(self, data: str, children: 'List[Union[str, Tree]]', meta: Optional[Meta]=None) -> None:
        self.data = data
        self.children = children
        self._meta = meta

    @property
    def meta(self):
    def meta(self) -> Meta:
        if self._meta is None:
            self._meta = Meta()
        return self._meta

@@ -57,7 +80,7 @@ class Tree(object):

        return l

    def pretty(self, indent_str=' '):
    def pretty(self, indent_str: str=' ') -> str:
        """Returns an indented string representation of the tree.

        Great for debugging.

@@ -73,10 +96,10 @@ class Tree(object):
    def __ne__(self, other):
        return not (self == other)

    def __hash__(self):
    def __hash__(self) -> int:
        return hash((self.data, tuple(self.children)))

    def iter_subtrees(self):
    def iter_subtrees(self) -> 'Iterator[Tree]':
        """Depth-first iteration.

        Iterates over all the subtrees, never returning to the same node twice (Lark's parse-tree is actually a DAG).

@@ -91,23 +114,23 @@ class Tree(object):
        del queue
        return reversed(list(subtrees.values()))

    def find_pred(self, pred):
    def find_pred(self, pred: 'Callable[[Tree], bool]') -> 'Iterator[Tree]':
        """Returns all nodes of the tree that evaluate pred(node) as true."""
        return filter(pred, self.iter_subtrees())

    def find_data(self, data):
    def find_data(self, data: str) -> 'Iterator[Tree]':
        """Returns all nodes of the tree whose data equals the given data."""
        return self.find_pred(lambda t: t.data == data)

###}

    def expand_kids_by_index(self, *indices):
    def expand_kids_by_index(self, *indices: int) -> None:
        """Expand (inline) children at the given indices"""
        for i in sorted(indices, reverse=True):  # reverse so that changing tail won't affect indices
            kid = self.children[i]
            self.children[i:i+1] = kid.children

    def scan_values(self, pred):
    def scan_values(self, pred: 'Callable[[Union[str, Tree]], bool]') -> Iterator[str]:
        """Return all values in the tree that evaluate pred(value) as true.

        This can be used to find all the tokens in the tree.

@@ -140,10 +163,10 @@ class Tree(object):
    def __deepcopy__(self, memo):
        return type(self)(self.data, deepcopy(self.children, memo), meta=self._meta)

    def copy(self):
    def copy(self) -> 'Tree':
        return type(self)(self.data, self.children)

    def set(self, data, children):
    def set(self, data: str, children: 'List[Union[str, Tree]]') -> None:
        self.data = data
        self.children = children

@@ -153,7 +176,7 @@ class SlottedTree(Tree):
    __slots__ = 'data', 'children', 'rule', '_meta'

def pydot__tree_to_png(tree, filename, rankdir="LR", **kwargs):
def pydot__tree_to_png(tree: Tree, filename: str, rankdir: 'Literal["TB", "LR", "BT", "RL"]'="LR", **kwargs) -> None:
    graph = pydot__tree_to_graph(tree, rankdir, **kwargs)
    graph.write_png(filename)

@@ -174,7 +197,7 @@ def pydot__tree_to_graph(tree, rankdir="LR", **kwargs):
    possible attributes, see https://www.graphviz.org/doc/info/attrs.html.
    """

    import pydot
    import pydot  # type: ignore
    graph = pydot.Dot(graph_type='digraph', rankdir=rankdir, **kwargs)

    i = [0]
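
With `propagate_positions=True`, the `Meta` fields annotated above are populated on every subtree. A short sketch of the typed traversal helpers, using an illustrative grammar:

```python
from lark import Lark

parser = Lark('''
    start: pair+
    pair: WORD ":" WORD
    %import common.WORD
    %ignore " "
''', parser='lalr', propagate_positions=True)

tree = parser.parse('a : b c : d')
for pair in tree.find_data('pair'):    # Iterator[Tree]
    # meta.line/meta.column exist because propagate_positions=True
    print(pair.meta.line, pair.meta.column, pair.children)
print(tree.pretty())
```
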
@@ -6,7 +6,7 @@ from collections import deque
###{standalone
import sys, re
import logging
logger = logging.getLogger("lark")
logger: logging.Logger = logging.getLogger("lark")
logger.addHandler(logging.StreamHandler())
# Set to highest level, since we have some warnings amongst the code
# By default, we should not output any log messages

@@ -132,7 +132,7 @@ def smart_decorator(f, create_decorator):

try:
    import regex
    import regex  # type: ignore
except ImportError:
    regex = None
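
The module-level `logger` keeps its documented behavior; only its type is now explicit. Enabling lark's debug output looks the same as before (sketch):

```python
import logging

from lark import logger

# lark silences its logger by default; opt in to see its messages,
# e.g. together with Lark(..., debug=True) for grammar debugging.
logger.setLevel(logging.DEBUG)
```
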
@@ -1,3 +1,4 @@
from abc import ABC
from functools import wraps

from .utils import smart_decorator, combine_alternatives

@@ -7,7 +8,12 @@ from .lexer import Token

###{standalone
from inspect import getmembers, getmro
from typing import TypeVar, Tuple, List, Callable, Generic, Type, Union, Optional

_T = TypeVar('_T')
_R = TypeVar('_R')
_FUNC = Callable[..., _T]
_DECORATED = Union[_FUNC, type]

class Discard(Exception):
    """When raising the Discard exception in a transformer callback,

@@ -46,7 +52,7 @@ class _Decoratable:
        return cls

class Transformer(_Decoratable):
class Transformer(_Decoratable, ABC, Generic[_T]):
    """Transformers visit each node of the tree, and run the appropriate method on it according to the node's data.

    Methods are provided by the user via inheritance, and called according to ``tree.data``.

@@ -74,7 +80,7 @@ class Transformer(_Decoratable):
    """
    __visit_tokens__ = True  # For backwards compatibility

    def __init__(self, visit_tokens=True):
    def __init__(self, visit_tokens: bool=True) -> None:
        self.__visit_tokens__ = visit_tokens

    def _call_userfunc(self, tree, new_children=None):

@@ -125,11 +131,11 @@ class Transformer(_Decoratable):
        children = list(self._transform_children(tree.children))
        return self._call_userfunc(tree, children)

    def transform(self, tree):
    def transform(self, tree: Tree) -> _T:
        "Transform the given tree, and return the final result"
        return self._transform_tree(tree)

    def __mul__(self, other):
    def __mul__(self, other: 'Transformer[_T]') -> 'TransformerChain[_T]':
        """Chain two transformers together, returning a new transformer.
        """
        return TransformerChain(self, other)

@@ -149,16 +155,19 @@ class Transformer(_Decoratable):
            return token

class TransformerChain(object):

    def __init__(self, *transformers):
class TransformerChain(Generic[_T]):

    transformers: Tuple[Transformer[_T], ...]

    def __init__(self, *transformers: Transformer[_T]) -> None:
        self.transformers = transformers

    def transform(self, tree):
    def transform(self, tree: Tree) -> _T:
        for t in self.transformers:
            tree = t.transform(tree)
        return tree

    def __mul__(self, other):
    def __mul__(self, other: Transformer[_T]) -> 'TransformerChain[_T]':
        return TransformerChain(*self.transformers + (other,))

@@ -239,19 +248,19 @@ class VisitorBase:
        return cls

class Visitor(VisitorBase):
class Visitor(VisitorBase, ABC, Generic[_T]):
    """Tree visitor, non-recursive (can handle huge trees).

    Visiting a node calls its methods (provided by the user via inheritance) according to ``tree.data``
    """

    def visit(self, tree):
    def visit(self, tree: Tree) -> Tree:
        "Visits the tree, starting with the leaves and finally the root (bottom-up)"
        for subtree in tree.iter_subtrees():
            self._call_userfunc(subtree)

        return tree

    def visit_topdown(self,tree):
    def visit_topdown(self, tree: Tree) -> Tree:
        "Visit the tree, starting at the root, and ending at the leaves (top-down)"
        for subtree in tree.iter_subtrees_topdown():
            self._call_userfunc(subtree)

@@ -266,7 +275,7 @@ class Visitor_Recursive(VisitorBase):
    Slightly faster than the non-recursive version.
    """

    def visit(self, tree):
    def visit(self, tree: Tree) -> Tree:
        "Visits the tree, starting with the leaves and finally the root (bottom-up)"
        for child in tree.children:
            if isinstance(child, Tree):

@@ -275,7 +284,7 @@ class Visitor_Recursive(VisitorBase):
        self._call_userfunc(tree)
        return tree

    def visit_topdown(self,tree):
    def visit_topdown(self, tree: Tree) -> Tree:
        "Visit the tree, starting at the root, and ending at the leaves (top-down)"
        self._call_userfunc(tree)

@@ -286,16 +295,7 @@ class Visitor_Recursive(VisitorBase):
        return tree

def visit_children_decor(func):
    "See Interpreter"
    @wraps(func)
    def inner(cls, tree):
        values = cls.visit_children(tree)
        return func(cls, values)
    return inner

class Interpreter(_Decoratable):
class Interpreter(_Decoratable, ABC, Generic[_T]):
    """Interpreter walks the tree starting at the root.

    Visits the tree, starting with the root and finally the leaves (top-down)

@@ -307,7 +307,7 @@ class Interpreter(_Decoratable):
        This allows the user to implement branching and loops.
    """

    def visit(self, tree):
    def visit(self, tree: Tree) -> _T:
        f = getattr(self, tree.data)
        wrapper = getattr(f, 'visit_wrapper', None)
        if wrapper is not None:

@@ -315,7 +315,7 @@ class Interpreter(_Decoratable):
        else:
            return f(tree)

    def visit_children(self, tree):
    def visit_children(self, tree: Tree) -> List[_T]:
        return [self.visit(child) if isinstance(child, Tree) else child
                for child in tree.children]

@@ -326,6 +326,16 @@ class Interpreter(_Decoratable):
        return self.visit_children(tree)

_InterMethod = Callable[[Type[Interpreter], _T], _R]

def visit_children_decor(func: _InterMethod) -> _InterMethod:
    "See Interpreter"
    @wraps(func)
    def inner(cls, tree):
        values = cls.visit_children(tree)
        return func(cls, values)
    return inner

# Decorators

def _apply_decorator(obj, decorator, **kwargs):

@@ -380,7 +390,7 @@ def _vargs_tree(f, data, children, meta):
    return f(Tree(data, children, meta))

def v_args(inline=False, meta=False, tree=False, wrapper=None):
def v_args(inline: bool=False, meta: bool=False, tree: bool=False, wrapper: Optional[Callable]=None) -> Callable[[_DECORATED], _DECORATED]:
    """A convenience decorator factory for modifying the behavior of user-supplied visitor methods.

    By default, callback methods of transformers/visitors accept one argument - a list of the node's children.
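
Making `Transformer` generic lets subclasses declare their result type, which `transform` then returns. A small typed calculator sketch combining it with the newly annotated `v_args(inline=True)`; the grammar and names are illustrative:

```python
from lark import Lark, Transformer, v_args


@v_args(inline=True)   # pass children as positional arguments, not a list
class CalcTransformer(Transformer[int]):
    def number(self, tok) -> int:
        return int(tok)

    def add(self, left: int, right: int) -> int:
        return left + right


parser = Lark('''
    ?start: sum
    ?sum: sum "+" number   -> add
        | number
    number: INT
    %import common.INT
    %ignore " "
''', parser='lalr')

# transform() is typed to return int, matching Transformer[int]
result: int = CalcTransformer().transform(parser.parse('1 + 2 + 3'))
print(result)   # 6
```
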