@@ -16,4 +16,4 @@ jobs: | |||||
python -m pip install --upgrade pip | python -m pip install --upgrade pip | ||||
pip install mypy | pip install mypy | ||||
- name: Lint with mypy | - name: Lint with mypy | ||||
run: mypy -p lark-stubs || true | |||||
run: mypy -p lark || true |
@@ -6,7 +6,7 @@ jobs: | |||||
runs-on: ubuntu-latest | runs-on: ubuntu-latest | ||||
strategy: | strategy: | ||||
matrix: | matrix: | ||||
python-version: [2.7, 3.5, 3.6, 3.7, 3.8, 3.9, 3.10.0-rc - 3.10, pypy2, pypy3] | |||||
python-version: [3.6, 3.7, 3.8, 3.9, 3.10.0-rc - 3.10, pypy3] | |||||
steps: | steps: | ||||
- uses: actions/checkout@v2 | - uses: actions/checkout@v2 | ||||
@@ -1,12 +0,0 @@ | |||||
# -*- coding: utf-8 -*- | |||||
from .tree import * | |||||
from .visitors import * | |||||
from .exceptions import * | |||||
from .lexer import * | |||||
from .load_grammar import * | |||||
from .lark import * | |||||
from logging import Logger as _Logger | |||||
logger: _Logger | |||||
__version__: str = ... |
@@ -1,17 +0,0 @@ | |||||
import types
from typing import Callable, Optional
from .visitors import Transformer | |||||
class Ast(object): | |||||
pass | |||||
class AsList(object): | |||||
pass | |||||
def create_transformer(
    ast_module: types.ModuleType,
    transformer: Optional[Transformer] = None,
    decorator_factory: Callable = ...,
) -> Transformer:
    """Collect the ``Ast`` subclasses of ``ast_module`` into a Lark transformer.

    For each class, a corresponding rule with a matching name is created in
    the transformer.

    Parameters:
        ast_module: module whose ``Ast`` subclasses are collected.
        transformer: optional transformer to use; defaults to ``None``.
        decorator_factory: callable accepted by the implementation, which
            defaults it to ``v_args``. Declared here so that callers passing
            it type-check against the stub.
    """
    ...
@@ -1,65 +0,0 @@ | |||||
# -*- coding: utf-8 -*- | |||||
from typing import Dict, Iterable, Callable, Union, TypeVar, Tuple, Any, List, Set | |||||
from .tree import Tree | |||||
from .lexer import Token | |||||
from .parsers.lalr_interactive_parser import InteractiveParser | |||||
class LarkError(Exception): | |||||
pass | |||||
class ConfigurationError(LarkError, ValueError): | |||||
pass | |||||
class GrammarError(LarkError): | |||||
pass | |||||
class ParseError(LarkError): | |||||
pass | |||||
class LexError(LarkError): | |||||
pass | |||||
T = TypeVar('T') | |||||
class UnexpectedEOF(ParseError): | |||||
expected: List[Token] | |||||
class UnexpectedInput(LarkError):
    """Type stub for the ``UnexpectedInput`` error."""

    line: int
    column: int
    pos_in_stream: int
    state: Any

    def get_context(self, text: str, span: int = ...) -> str:
        """Return a pretty string pinpointing the error in ``text``,
        with ``span`` characters of context around it."""
        ...

    def match_examples(
        self,
        parse_fn: Callable[[str], Tree],
        examples: Union[Dict[T, Iterable[str]], Iterable[Tuple[T, Iterable[str]]]],
        token_type_match_fallback: bool = False,
        use_accepts: bool = False,
    ) -> Union[T, None]:
        """Detect what is wrong in the input text by matching against
        example errors.

        Parameters:
            parse_fn: parse function (usually ``lark_instance.parse``).
            examples: mapping (or iterable of pairs) from a label to the
                malformed example strings for that label.

        Returns:
            The label of the matching example, or ``None`` when nothing
            matches (the implementation is annotated ``Optional[T]``).
        """
        ...
class UnexpectedToken(ParseError, UnexpectedInput): | |||||
expected: Set[str] | |||||
considered_rules: Set[str] | |||||
interactive_parser: InteractiveParser | |||||
accepts: Set[str] | |||||
class UnexpectedCharacters(LexError, UnexpectedInput): | |||||
allowed: Set[str] | |||||
considered_tokens: Set[Any] | |||||
class VisitError(LarkError): | |||||
obj: Union[Tree, Token] | |||||
orig_exc: Exception |
@@ -1,14 +0,0 @@ | |||||
from typing import Optional, Tuple | |||||
class RuleOptions: | |||||
keep_all_tokens: bool | |||||
expand1: bool | |||||
priority: int | |||||
template_source: Optional[str] | |||||
empty_indices: Tuple[bool, ...] | |||||
class Symbol: | |||||
name: str | |||||
is_term: bool |
@@ -1,47 +0,0 @@ | |||||
# -*- coding: utf-8 -*- | |||||
from typing import Tuple, List, Iterator, Optional | |||||
from abc import ABC, abstractmethod | |||||
from .lexer import Token | |||||
from .lark import PostLex | |||||
class Indenter(PostLex, ABC): | |||||
paren_level: Optional[int] | |||||
indent_level: Optional[List[int]] | |||||
def __init__(self) -> None: | |||||
... | |||||
def handle_NL(self, token: Token) -> Iterator[Token]: | |||||
... | |||||
@property | |||||
@abstractmethod | |||||
def NL_type(self) -> str: | |||||
... | |||||
@property | |||||
@abstractmethod | |||||
def OPEN_PAREN_types(self) -> List[str]: | |||||
... | |||||
@property | |||||
@abstractmethod | |||||
def CLOSE_PAREN_types(self) -> List[str]: | |||||
... | |||||
@property | |||||
@abstractmethod | |||||
def INDENT_type(self) -> str: | |||||
... | |||||
@property | |||||
@abstractmethod | |||||
def DEDENT_type(self) -> str: | |||||
... | |||||
@property | |||||
@abstractmethod | |||||
def tab_len(self) -> int: | |||||
... |
@@ -1,109 +0,0 @@ | |||||
# -*- coding: utf-8 -*- | |||||
from typing import ( | |||||
TypeVar, Type, List, Dict, IO, Iterator, Callable, Union, Optional, | |||||
Literal, Protocol, Tuple, Iterable, | |||||
) | |||||
from .parsers.lalr_interactive_parser import InteractiveParser | |||||
from .visitors import Transformer | |||||
from .lexer import Token, Lexer, TerminalDef | |||||
from .tree import Tree | |||||
from .exceptions import UnexpectedInput | |||||
from .load_grammar import Grammar | |||||
_T = TypeVar('_T') | |||||
class PostLex(Protocol): | |||||
def process(self, stream: Iterator[Token]) -> Iterator[Token]: | |||||
... | |||||
always_accept: Iterable[str] | |||||
class LarkOptions: | |||||
start: List[str] | |||||
parser: str | |||||
lexer: str | |||||
transformer: Optional[Transformer] | |||||
postlex: Optional[PostLex] | |||||
ambiguity: str | |||||
regex: bool | |||||
debug: bool | |||||
keep_all_tokens: bool | |||||
propagate_positions: Union[bool, Callable] | |||||
maybe_placeholders: bool | |||||
lexer_callbacks: Dict[str, Callable[[Token], Token]] | |||||
cache: Union[bool, str] | |||||
g_regex_flags: int | |||||
use_bytes: bool | |||||
import_paths: List[Union[str, Callable[[Union[None, str, PackageResource], str], Tuple[str, str]]]] | |||||
source_path: Optional[str] | |||||
class PackageResource(object): | |||||
pkg_name: str | |||||
path: str | |||||
def __init__(self, pkg_name: str, path: str): ... | |||||
class FromPackageLoader: | |||||
def __init__(self, pkg_name: str, search_paths: Tuple[str, ...] = ...): ... | |||||
def __call__(self, base_path: Union[None, str, PackageResource], grammar_path: str) -> Tuple[PackageResource, str]: ... | |||||
class Lark:
    """Type stub for the ``Lark`` class."""

    source_path: str
    source_grammar: str
    grammar: Grammar
    options: LarkOptions
    lexer: Lexer
    terminals: List[TerminalDef]

    def __init__(
        self,
        grammar: Union[Grammar, str, IO[str]],
        *,
        start: Union[None, str, List[str]] = "start",
        parser: Literal["earley", "lalr", "cyk", "auto"] = "auto",
        lexer: Union[Literal["auto", "standard", "contextual", "dynamic", "dynamic_complete"], Type[Lexer]] = "auto",
        transformer: Optional[Transformer] = None,
        postlex: Optional[PostLex] = None,
        ambiguity: Literal["explicit", "resolve"] = "resolve",
        regex: bool = False,
        debug: bool = False,
        keep_all_tokens: bool = False,
        propagate_positions: Union[bool, Callable] = False,
        maybe_placeholders: bool = False,
        lexer_callbacks: Optional[Dict[str, Callable[[Token], Token]]] = None,
        cache: Union[bool, str] = False,
        g_regex_flags: int = ...,
        use_bytes: bool = False,
        import_paths: List[Union[str, Callable[[Union[None, str, PackageResource], str], Tuple[str, str]]]] = ...,
        source_path: Optional[str] = None,
    ) -> None:
        # All options after ``grammar`` are keyword-only.
        ...

    # ``on_error`` defaults to None, so it is declared Optional explicitly
    # (implicit Optional is rejected by current type checkers).
    def parse(self, text: str, start: Optional[str] = None, on_error: Optional[Callable[[UnexpectedInput], bool]] = None) -> Tree:
        ...

    # ``text`` defaults to None, so it is declared Optional explicitly.
    def parse_interactive(self, text: Optional[str] = None, start: Optional[str] = None) -> InteractiveParser:
        ...

    @classmethod
    def open(cls: Type[_T], grammar_filename: str, rel_to: Optional[str] = None, **options) -> _T:
        ...

    @classmethod
    def open_from_package(cls: Type[_T], package: str, grammar_path: str, search_paths: Tuple[str, ...] = ..., **options) -> _T:
        ...

    def lex(self, text: str, dont_ignore: bool = False) -> Iterator[Token]:
        ...

    def get_terminal(self, name: str) -> TerminalDef:
        ...
@@ -1,161 +0,0 @@ | |||||
# -*- coding: utf-8 -*- | |||||
from types import ModuleType | |||||
from typing import ( | |||||
TypeVar, Type, Tuple, List, Dict, Iterator, Collection, Callable, Optional, FrozenSet, Any, | |||||
Pattern as REPattern, | |||||
) | |||||
from abc import abstractmethod, ABC | |||||
_T = TypeVar('_T') | |||||
class Pattern(ABC):
    """Type stub for the abstract base of terminal patterns."""

    value: str
    flags: Collection[str]
    raw: str
    type: str

    # ``raw`` defaults to None, so it is declared Optional explicitly.
    def __init__(self, value: str, flags: Collection[str] = (), raw: Optional[str] = None) -> None:
        ...

    @abstractmethod
    def to_regexp(self) -> str:
        ...

    @property
    @abstractmethod
    def min_width(self) -> int:
        ...

    @property
    @abstractmethod
    def max_width(self) -> int:
        ...
class PatternStr(Pattern): | |||||
type: str = ... | |||||
def to_regexp(self) -> str: | |||||
... | |||||
@property | |||||
def min_width(self) -> int: | |||||
... | |||||
@property | |||||
def max_width(self) -> int: | |||||
... | |||||
class PatternRE(Pattern): | |||||
type: str = ... | |||||
def to_regexp(self) -> str: | |||||
... | |||||
@property | |||||
def min_width(self) -> int: | |||||
... | |||||
@property | |||||
def max_width(self) -> int: | |||||
... | |||||
class TerminalDef: | |||||
name: str | |||||
pattern: Pattern | |||||
priority: int | |||||
def __init__(self, name: str, pattern: Pattern, priority: int = ...) -> None: | |||||
... | |||||
def user_repr(self) -> str: ... | |||||
class Token(str):
    """Type stub for ``Token``, a ``str`` subclass carrying position info."""

    type: str
    start_pos: int
    value: Any
    line: int
    column: int
    end_line: int
    end_column: int
    end_pos: int

    # Every position argument defaults to None, so each one is declared
    # Optional explicitly instead of relying on implicit Optional.
    def __init__(
        self,
        type_: str,
        value: Any,
        start_pos: Optional[int] = None,
        line: Optional[int] = None,
        column: Optional[int] = None,
        end_line: Optional[int] = None,
        end_column: Optional[int] = None,
        end_pos: Optional[int] = None,
    ) -> None:
        ...

    def update(self, type_: Optional[str] = None, value: Optional[Any] = None) -> Token:
        ...

    @classmethod
    def new_borrow_pos(cls: Type[_T], type_: str, value: Any, borrow_t: Token) -> _T:
        ...
_Callback = Callable[[Token], Token] | |||||
class Lexer(ABC): | |||||
lex: Callable[..., Iterator[Token]] | |||||
class LexerConf:
    """Type stub for the lexer configuration object."""

    # The implementation stores the terminal definitions as ``terminals``.
    terminals: Collection[TerminalDef]
    # Deprecated alias: the implementation's ``tokens`` property warns and
    # returns ``terminals``. Kept so existing annotated code still checks.
    tokens: Collection[TerminalDef]
    re_module: ModuleType
    ignore: Collection[str] = ()
    postlex: Any = None
    callbacks: Optional[Dict[str, _Callback]] = None
    g_regex_flags: int = 0
    skip_validation: bool = False
    use_bytes: bool = False
class TraditionalLexer(Lexer): | |||||
terminals: Collection[TerminalDef] | |||||
ignore_types: FrozenSet[str] | |||||
newline_types: FrozenSet[str] | |||||
user_callbacks: Dict[str, _Callback] | |||||
callback: Dict[str, _Callback] | |||||
mres: List[Tuple[REPattern, Dict[int, str]]] | |||||
re: ModuleType | |||||
def __init__( | |||||
self, | |||||
conf: LexerConf | |||||
) -> None: | |||||
... | |||||
def build(self) -> None: | |||||
... | |||||
def match(self, stream: str, pos: int) -> Optional[Tuple[str, str]]: | |||||
... | |||||
def lex(self, stream: str) -> Iterator[Token]: | |||||
... | |||||
def next_token(self, lex_state: Any, parser_state: Any = None) -> Token: | |||||
... | |||||
class ContextualLexer(Lexer): | |||||
lexers: Dict[str, TraditionalLexer] | |||||
root_lexer: TraditionalLexer | |||||
def __init__( | |||||
self, | |||||
terminals: Collection[TerminalDef], | |||||
states: Dict[str, Collection[str]], | |||||
re_: ModuleType, | |||||
ignore: Collection[str] = ..., | |||||
always_accept: Collection[str] = ..., | |||||
user_callbacks: Dict[str, _Callback] = ..., | |||||
g_regex_flags: int = ... | |||||
) -> None: | |||||
... | |||||
def lex(self, stream: str, get_parser_state: Callable[[], str]) -> Iterator[Token]: | |||||
... |
@@ -1,31 +0,0 @@ | |||||
from typing import List, Tuple, Union, Callable, Dict, Optional | |||||
from .tree import Tree | |||||
from .grammar import RuleOptions | |||||
from .exceptions import UnexpectedInput | |||||
class Grammar: | |||||
rule_defs: List[Tuple[str, Tuple[str, ...], Tree, RuleOptions]] | |||||
term_defs: List[Tuple[str, Tuple[Tree, int]]] | |||||
ignore: List[str] | |||||
class GrammarBuilder:
    """Type stub for ``GrammarBuilder``."""

    global_keep_all_tokens: bool
    import_paths: List[Union[str, Callable]]
    used_files: Dict[str, str]

    # Parameters that default to None are declared Optional explicitly.
    def __init__(
        self,
        global_keep_all_tokens: bool = False,
        import_paths: Optional[List[Union[str, Callable]]] = None,
        used_files: Optional[Dict[str, str]] = None,
    ) -> None: ...

    def load_grammar(
        self,
        grammar_text: str,
        grammar_name: str = ...,
        mangle: Optional[Callable[[str], str]] = None,
    ) -> None: ...

    def do_import(
        self,
        dotted_path: Tuple[str, ...],
        base_path: Optional[str],
        aliases: Dict[str, str],
        base_mangle: Optional[Callable[[str], str]] = None,
    ) -> None: ...

    def validate(self) -> None: ...

    def build(self) -> Grammar: ...
def find_grammar_errors(text: str, start: str='start') -> List[Tuple[UnexpectedInput, str]]: ... |
@@ -1,39 +0,0 @@ | |||||
# -*- coding: utf-8 -*- | |||||
from typing import List, Dict, Union, Callable, Iterable | |||||
from .grammar import Symbol | |||||
from .lark import Lark | |||||
from .tree import Tree | |||||
from .visitors import Transformer_InPlace | |||||
from .lexer import TerminalDef | |||||
class WriteTokensTransformer(Transformer_InPlace): | |||||
def __init__(self, tokens: Dict[str, TerminalDef], term_subs: Dict[str, Callable[[Symbol], str]] = ...): ... | |||||
class MatchTree(Tree): | |||||
pass | |||||
class MakeMatchTree: | |||||
name: str | |||||
expansion: List[TerminalDef] | |||||
def __init__(self, name: str, expansion: List[TerminalDef]): | |||||
... | |||||
def __call__(self, args: List[Union[str, Tree]]): | |||||
... | |||||
class Reconstructor:
    """Type stub for ``Reconstructor``."""

    def __init__(self, parser: Lark, term_subs: Dict[str, Callable[[Symbol], str]] = ...) -> None:
        ...

    # ``postproc`` defaults to None; spelled with ``Union[..., None]`` because
    # this stub module imports ``Union`` but not ``Optional``.
    def reconstruct(
        self,
        tree: Tree,
        postproc: Union[Callable[[Iterable[str]], Iterable[str]], None] = None,
        insert_spaces: bool = True,
    ) -> str:
        ...
@@ -1,75 +0,0 @@ | |||||
# -*- coding: utf-8 -*- | |||||
from typing import List, Callable, Iterator, Union, Optional, Literal, Any | |||||
from .lexer import TerminalDef | |||||
class Meta: | |||||
empty: bool | |||||
line: int | |||||
column: int | |||||
start_pos: int | |||||
end_line: int | |||||
end_column: int | |||||
end_pos: int | |||||
orig_expansion: List[TerminalDef] | |||||
match_tree: bool | |||||
class Tree: | |||||
data: str | |||||
children: List[Union[str, Tree]] | |||||
meta: Meta | |||||
def __init__( | |||||
self, | |||||
data: str, | |||||
children: List[Union[str, Tree]], | |||||
meta: Optional[Meta] = None | |||||
) -> None: | |||||
... | |||||
def pretty(self, indent_str: str = ...) -> str: | |||||
... | |||||
def find_pred(self, pred: Callable[[Tree], bool]) -> Iterator[Tree]: | |||||
... | |||||
def find_data(self, data: str) -> Iterator[Tree]: | |||||
... | |||||
def expand_kids_by_index(self, *indices: int) -> None: | |||||
... | |||||
def expand_kids_by_data(self, *data_values: str) -> bool: | |||||
... | |||||
def scan_values(self, pred: Callable[[Union[str, Tree]], bool]) -> Iterator[str]: | |||||
... | |||||
def iter_subtrees(self) -> Iterator[Tree]: | |||||
... | |||||
def iter_subtrees_topdown(self) -> Iterator[Tree]: | |||||
... | |||||
def copy(self) -> Tree: | |||||
... | |||||
def set(self, data: str, children: List[Union[str, Tree]]) -> None: | |||||
... | |||||
def __hash__(self) -> int: | |||||
... | |||||
class SlottedTree(Tree): | |||||
pass | |||||
def pydot__tree_to_png( | |||||
tree: Tree, | |||||
filename: str, | |||||
rankdir: Literal["TB", "LR", "BT", "RL"] = ..., | |||||
**kwargs | |||||
) -> None: | |||||
... |
@@ -1,108 +0,0 @@ | |||||
# -*- coding: utf-8 -*- | |||||
from typing import TypeVar, Tuple, List, Callable, Generic, Type, Union | |||||
from abc import ABC | |||||
from .tree import Tree | |||||
_T = TypeVar('_T') | |||||
_R = TypeVar('_R') | |||||
_FUNC = Callable[..., _T] | |||||
_DECORATED = Union[_FUNC, type] | |||||
class Transformer(ABC, Generic[_T]): | |||||
def __init__(self, visit_tokens: bool = True) -> None: | |||||
... | |||||
def transform(self, tree: Tree) -> _T: | |||||
... | |||||
def __mul__(self, other: Transformer[_T]) -> TransformerChain[_T]: | |||||
... | |||||
class TransformerChain(Generic[_T]): | |||||
transformers: Tuple[Transformer[_T], ...] | |||||
def __init__(self, *transformers: Transformer[_T]) -> None: | |||||
... | |||||
def transform(self, tree: Tree) -> _T: | |||||
... | |||||
def __mul__(self, other: Transformer[_T]) -> TransformerChain[_T]: | |||||
... | |||||
class Transformer_InPlace(Transformer): | |||||
pass | |||||
class Transformer_NonRecursive(Transformer): | |||||
pass | |||||
class Transformer_InPlaceRecursive(Transformer): | |||||
pass | |||||
class VisitorBase: | |||||
pass | |||||
class Visitor(VisitorBase, ABC, Generic[_T]): | |||||
def visit(self, tree: Tree) -> Tree: | |||||
... | |||||
def visit_topdown(self, tree: Tree) -> Tree: | |||||
... | |||||
class Visitor_Recursive(VisitorBase): | |||||
def visit(self, tree: Tree) -> Tree: | |||||
... | |||||
def visit_topdown(self, tree: Tree) -> Tree: | |||||
... | |||||
class Interpreter(ABC, Generic[_T]): | |||||
def visit(self, tree: Tree) -> _T: | |||||
... | |||||
def visit_children(self, tree: Tree) -> List[_T]: | |||||
... | |||||
_InterMethod = Callable[[Type[Interpreter], _T], _R] | |||||
def v_args(
    inline: bool = False,
    meta: bool = False,
    tree: bool = False,
    wrapper: Union[Callable, None] = None,
) -> Callable[[_DECORATED], _DECORATED]:
    """Type stub for the ``v_args`` decorator factory.

    Returns a decorator that accepts and returns a function or a class.
    ``wrapper`` defaults to None, so it is spelled ``Union[Callable, None]``
    (this stub module imports ``Union`` but not ``Optional``).
    """
    ...
def visit_children_decor(func: _InterMethod) -> _InterMethod: | |||||
... | |||||
class Discard(Exception): | |||||
pass | |||||
# Deprecated | |||||
class InlineTransformer: | |||||
pass | |||||
# Deprecated | |||||
def inline_args(obj: _FUNC) -> _FUNC: | |||||
... |
@@ -0,0 +1,9 @@ | |||||
v1.0 | |||||
- `maybe_placeholders` is now True by default | |||||
- `use_accepts` in `UnexpectedInput.match_examples()` is now True by default | |||||
- Token priority is now 0 by default | |||||
- `v_args(meta=True)` now passes `meta` as the first argument, i.e. `(meta, children)` |
@@ -1,10 +1,9 @@ | |||||
from .utils import logger | from .utils import logger | ||||
from .tree import Tree | from .tree import Tree | ||||
from .visitors import Transformer, Visitor, v_args, Discard, Transformer_NonRecursive | from .visitors import Transformer, Visitor, v_args, Discard, Transformer_NonRecursive | ||||
from .visitors import InlineTransformer, inline_args # XXX Deprecated | |||||
from .exceptions import (ParseError, LexError, GrammarError, UnexpectedToken, | from .exceptions import (ParseError, LexError, GrammarError, UnexpectedToken, | ||||
UnexpectedInput, UnexpectedCharacters, UnexpectedEOF, LarkError) | UnexpectedInput, UnexpectedCharacters, UnexpectedEOF, LarkError) | ||||
from .lexer import Token | from .lexer import Token | ||||
from .lark import Lark | from .lark import Lark | ||||
__version__ = "0.12.0" | |||||
__version__: str = "1.0.0a" |
@@ -3,6 +3,8 @@ | |||||
""" | """ | ||||
import inspect, re | import inspect, re | ||||
import types | |||||
from typing import Optional, Callable | |||||
from lark import Transformer, v_args | from lark import Transformer, v_args | ||||
@@ -29,7 +31,9 @@ class WithMeta(object): | |||||
def camel_to_snake(name): | def camel_to_snake(name): | ||||
return re.sub(r'(?<!^)(?=[A-Z])', '_', name).lower() | return re.sub(r'(?<!^)(?=[A-Z])', '_', name).lower() | ||||
def create_transformer(ast_module, transformer=None, decorator_factory=v_args): | |||||
def create_transformer(ast_module: types.ModuleType, | |||||
transformer: Optional[Transformer]=None, | |||||
decorator_factory: Callable=v_args) -> Transformer: | |||||
"""Collects `Ast` subclasses from the given module, and creates a Lark transformer that builds the AST. | """Collects `Ast` subclasses from the given module, and creates a Lark transformer that builds the AST. | ||||
For each class, we create a corresponding rule in the transformer, with a matching name. | For each class, we create a corresponding rule in the transformer, with a matching name. | ||||
@@ -52,4 +56,4 @@ def create_transformer(ast_module, transformer=None, decorator_factory=v_args): | |||||
obj = wrapper(obj).__get__(t) | obj = wrapper(obj).__get__(t) | ||||
setattr(t, camel_to_snake(name), obj) | setattr(t, camel_to_snake(name), obj) | ||||
return t | |||||
return t |
@@ -1,17 +1,31 @@ | |||||
from warnings import warn | |||||
from copy import deepcopy | from copy import deepcopy | ||||
from types import ModuleType | |||||
from typing import Callable, Collection, Dict, Optional, TYPE_CHECKING | |||||
if TYPE_CHECKING: | |||||
from .lark import PostLex | |||||
from .utils import Serialize | from .utils import Serialize | ||||
from .lexer import TerminalDef | |||||
from .lexer import TerminalDef, Token | |||||
###{standalone | ###{standalone | ||||
_Callback = Callable[[Token], Token] | |||||
class LexerConf(Serialize): | class LexerConf(Serialize): | ||||
__serialize_fields__ = 'terminals', 'ignore', 'g_regex_flags', 'use_bytes', 'lexer_type' | __serialize_fields__ = 'terminals', 'ignore', 'g_regex_flags', 'use_bytes', 'lexer_type' | ||||
__serialize_namespace__ = TerminalDef, | __serialize_namespace__ = TerminalDef, | ||||
def __init__(self, terminals, re_module, ignore=(), postlex=None, callbacks=None, g_regex_flags=0, skip_validation=False, use_bytes=False): | |||||
terminals: Collection[TerminalDef] | |||||
re_module: ModuleType | |||||
ignore: Collection[str] | |||||
postlex: 'Optional[PostLex]' | |||||
callbacks: Dict[str, _Callback] | |||||
g_regex_flags: int | |||||
skip_validation: bool | |||||
use_bytes: bool | |||||
def __init__(self, terminals: Collection[TerminalDef], re_module: ModuleType, ignore: Collection[str]=(), postlex: 'Optional[PostLex]'=None, callbacks: Optional[Dict[str, _Callback]]=None, g_regex_flags: int=0, skip_validation: bool=False, use_bytes: bool=False): | |||||
self.terminals = terminals | self.terminals = terminals | ||||
self.terminals_by_name = {t.name: t for t in self.terminals} | self.terminals_by_name = {t.name: t for t in self.terminals} | ||||
assert len(self.terminals) == len(self.terminals_by_name) | assert len(self.terminals) == len(self.terminals_by_name) | ||||
@@ -24,11 +38,6 @@ class LexerConf(Serialize): | |||||
self.use_bytes = use_bytes | self.use_bytes = use_bytes | ||||
self.lexer_type = None | self.lexer_type = None | ||||
@property | |||||
def tokens(self): | |||||
warn("LexerConf.tokens is deprecated. Use LexerConf.terminals instead", DeprecationWarning) | |||||
return self.terminals | |||||
def _deserialize(self): | def _deserialize(self): | ||||
self.terminals_by_name = {t.name: t for t in self.terminals} | self.terminals_by_name = {t.name: t for t in self.terminals} | ||||
@@ -1,11 +1,13 @@ | |||||
from warnings import warn | |||||
from .utils import STRING_TYPE, logger, NO_VALUE | |||||
from .utils import logger, NO_VALUE | |||||
from typing import Dict, Iterable, Callable, Union, TypeVar, Tuple, Any, List, Set, Optional, Collection, TYPE_CHECKING | |||||
if TYPE_CHECKING: | |||||
from .lexer import Token | |||||
from .parsers.lalr_interactive_parser import InteractiveParser | |||||
from .tree import Tree | |||||
###{standalone | ###{standalone | ||||
class LarkError(Exception): | class LarkError(Exception): | ||||
pass | pass | ||||
@@ -14,7 +16,7 @@ class ConfigurationError(LarkError, ValueError): | |||||
pass | pass | ||||
def assert_config(value, options, msg='Got %r, expected one of %s'): | |||||
def assert_config(value, options: Collection, msg='Got %r, expected one of %s'): | |||||
if value not in options: | if value not in options: | ||||
raise ConfigurationError(msg % (value, options)) | raise ConfigurationError(msg % (value, options)) | ||||
@@ -30,6 +32,7 @@ class ParseError(LarkError): | |||||
class LexError(LarkError): | class LexError(LarkError): | ||||
pass | pass | ||||
T = TypeVar('T') | |||||
class UnexpectedInput(LarkError): | class UnexpectedInput(LarkError): | ||||
"""UnexpectedInput Error. | """UnexpectedInput Error. | ||||
@@ -42,10 +45,13 @@ class UnexpectedInput(LarkError): | |||||
After catching one of these exceptions, you may call the following helper methods to create a nicer error message. | After catching one of these exceptions, you may call the following helper methods to create a nicer error message. | ||||
""" | """ | ||||
line: int | |||||
column: int | |||||
pos_in_stream = None | pos_in_stream = None | ||||
state: Any | |||||
_terminals_by_name = None | _terminals_by_name = None | ||||
def get_context(self, text, span=40): | |||||
def get_context(self, text: str, span: int=40) -> str: | |||||
"""Returns a pretty string pinpointing the error in the text, | """Returns a pretty string pinpointing the error in the text, | ||||
with span amount of context characters around it. | with span amount of context characters around it. | ||||
@@ -66,7 +72,11 @@ class UnexpectedInput(LarkError): | |||||
after = text[pos:end].split(b'\n', 1)[0] | after = text[pos:end].split(b'\n', 1)[0] | ||||
return (before + after + b'\n' + b' ' * len(before.expandtabs()) + b'^\n').decode("ascii", "backslashreplace") | return (before + after + b'\n' + b' ' * len(before.expandtabs()) + b'^\n').decode("ascii", "backslashreplace") | ||||
def match_examples(self, parse_fn, examples, token_type_match_fallback=False, use_accepts=False): | |||||
def match_examples(self, parse_fn: 'Callable[[str], Tree]', | |||||
examples: Union[Dict[T, Iterable[str]], Iterable[Tuple[T, Iterable[str]]]], | |||||
token_type_match_fallback: bool=False, | |||||
use_accepts: bool=True | |||||
) -> Optional[T]: | |||||
"""Allows you to detect what's wrong in the input text by matching | """Allows you to detect what's wrong in the input text by matching | ||||
against example errors. | against example errors. | ||||
@@ -81,8 +91,7 @@ class UnexpectedInput(LarkError): | |||||
Parameters: | Parameters: | ||||
parse_fn: parse function (usually ``lark_instance.parse``) | parse_fn: parse function (usually ``lark_instance.parse``) | ||||
examples: dictionary of ``{'example_string': value}``. | examples: dictionary of ``{'example_string': value}``. | ||||
use_accepts: Recommended to call this with ``use_accepts=True``. | |||||
The default is ``False`` for backwards compatibility. | |||||
use_accepts: Recommended to keep this as ``use_accepts=True``. | |||||
""" | """ | ||||
assert self.state is not None, "Not supported for this exception" | assert self.state is not None, "Not supported for this exception" | ||||
@@ -91,14 +100,14 @@ class UnexpectedInput(LarkError): | |||||
candidate = (None, False) | candidate = (None, False) | ||||
for i, (label, example) in enumerate(examples): | for i, (label, example) in enumerate(examples): | ||||
assert not isinstance(example, STRING_TYPE) | |||||
assert not isinstance(example, str), "Expecting a list" | |||||
for j, malformed in enumerate(example): | for j, malformed in enumerate(example): | ||||
try: | try: | ||||
parse_fn(malformed) | parse_fn(malformed) | ||||
except UnexpectedInput as ut: | except UnexpectedInput as ut: | ||||
if ut.state == self.state: | if ut.state == self.state: | ||||
if use_accepts and hasattr(self, 'accepts') and ut.accepts != self.accepts: | |||||
if use_accepts and hasattr(self, 'accepts') and hasattr(ut, 'accepts') and ut.accepts != self.accepts: | |||||
logger.debug("Different accepts with same state[%d]: %s != %s at example [%s][%s]" % | logger.debug("Different accepts with same state[%d]: %s != %s at example [%s][%s]" % | ||||
(self.state, self.accepts, ut.accepts, i, j)) | (self.state, self.accepts, ut.accepts, i, j)) | ||||
continue | continue | ||||
@@ -131,6 +140,7 @@ class UnexpectedInput(LarkError): | |||||
class UnexpectedEOF(ParseError, UnexpectedInput): | class UnexpectedEOF(ParseError, UnexpectedInput): | ||||
"""An exception that is raised by the parser, when the input ends while it still expects a token. | """An exception that is raised by the parser, when the input ends while it still expects a token. | ||||
""" | """ | ||||
expected: 'List[Token]' | |||||
def __init__(self, expected, state=None, terminals_by_name=None): | def __init__(self, expected, state=None, terminals_by_name=None): | ||||
super(UnexpectedEOF, self).__init__() | super(UnexpectedEOF, self).__init__() | ||||
@@ -156,6 +166,9 @@ class UnexpectedCharacters(LexError, UnexpectedInput): | |||||
string of characters to any of its terminals. | string of characters to any of its terminals. | ||||
""" | """ | ||||
allowed: Set[str] | |||||
considered_tokens: Set[Any] | |||||
def __init__(self, seq, lex_pos, line, column, allowed=None, considered_tokens=None, state=None, token_history=None, | def __init__(self, seq, lex_pos, line, column, allowed=None, considered_tokens=None, state=None, token_history=None, | ||||
terminals_by_name=None, considered_rules=None): | terminals_by_name=None, considered_rules=None): | ||||
super(UnexpectedCharacters, self).__init__() | super(UnexpectedCharacters, self).__init__() | ||||
@@ -204,6 +217,10 @@ class UnexpectedToken(ParseError, UnexpectedInput): | |||||
Note: These parameters are available as attributes of the instance. | Note: These parameters are available as attributes of the instance. | ||||
""" | """ | ||||
expected: Set[str] | |||||
considered_rules: Set[str] | |||||
interactive_parser: 'InteractiveParser' | |||||
def __init__(self, token, expected, considered_rules=None, state=None, interactive_parser=None, terminals_by_name=None, token_history=None): | def __init__(self, token, expected, considered_rules=None, state=None, interactive_parser=None, terminals_by_name=None, token_history=None): | ||||
super(UnexpectedToken, self).__init__() | super(UnexpectedToken, self).__init__() | ||||
@@ -223,7 +240,7 @@ class UnexpectedToken(ParseError, UnexpectedInput): | |||||
@property | @property | ||||
def accepts(self): | |||||
def accepts(self) -> Set[str]: | |||||
if self._accepts is NO_VALUE: | if self._accepts is NO_VALUE: | ||||
self._accepts = self.interactive_parser and self.interactive_parser.accepts() | self._accepts = self.interactive_parser and self.interactive_parser.accepts() | ||||
return self._accepts | return self._accepts | ||||
@@ -236,11 +253,6 @@ class UnexpectedToken(ParseError, UnexpectedInput): | |||||
return message | return message | ||||
@property | |||||
def puppet(self): | |||||
warn("UnexpectedToken.puppet attribute has been renamed to interactive_parser", DeprecationWarning) | |||||
return self.interactive_parser | |||||
class VisitError(LarkError): | class VisitError(LarkError): | ||||
@@ -256,6 +268,9 @@ class VisitError(LarkError): | |||||
Note: These parameters are available as attributes | Note: These parameters are available as attributes | ||||
""" | """ | ||||
obj: 'Union[Tree, Token]' | |||||
orig_exc: Exception | |||||
def __init__(self, rule, obj, orig_exc): | def __init__(self, rule, obj, orig_exc): | ||||
message = 'Error trying to process rule "%s":\n\n%s' % (rule, orig_exc) | message = 'Error trying to process rule "%s":\n\n%s' % (rule, orig_exc) | ||||
super(VisitError, self).__init__(message) | super(VisitError, self).__init__(message) | ||||
@@ -1,13 +1,18 @@ | |||||
from typing import Optional, Tuple, ClassVar | |||||
from .utils import Serialize | from .utils import Serialize | ||||
###{standalone | ###{standalone | ||||
TOKEN_DEFAULT_PRIORITY = 0 | |||||
class Symbol(Serialize): | class Symbol(Serialize): | ||||
__slots__ = ('name',) | __slots__ = ('name',) | ||||
is_term = NotImplemented | |||||
name: str | |||||
is_term: ClassVar[bool] = NotImplemented | |||||
def __init__(self, name): | |||||
def __init__(self, name: str) -> None: | |||||
self.name = name | self.name = name | ||||
def __eq__(self, other): | def __eq__(self, other): | ||||
@@ -29,7 +34,7 @@ class Symbol(Serialize): | |||||
class Terminal(Symbol): | class Terminal(Symbol): | ||||
__serialize_fields__ = 'name', 'filter_out' | __serialize_fields__ = 'name', 'filter_out' | ||||
is_term = True | |||||
is_term: ClassVar[bool] = True | |||||
def __init__(self, name, filter_out=False): | def __init__(self, name, filter_out=False): | ||||
self.name = name | self.name = name | ||||
@@ -43,13 +48,19 @@ class Terminal(Symbol): | |||||
class NonTerminal(Symbol): | class NonTerminal(Symbol): | ||||
__serialize_fields__ = 'name', | __serialize_fields__ = 'name', | ||||
is_term = False | |||||
is_term: ClassVar[bool] = False | |||||
class RuleOptions(Serialize): | class RuleOptions(Serialize): | ||||
__serialize_fields__ = 'keep_all_tokens', 'expand1', 'priority', 'template_source', 'empty_indices' | __serialize_fields__ = 'keep_all_tokens', 'expand1', 'priority', 'template_source', 'empty_indices' | ||||
def __init__(self, keep_all_tokens=False, expand1=False, priority=None, template_source=None, empty_indices=()): | |||||
keep_all_tokens: bool | |||||
expand1: bool | |||||
priority: Optional[int] | |||||
template_source: Optional[str] | |||||
empty_indices: Tuple[bool, ...] | |||||
def __init__(self, keep_all_tokens: bool=False, expand1: bool=False, priority: Optional[int]=None, template_source: Optional[str]=None, empty_indices: Tuple[bool, ...]=()) -> None: | |||||
self.keep_all_tokens = keep_all_tokens | self.keep_all_tokens = keep_all_tokens | ||||
self.expand1 = expand1 | self.expand1 = expand1 | ||||
self.priority = priority | self.priority = priority | ||||
@@ -10,8 +10,8 @@ DEC_NUMBER: /0|[1-9][\d_]*/i | |||||
HEX_NUMBER.2: /0x[\da-f]*/i | HEX_NUMBER.2: /0x[\da-f]*/i | ||||
OCT_NUMBER.2: /0o[0-7]*/i | OCT_NUMBER.2: /0o[0-7]*/i | ||||
BIN_NUMBER.2 : /0b[0-1]*/i | BIN_NUMBER.2 : /0b[0-1]*/i | ||||
FLOAT_NUMBER.2: /((\d+\.[\d_]*|\.[\d_]+)([eE][-+]?\d+)?|\d+([eE][-+]?\d+))/ | |||||
IMAG_NUMBER.2: /\d+[jJ]/ | FLOAT_NUMBER /[jJ]/ | |||||
FLOAT_NUMBER.2: /((\d+\.[\d_]*|\.[\d_]+)([Ee][-+]?\d+)?|\d+([Ee][-+]?\d+))/ | |||||
IMAG_NUMBER.2: /\d+[Jj]/ | FLOAT_NUMBER /[Jj]/ | |||||
// Comma-separated list (with an optional trailing comma) | // Comma-separated list (with an optional trailing comma) | ||||
@@ -1,20 +1,27 @@ | |||||
"Provides Indentation services for languages with indentation similar to Python" | "Provides Indentation services for languages with indentation similar to Python" | ||||
from abc import ABC, abstractmethod | |||||
from typing import List, Iterator | |||||
from .exceptions import LarkError | from .exceptions import LarkError | ||||
from .lark import PostLex | from .lark import PostLex | ||||
from .lexer import Token | from .lexer import Token | ||||
###{standalone | ###{standalone | ||||
class DedentError(LarkError): | class DedentError(LarkError): | ||||
pass | pass | ||||
class Indenter(PostLex): | |||||
def __init__(self): | |||||
self.paren_level = None | |||||
self.indent_level = None | |||||
class Indenter(PostLex, ABC): | |||||
paren_level: int | |||||
indent_level: List[int] | |||||
def __init__(self) -> None: | |||||
self.paren_level = 0 | |||||
self.indent_level = [0] | |||||
assert self.tab_len > 0 | assert self.tab_len > 0 | ||||
def handle_NL(self, token): | |||||
def handle_NL(self, token: Token) -> Iterator[Token]: | |||||
if self.paren_level > 0: | if self.paren_level > 0: | ||||
return | return | ||||
@@ -64,4 +71,34 @@ class Indenter(PostLex): | |||||
def always_accept(self): | def always_accept(self): | ||||
return (self.NL_type,) | return (self.NL_type,) | ||||
@property | |||||
@abstractmethod | |||||
def NL_type(self) -> str: | |||||
raise NotImplementedError() | |||||
@property | |||||
@abstractmethod | |||||
def OPEN_PAREN_types(self) -> List[str]: | |||||
raise NotImplementedError() | |||||
@property | |||||
@abstractmethod | |||||
def CLOSE_PAREN_types(self) -> List[str]: | |||||
raise NotImplementedError() | |||||
@property | |||||
@abstractmethod | |||||
def INDENT_type(self) -> str: | |||||
raise NotImplementedError() | |||||
@property | |||||
@abstractmethod | |||||
def DEDENT_type(self) -> str: | |||||
raise NotImplementedError() | |||||
@property | |||||
@abstractmethod | |||||
def tab_len(self) -> int: | |||||
raise NotImplementedError() | |||||
###} | ###} |
@@ -1,26 +1,32 @@ | |||||
from __future__ import absolute_import | |||||
from lark.exceptions import ConfigurationError, assert_config | |||||
from abc import ABC, abstractmethod | |||||
import sys, os, pickle, hashlib | import sys, os, pickle, hashlib | ||||
from io import open | |||||
import tempfile | import tempfile | ||||
from warnings import warn | |||||
from .utils import STRING_TYPE, Serialize, SerializeMemoizer, FS, isascii, logger, ABC, abstractmethod | |||||
from .load_grammar import load_grammar, FromPackageLoader, Grammar, verify_used_files | |||||
from typing import ( | |||||
TypeVar, Type, List, Dict, Iterator, Callable, Union, Optional, | |||||
Tuple, Iterable, IO, Any, TYPE_CHECKING | |||||
) | |||||
if TYPE_CHECKING: | |||||
from .parsers.lalr_interactive_parser import InteractiveParser | |||||
from .visitors import Transformer | |||||
if sys.version_info >= (3, 8): | |||||
from typing import Literal | |||||
else: | |||||
from typing_extensions import Literal | |||||
from .exceptions import ConfigurationError, assert_config, UnexpectedInput | |||||
from .utils import Serialize, SerializeMemoizer, FS, isascii, logger | |||||
from .load_grammar import load_grammar, FromPackageLoader, Grammar, verify_used_files, PackageResource | |||||
from .tree import Tree | from .tree import Tree | ||||
from .common import LexerConf, ParserConf | from .common import LexerConf, ParserConf | ||||
from .lexer import Lexer, TraditionalLexer, TerminalDef, LexerThread | |||||
from .lexer import Lexer, TraditionalLexer, TerminalDef, LexerThread, Token | |||||
from .parse_tree_builder import ParseTreeBuilder | from .parse_tree_builder import ParseTreeBuilder | ||||
from .parser_frontends import get_frontend, _get_lexer_callbacks | from .parser_frontends import get_frontend, _get_lexer_callbacks | ||||
from .grammar import Rule | from .grammar import Rule | ||||
import re | import re | ||||
try: | try: | ||||
import regex | |||||
import regex # type: ignore | |||||
except ImportError: | except ImportError: | ||||
regex = None | regex = None | ||||
@@ -28,10 +34,39 @@ except ImportError: | |||||
###{standalone | ###{standalone | ||||
class PostLex(ABC): | |||||
@abstractmethod | |||||
def process(self, stream: Iterator[Token]) -> Iterator[Token]: | |||||
return stream | |||||
always_accept: Iterable[str] = () | |||||
class LarkOptions(Serialize): | class LarkOptions(Serialize): | ||||
"""Specifies the options for Lark | """Specifies the options for Lark | ||||
""" | """ | ||||
start: List[str] | |||||
debug: bool | |||||
transformer: 'Optional[Transformer]' | |||||
propagate_positions: Union[bool, str] | |||||
maybe_placeholders: bool | |||||
cache: Union[bool, str] | |||||
regex: bool | |||||
g_regex_flags: int | |||||
keep_all_tokens: bool | |||||
tree_class: Any | |||||
parser: 'Literal["earley", "lalr", "cyk", "auto"]' | |||||
lexer: 'Union[Literal["auto", "standard", "contextual", "dynamic", "dynamic_complete"], Type[Lexer]]' | |||||
ambiguity: 'Literal["auto", "resolve", "explicit", "forest"]' | |||||
postlex: Optional[PostLex] | |||||
priority: 'Optional[Literal["auto", "normal", "invert"]]' | |||||
lexer_callbacks: Dict[str, Callable[[Token], Token]] | |||||
use_bytes: bool | |||||
edit_terminals: Optional[Callable[[TerminalDef], TerminalDef]] | |||||
import_paths: 'List[Union[str, Callable[[Union[None, str, PackageResource], str], Tuple[str, str]]]]' | |||||
source_path: Optional[str] | |||||
OPTIONS_DOC = """ | OPTIONS_DOC = """ | ||||
**=== General Options ===** | **=== General Options ===** | ||||
@@ -47,9 +82,8 @@ class LarkOptions(Serialize): | |||||
Accepts ``False``, ``True``, or a callable, which will filter which nodes to ignore when propagating. | Accepts ``False``, ``True``, or a callable, which will filter which nodes to ignore when propagating. | ||||
maybe_placeholders | maybe_placeholders | ||||
When ``True``, the ``[]`` operator returns ``None`` when not matched. | When ``True``, the ``[]`` operator returns ``None`` when not matched. | ||||
When ``False``, ``[]`` behaves like the ``?`` operator, and returns no value at all. | When ``False``, ``[]`` behaves like the ``?`` operator, and returns no value at all. | ||||
(default= ``False``. Recommended to set to ``True``) | |||||
(default= ``True``) | |||||
cache | cache | ||||
Cache the results of the Lark grammar analysis, for x2 to x3 faster loading. LALR only for now. | Cache the results of the Lark grammar analysis, for x2 to x3 faster loading. LALR only for now. | ||||
@@ -111,12 +145,10 @@ class LarkOptions(Serialize): | |||||
# Adding a new option needs to be done in multiple places: | # Adding a new option needs to be done in multiple places: | ||||
# - In the dictionary below. This is the primary truth of which options `Lark.__init__` accepts | # - In the dictionary below. This is the primary truth of which options `Lark.__init__` accepts | ||||
# - In the docstring above. It is used both for the docstring of `LarkOptions` and `Lark`, and in readthedocs | # - In the docstring above. It is used both for the docstring of `LarkOptions` and `Lark`, and in readthedocs | ||||
# - In `lark-stubs/lark.pyi`: | |||||
# - As attribute to `LarkOptions` | |||||
# - As parameter to `Lark.__init__` | |||||
# - As an attribute of `LarkOptions` above | |||||
# - Potentially in `_LOAD_ALLOWED_OPTIONS` below this class, when the option doesn't change how the grammar is loaded | # - Potentially in `_LOAD_ALLOWED_OPTIONS` below this class, when the option doesn't change how the grammar is loaded | ||||
# - Potentially in `lark.tools.__init__`, if it makes sense, and it can easily be passed as a cmd argument | # - Potentially in `lark.tools.__init__`, if it makes sense, and it can easily be passed as a cmd argument | ||||
_defaults = { | |||||
_defaults: Dict[str, Any] = { | |||||
'debug': False, | 'debug': False, | ||||
'keep_all_tokens': False, | 'keep_all_tokens': False, | ||||
'tree_class': None, | 'tree_class': None, | ||||
@@ -131,7 +163,7 @@ class LarkOptions(Serialize): | |||||
'regex': False, | 'regex': False, | ||||
'propagate_positions': False, | 'propagate_positions': False, | ||||
'lexer_callbacks': {}, | 'lexer_callbacks': {}, | ||||
'maybe_placeholders': False, | |||||
'maybe_placeholders': True, | |||||
'edit_terminals': None, | 'edit_terminals': None, | ||||
'g_regex_flags': 0, | 'g_regex_flags': 0, | ||||
'use_bytes': False, | 'use_bytes': False, | ||||
@@ -153,7 +185,7 @@ class LarkOptions(Serialize): | |||||
options[name] = value | options[name] = value | ||||
if isinstance(options['start'], STRING_TYPE): | |||||
if isinstance(options['start'], str): | |||||
options['start'] = [options['start']] | options['start'] = [options['start']] | ||||
self.__dict__['options'] = options | self.__dict__['options'] = options | ||||
@@ -194,13 +226,7 @@ _VALID_PRIORITY_OPTIONS = ('auto', 'normal', 'invert', None) | |||||
_VALID_AMBIGUITY_OPTIONS = ('auto', 'resolve', 'explicit', 'forest') | _VALID_AMBIGUITY_OPTIONS = ('auto', 'resolve', 'explicit', 'forest') | ||||
class PostLex(ABC): | |||||
@abstractmethod | |||||
def process(self, stream): | |||||
return stream | |||||
always_accept = () | |||||
_T = TypeVar('_T') | |||||
class Lark(Serialize): | class Lark(Serialize): | ||||
"""Main interface for the library. | """Main interface for the library. | ||||
@@ -215,7 +241,15 @@ class Lark(Serialize): | |||||
>>> Lark(r'''start: "foo" ''') | >>> Lark(r'''start: "foo" ''') | ||||
Lark(...) | Lark(...) | ||||
""" | """ | ||||
def __init__(self, grammar, **options): | |||||
source_path: str | |||||
source_grammar: str | |||||
grammar: 'Grammar' | |||||
options: LarkOptions | |||||
lexer: Lexer | |||||
terminals: List[TerminalDef] | |||||
def __init__(self, grammar: 'Union[Grammar, str, IO[str]]', **options) -> None: | |||||
self.options = LarkOptions(options) | self.options = LarkOptions(options) | ||||
# Set regex or re module | # Set regex or re module | ||||
@@ -247,14 +281,11 @@ class Lark(Serialize): | |||||
cache_fn = None | cache_fn = None | ||||
cache_md5 = None | cache_md5 = None | ||||
if isinstance(grammar, STRING_TYPE): | |||||
if isinstance(grammar, str): | |||||
self.source_grammar = grammar | self.source_grammar = grammar | ||||
if self.options.use_bytes: | if self.options.use_bytes: | ||||
if not isascii(grammar): | if not isascii(grammar): | ||||
raise ConfigurationError("Grammar must be ascii only, when use_bytes=True") | raise ConfigurationError("Grammar must be ascii only, when use_bytes=True") | ||||
if sys.version_info[0] == 2 and self.options.use_bytes != 'force': | |||||
raise ConfigurationError("`use_bytes=True` may have issues on python2." | |||||
"Use `use_bytes='force'` to use it at your own risk.") | |||||
if self.options.cache: | if self.options.cache: | ||||
if self.options.parser != 'lalr': | if self.options.parser != 'lalr': | ||||
@@ -266,13 +297,13 @@ class Lark(Serialize): | |||||
s = grammar + options_str + __version__ + str(sys.version_info[:2]) | s = grammar + options_str + __version__ + str(sys.version_info[:2]) | ||||
cache_md5 = hashlib.md5(s.encode('utf8')).hexdigest() | cache_md5 = hashlib.md5(s.encode('utf8')).hexdigest() | ||||
if isinstance(self.options.cache, STRING_TYPE): | |||||
if isinstance(self.options.cache, str): | |||||
cache_fn = self.options.cache | cache_fn = self.options.cache | ||||
else: | else: | ||||
if self.options.cache is not True: | if self.options.cache is not True: | ||||
raise ConfigurationError("cache argument must be bool or str") | raise ConfigurationError("cache argument must be bool or str") | ||||
# Python2.7 doesn't support * syntax in tuples | |||||
cache_fn = tempfile.gettempdir() + '/.lark_cache_%s_%s_%s.tmp' % ((cache_md5,) + sys.version_info[:2]) | |||||
cache_fn = tempfile.gettempdir() + '/.lark_cache_%s_%s_%s.tmp' % (cache_md5, *sys.version_info[:2]) | |||||
if FS.exists(cache_fn): | if FS.exists(cache_fn): | ||||
logger.debug('Loading grammar from cache: %s', cache_fn) | logger.debug('Loading grammar from cache: %s', cache_fn) | ||||
@@ -336,7 +367,6 @@ class Lark(Serialize): | |||||
if self.options.priority not in _VALID_PRIORITY_OPTIONS: | if self.options.priority not in _VALID_PRIORITY_OPTIONS: | ||||
raise ConfigurationError("invalid priority option: %r. Must be one of %r" % (self.options.priority, _VALID_PRIORITY_OPTIONS)) | raise ConfigurationError("invalid priority option: %r. Must be one of %r" % (self.options.priority, _VALID_PRIORITY_OPTIONS)) | ||||
assert self.options.ambiguity not in ('resolve__antiscore_sum', ), 'resolve__antiscore_sum has been replaced with the option priority="invert"' | |||||
if self.options.ambiguity not in _VALID_AMBIGUITY_OPTIONS: | if self.options.ambiguity not in _VALID_AMBIGUITY_OPTIONS: | ||||
raise ConfigurationError("invalid ambiguity option: %r. Must be one of %r" % (self.options.ambiguity, _VALID_AMBIGUITY_OPTIONS)) | raise ConfigurationError("invalid ambiguity option: %r. Must be one of %r" % (self.options.ambiguity, _VALID_AMBIGUITY_OPTIONS)) | ||||
@@ -357,7 +387,6 @@ class Lark(Serialize): | |||||
self._terminals_dict = {t.name: t for t in self.terminals} | self._terminals_dict = {t.name: t for t in self.terminals} | ||||
# If the user asked to invert the priorities, negate them all here. | # If the user asked to invert the priorities, negate them all here. | ||||
# This replaces the old 'resolve__antiscore_sum' option. | |||||
if self.options.priority == 'invert': | if self.options.priority == 'invert': | ||||
for rule in self.rules: | for rule in self.rules: | ||||
if rule.options.priority is not None: | if rule.options.priority is not None: | ||||
@@ -384,6 +413,7 @@ class Lark(Serialize): | |||||
if cache_fn: | if cache_fn: | ||||
logger.debug('Saving grammar to cache: %s', cache_fn) | logger.debug('Saving grammar to cache: %s', cache_fn) | ||||
with FS.open(cache_fn, 'wb') as f: | with FS.open(cache_fn, 'wb') as f: | ||||
assert cache_md5 is not None | |||||
f.write(cache_md5.encode('utf8') + b'\n') | f.write(cache_md5.encode('utf8') + b'\n') | ||||
pickle.dump(used_files, f) | pickle.dump(used_files, f) | ||||
self.save(f) | self.save(f) | ||||
@@ -486,7 +516,7 @@ class Lark(Serialize): | |||||
return inst._load({'data': data, 'memo': memo}, **kwargs) | return inst._load({'data': data, 'memo': memo}, **kwargs) | ||||
@classmethod | @classmethod | ||||
def open(cls, grammar_filename, rel_to=None, **options): | |||||
def open(cls: Type[_T], grammar_filename: str, rel_to: Optional[str]=None, **options) -> _T: | |||||
"""Create an instance of Lark with the grammar given by its filename | """Create an instance of Lark with the grammar given by its filename | ||||
If ``rel_to`` is provided, the function will find the grammar filename in relation to it. | If ``rel_to`` is provided, the function will find the grammar filename in relation to it. | ||||
@@ -504,7 +534,7 @@ class Lark(Serialize): | |||||
return cls(f, **options) | return cls(f, **options) | ||||
@classmethod | @classmethod | ||||
def open_from_package(cls, package, grammar_path, search_paths=("",), **options): | |||||
def open_from_package(cls: Type[_T], package: str, grammar_path: str, search_paths: Tuple[str, ...]=("",), **options) -> _T: | |||||
"""Create an instance of Lark with the grammar loaded from within the package `package`. | """Create an instance of Lark with the grammar loaded from within the package `package`. | ||||
This allows grammar loading from zipapps. | This allows grammar loading from zipapps. | ||||
@@ -525,7 +555,7 @@ class Lark(Serialize): | |||||
return 'Lark(open(%r), parser=%r, lexer=%r, ...)' % (self.source_path, self.options.parser, self.options.lexer) | return 'Lark(open(%r), parser=%r, lexer=%r, ...)' % (self.source_path, self.options.parser, self.options.lexer) | ||||
def lex(self, text, dont_ignore=False): | |||||
def lex(self, text: str, dont_ignore: bool=False) -> Iterator[Token]: | |||||
"""Only lex (and postlex) the text, without parsing it. Only relevant when lexer='standard' | """Only lex (and postlex) the text, without parsing it. Only relevant when lexer='standard' | ||||
When dont_ignore=True, the lexer will return all tokens, even those marked for %ignore. | When dont_ignore=True, the lexer will return all tokens, even those marked for %ignore. | ||||
@@ -542,11 +572,11 @@ class Lark(Serialize): | |||||
return self.options.postlex.process(stream) | return self.options.postlex.process(stream) | ||||
return stream | return stream | ||||
def get_terminal(self, name): | |||||
def get_terminal(self, name: str) -> TerminalDef: | |||||
"""Get information about a terminal""" | """Get information about a terminal""" | ||||
return self._terminals_dict[name] | return self._terminals_dict[name] | ||||
def parse_interactive(self, text=None, start=None): | |||||
def parse_interactive(self, text: Optional[str]=None, start: Optional[str]=None) -> 'InteractiveParser': | |||||
"""Start an interactive parsing session. | """Start an interactive parsing session. | ||||
Parameters: | Parameters: | ||||
@@ -560,7 +590,7 @@ class Lark(Serialize): | |||||
""" | """ | ||||
return self.parser.parse_interactive(text, start=start) | return self.parser.parse_interactive(text, start=start) | ||||
def parse(self, text, start=None, on_error=None): | |||||
def parse(self, text: str, start: Optional[str]=None, on_error: 'Optional[Callable[[UnexpectedInput], bool]]'=None) -> Tree: | |||||
"""Parse the given text, according to the options provided. | """Parse the given text, according to the options provided. | ||||
Parameters: | Parameters: | ||||
@@ -580,23 +610,5 @@ class Lark(Serialize): | |||||
""" | """ | ||||
return self.parser.parse(text, start=start, on_error=on_error) | return self.parser.parse(text, start=start, on_error=on_error) | ||||
@property | |||||
def source(self): | |||||
warn("Attribute Lark.source was renamed to Lark.source_path", DeprecationWarning) | |||||
return self.source_path | |||||
@source.setter | |||||
def source(self, value): | |||||
self.source_path = value | |||||
@property | |||||
def grammar_source(self): | |||||
warn("Attribute Lark.grammar_source was renamed to Lark.source_grammar", DeprecationWarning) | |||||
return self.source_grammar | |||||
@grammar_source.setter | |||||
def grammar_source(self, value): | |||||
self.source_grammar = value | |||||
###} | ###} |
@@ -1,20 +1,32 @@ | |||||
# Lexer Implementation | # Lexer Implementation | ||||
from abc import abstractmethod, ABC | |||||
import re | import re | ||||
from .utils import Str, classify, get_regexp_width, Py36, Serialize, suppress | |||||
from contextlib import suppress | |||||
from typing import ( | |||||
TypeVar, Type, List, Dict, Iterator, Collection, Callable, Optional, FrozenSet, Any, | |||||
Pattern as REPattern, ClassVar, TYPE_CHECKING | |||||
) | |||||
from types import ModuleType | |||||
if TYPE_CHECKING: | |||||
from .common import LexerConf | |||||
from .utils import classify, get_regexp_width, Serialize | |||||
from .exceptions import UnexpectedCharacters, LexError, UnexpectedToken | from .exceptions import UnexpectedCharacters, LexError, UnexpectedToken | ||||
from .grammar import TOKEN_DEFAULT_PRIORITY | |||||
###{standalone | ###{standalone | ||||
from warnings import warn | |||||
from copy import copy | from copy import copy | ||||
class Pattern(Serialize): | |||||
raw = None | |||||
type = None | |||||
class Pattern(Serialize, ABC): | |||||
value: str | |||||
flags: Collection[str] | |||||
raw: Optional[str] | |||||
type: ClassVar[str] | |||||
def __init__(self, value, flags=(), raw=None): | |||||
def __init__(self, value: str, flags: Collection[str]=(), raw: Optional[str]=None) -> None: | |||||
self.value = value | self.value = value | ||||
self.flags = frozenset(flags) | self.flags = frozenset(flags) | ||||
self.raw = raw | self.raw = raw | ||||
@@ -29,50 +41,49 @@ class Pattern(Serialize): | |||||
def __eq__(self, other): | def __eq__(self, other): | ||||
return type(self) == type(other) and self.value == other.value and self.flags == other.flags | return type(self) == type(other) and self.value == other.value and self.flags == other.flags | ||||
def to_regexp(self): | |||||
@abstractmethod | |||||
def to_regexp(self) -> str: | |||||
raise NotImplementedError() | raise NotImplementedError() | ||||
def min_width(self): | |||||
@property | |||||
@abstractmethod | |||||
def min_width(self) -> int: | |||||
raise NotImplementedError() | raise NotImplementedError() | ||||
def max_width(self): | |||||
@property | |||||
@abstractmethod | |||||
def max_width(self) -> int: | |||||
raise NotImplementedError() | raise NotImplementedError() | ||||
if Py36: | |||||
# Python 3.6 changed syntax for flags in regular expression | |||||
def _get_flags(self, value): | |||||
for f in self.flags: | |||||
value = ('(?%s:%s)' % (f, value)) | |||||
return value | |||||
else: | |||||
def _get_flags(self, value): | |||||
for f in self.flags: | |||||
value = ('(?%s)' % f) + value | |||||
return value | |||||
def _get_flags(self, value): | |||||
for f in self.flags: | |||||
value = ('(?%s:%s)' % (f, value)) | |||||
return value | |||||
class PatternStr(Pattern): | class PatternStr(Pattern): | ||||
__serialize_fields__ = 'value', 'flags' | __serialize_fields__ = 'value', 'flags' | ||||
type = "str" | |||||
type: ClassVar[str] = "str" | |||||
def to_regexp(self): | |||||
def to_regexp(self) -> str: | |||||
return self._get_flags(re.escape(self.value)) | return self._get_flags(re.escape(self.value)) | ||||
@property | @property | ||||
def min_width(self): | |||||
def min_width(self) -> int: | |||||
return len(self.value) | |||||
@property | |||||
def max_width(self) -> int: | |||||
return len(self.value) | return len(self.value) | ||||
max_width = min_width | |||||
class PatternRE(Pattern): | class PatternRE(Pattern): | ||||
__serialize_fields__ = 'value', 'flags', '_width' | __serialize_fields__ = 'value', 'flags', '_width' | ||||
type = "re" | |||||
type: ClassVar[str] = "re" | |||||
def to_regexp(self): | |||||
def to_regexp(self) -> str: | |||||
return self._get_flags(self.value) | return self._get_flags(self.value) | ||||
_width = None | _width = None | ||||
@@ -82,11 +93,11 @@ class PatternRE(Pattern): | |||||
return self._width | return self._width | ||||
@property | @property | ||||
def min_width(self): | |||||
def min_width(self) -> int: | |||||
return self._get_width()[0] | return self._get_width()[0] | ||||
@property | @property | ||||
def max_width(self): | |||||
def max_width(self) -> int: | |||||
return self._get_width()[1] | return self._get_width()[1] | ||||
@@ -94,7 +105,11 @@ class TerminalDef(Serialize): | |||||
__serialize_fields__ = 'name', 'pattern', 'priority' | __serialize_fields__ = 'name', 'pattern', 'priority' | ||||
__serialize_namespace__ = PatternStr, PatternRE | __serialize_namespace__ = PatternStr, PatternRE | ||||
def __init__(self, name, pattern, priority=1): | |||||
name: str | |||||
pattern: Pattern | |||||
priority: int | |||||
def __init__(self, name: str, pattern: Pattern, priority: int=TOKEN_DEFAULT_PRIORITY) -> None: | |||||
assert isinstance(pattern, Pattern), pattern | assert isinstance(pattern, Pattern), pattern | ||||
self.name = name | self.name = name | ||||
self.pattern = pattern | self.pattern = pattern | ||||
@@ -103,14 +118,15 @@ class TerminalDef(Serialize): | |||||
def __repr__(self): | def __repr__(self): | ||||
return '%s(%r, %r)' % (type(self).__name__, self.name, self.pattern) | return '%s(%r, %r)' % (type(self).__name__, self.name, self.pattern) | ||||
def user_repr(self): | |||||
def user_repr(self) -> str: | |||||
if self.name.startswith('__'): # We represent a generated terminal | if self.name.startswith('__'): # We represent a generated terminal | ||||
return self.pattern.raw or self.name | return self.pattern.raw or self.name | ||||
else: | else: | ||||
return self.name | return self.name | ||||
_T = TypeVar('_T') | |||||
class Token(Str): | |||||
class Token(str): | |||||
"""A string with meta-information, that is produced by the lexer. | """A string with meta-information, that is produced by the lexer. | ||||
When parsing text, the resulting chunks of the input that haven't been discarded, | When parsing text, the resulting chunks of the input that haven't been discarded, | ||||
@@ -131,7 +147,16 @@ class Token(Str): | |||||
""" | """ | ||||
__slots__ = ('type', 'start_pos', 'value', 'line', 'column', 'end_line', 'end_column', 'end_pos') | __slots__ = ('type', 'start_pos', 'value', 'line', 'column', 'end_line', 'end_column', 'end_pos') | ||||
def __new__(cls, type_, value, start_pos=None, line=None, column=None, end_line=None, end_column=None, end_pos=None, pos_in_stream=None): | |||||
type: str | |||||
start_pos: int | |||||
value: Any | |||||
line: int | |||||
column: int | |||||
end_line: int | |||||
end_column: int | |||||
end_pos: int | |||||
def __new__(cls, type_, value, start_pos=None, line=None, column=None, end_line=None, end_column=None, end_pos=None): | |||||
try: | try: | ||||
inst = super(Token, cls).__new__(cls, value) | inst = super(Token, cls).__new__(cls, value) | ||||
except UnicodeDecodeError: | except UnicodeDecodeError: | ||||
@@ -139,7 +164,7 @@ class Token(Str): | |||||
inst = super(Token, cls).__new__(cls, value) | inst = super(Token, cls).__new__(cls, value) | ||||
inst.type = type_ | inst.type = type_ | ||||
inst.start_pos = start_pos if start_pos is not None else pos_in_stream | |||||
inst.start_pos = start_pos | |||||
inst.value = value | inst.value = value | ||||
inst.line = line | inst.line = line | ||||
inst.column = column | inst.column = column | ||||
@@ -148,12 +173,7 @@ class Token(Str): | |||||
inst.end_pos = end_pos | inst.end_pos = end_pos | ||||
return inst | return inst | ||||
@property | |||||
def pos_in_stream(self): | |||||
warn("Attribute Token.pos_in_stream was renamed to Token.start_pos", DeprecationWarning, 2) | |||||
return self.start_pos | |||||
def update(self, type_=None, value=None): | |||||
def update(self, type_: Optional[str]=None, value: Optional[Any]=None) -> 'Token': | |||||
return Token.new_borrow_pos( | return Token.new_borrow_pos( | ||||
type_ if type_ is not None else self.type, | type_ if type_ is not None else self.type, | ||||
value if value is not None else self.value, | value if value is not None else self.value, | ||||
@@ -161,7 +181,7 @@ class Token(Str): | |||||
) | ) | ||||
@classmethod | @classmethod | ||||
def new_borrow_pos(cls, type_, value, borrow_t): | |||||
def new_borrow_pos(cls: Type[_T], type_: str, value: Any, borrow_t: 'Token') -> _T: | |||||
return cls(type_, value, borrow_t.start_pos, borrow_t.line, borrow_t.column, borrow_t.end_line, borrow_t.end_column, borrow_t.end_pos) | return cls(type_, value, borrow_t.start_pos, borrow_t.line, borrow_t.column, borrow_t.end_line, borrow_t.end_column, borrow_t.end_pos) | ||||
def __reduce__(self): | def __reduce__(self): | ||||
@@ -177,9 +197,9 @@ class Token(Str): | |||||
if isinstance(other, Token) and self.type != other.type: | if isinstance(other, Token) and self.type != other.type: | ||||
return False | return False | ||||
return Str.__eq__(self, other) | |||||
return str.__eq__(self, other) | |||||
__hash__ = Str.__hash__ | |||||
__hash__ = str.__hash__ | |||||
class LineCounter: | class LineCounter: | ||||
@@ -198,7 +218,7 @@ class LineCounter: | |||||
return self.char_pos == other.char_pos and self.newline_char == other.newline_char | return self.char_pos == other.char_pos and self.newline_char == other.newline_char | ||||
def feed(self, token, test_newline=True): | |||||
def feed(self, token: Token, test_newline=True): | |||||
"""Consume a token and calculate the new line & column. | """Consume a token and calculate the new line & column. | ||||
As an optional optimization, set test_newline=False if token doesn't contain a newline. | As an optional optimization, set test_newline=False if token doesn't contain a newline. | ||||
@@ -262,7 +282,6 @@ def _create_unless(terminals, g_regex_flags, re_, use_bytes): | |||||
return new_terminals, callback | return new_terminals, callback | ||||
class Scanner: | class Scanner: | ||||
def __init__(self, terminals, g_regex_flags, re_, use_bytes, match_whole=False): | def __init__(self, terminals, g_regex_flags, re_, use_bytes, match_whole=False): | ||||
self.terminals = terminals | self.terminals = terminals | ||||
@@ -301,7 +320,7 @@ class Scanner: | |||||
return m.group(0), type_from_index[m.lastindex] | return m.group(0), type_from_index[m.lastindex] | ||||
def _regexp_has_newline(r): | |||||
def _regexp_has_newline(r: str): | |||||
r"""Expressions that may indicate newlines in a regexp: | r"""Expressions that may indicate newlines in a regexp: | ||||
- newlines (\n) | - newlines (\n) | ||||
- escaped newline (\\n) | - escaped newline (\\n) | ||||
@@ -312,13 +331,35 @@ def _regexp_has_newline(r): | |||||
return '\n' in r or '\\n' in r or '\\s' in r or '[^' in r or ('(?s' in r and '.' in r) | return '\n' in r or '\\n' in r or '\\s' in r or '[^' in r or ('(?s' in r and '.' in r) | ||||
class Lexer(object): | |||||
class LexerState(object): | |||||
__slots__ = 'text', 'line_ctr', 'last_token' | |||||
def __init__(self, text, line_ctr, last_token=None): | |||||
self.text = text | |||||
self.line_ctr = line_ctr | |||||
self.last_token = last_token | |||||
def __eq__(self, other): | |||||
if not isinstance(other, LexerState): | |||||
return NotImplemented | |||||
return self.text is other.text and self.line_ctr == other.line_ctr and self.last_token == other.last_token | |||||
def __copy__(self): | |||||
return type(self)(self.text, copy(self.line_ctr), self.last_token) | |||||
_Callback = Callable[[Token], Token] | |||||
class Lexer(ABC): | |||||
"""Lexer interface | """Lexer interface | ||||
Method Signatures: | Method Signatures: | ||||
lex(self, text) -> Iterator[Token] | |||||
lex(self, lexer_state, parser_state) -> Iterator[Token] | |||||
""" | """ | ||||
lex = NotImplemented | |||||
@abstractmethod | |||||
def lex(self, lexer_state: LexerState, parser_state: Any) -> Iterator[Token]: | |||||
return NotImplemented | |||||
def make_lexer_state(self, text): | def make_lexer_state(self, text): | ||||
line_ctr = LineCounter(b'\n' if isinstance(text, bytes) else '\n') | line_ctr = LineCounter(b'\n' if isinstance(text, bytes) else '\n') | ||||
@@ -327,7 +368,14 @@ class Lexer(object): | |||||
class TraditionalLexer(Lexer): | class TraditionalLexer(Lexer): | ||||
def __init__(self, conf): | |||||
terminals: Collection[TerminalDef] | |||||
ignore_types: FrozenSet[str] | |||||
newline_types: FrozenSet[str] | |||||
user_callbacks: Dict[str, _Callback] | |||||
callback: Dict[str, _Callback] | |||||
re: ModuleType | |||||
def __init__(self, conf: 'LexerConf') -> None: | |||||
terminals = list(conf.terminals) | terminals = list(conf.terminals) | ||||
assert all(isinstance(t, TerminalDef) for t in terminals), terminals | assert all(isinstance(t, TerminalDef) for t in terminals), terminals | ||||
@@ -382,12 +430,12 @@ class TraditionalLexer(Lexer): | |||||
def match(self, text, pos): | def match(self, text, pos): | ||||
return self.scanner.match(text, pos) | return self.scanner.match(text, pos) | ||||
def lex(self, state, parser_state): | |||||
def lex(self, state: LexerState, parser_state: Any) -> Iterator[Token]: | |||||
with suppress(EOFError): | with suppress(EOFError): | ||||
while True: | while True: | ||||
yield self.next_token(state, parser_state) | yield self.next_token(state, parser_state) | ||||
def next_token(self, lex_state, parser_state=None): | |||||
def next_token(self, lex_state: LexerState, parser_state: Any=None) -> Token: | |||||
line_ctr = lex_state.line_ctr | line_ctr = lex_state.line_ctr | ||||
while line_ctr.char_pos < len(lex_state.text): | while line_ctr.char_pos < len(lex_state.text): | ||||
res = self.match(lex_state.text, line_ctr.char_pos) | res = self.match(lex_state.text, line_ctr.char_pos) | ||||
@@ -423,27 +471,12 @@ class TraditionalLexer(Lexer): | |||||
raise EOFError(self) | raise EOFError(self) | ||||
class LexerState(object): | |||||
__slots__ = 'text', 'line_ctr', 'last_token' | |||||
def __init__(self, text, line_ctr, last_token=None): | |||||
self.text = text | |||||
self.line_ctr = line_ctr | |||||
self.last_token = last_token | |||||
def __eq__(self, other): | |||||
if not isinstance(other, LexerState): | |||||
return NotImplemented | |||||
return self.text is other.text and self.line_ctr == other.line_ctr and self.last_token == other.last_token | |||||
def __copy__(self): | |||||
return type(self)(self.text, copy(self.line_ctr), self.last_token) | |||||
class ContextualLexer(Lexer): | class ContextualLexer(Lexer): | ||||
def __init__(self, conf, states, always_accept=()): | |||||
lexers: Dict[str, TraditionalLexer] | |||||
root_lexer: TraditionalLexer | |||||
def __init__(self, conf: 'LexerConf', states: Dict[str, Collection[str]], always_accept: Collection[str]=()) -> None: | |||||
terminals = list(conf.terminals) | terminals = list(conf.terminals) | ||||
terminals_by_name = conf.terminals_by_name | terminals_by_name = conf.terminals_by_name | ||||
@@ -471,7 +504,7 @@ class ContextualLexer(Lexer): | |||||
def make_lexer_state(self, text): | def make_lexer_state(self, text): | ||||
return self.root_lexer.make_lexer_state(text) | return self.root_lexer.make_lexer_state(text) | ||||
def lex(self, lexer_state, parser_state): | |||||
def lex(self, lexer_state: LexerState, parser_state: Any) -> Iterator[Token]: | |||||
try: | try: | ||||
while True: | while True: | ||||
lexer = self.lexers[parser_state.position] | lexer = self.lexers[parser_state.position] | ||||
@@ -4,20 +4,20 @@ import os.path | |||||
import sys | import sys | ||||
from collections import namedtuple | from collections import namedtuple | ||||
from copy import copy, deepcopy | from copy import copy, deepcopy | ||||
from io import open | |||||
import pkgutil | import pkgutil | ||||
from ast import literal_eval | from ast import literal_eval | ||||
from numbers import Integral | |||||
from contextlib import suppress | |||||
from typing import List, Tuple, Union, Callable, Dict, Optional | |||||
from .utils import bfs, Py36, logger, classify_bool, is_id_continue, is_id_start, bfs_all_unique, small_factors | |||||
from .utils import bfs, logger, classify_bool, is_id_continue, is_id_start, bfs_all_unique, small_factors | |||||
from .lexer import Token, TerminalDef, PatternStr, PatternRE | from .lexer import Token, TerminalDef, PatternStr, PatternRE | ||||
from .parse_tree_builder import ParseTreeBuilder | from .parse_tree_builder import ParseTreeBuilder | ||||
from .parser_frontends import ParsingFrontend | from .parser_frontends import ParsingFrontend | ||||
from .common import LexerConf, ParserConf | from .common import LexerConf, ParserConf | ||||
from .grammar import RuleOptions, Rule, Terminal, NonTerminal, Symbol | |||||
from .utils import classify, suppress, dedup_list, Str | |||||
from .exceptions import GrammarError, UnexpectedCharacters, UnexpectedToken, ParseError | |||||
from .grammar import RuleOptions, Rule, Terminal, NonTerminal, Symbol, TOKEN_DEFAULT_PRIORITY | |||||
from .utils import classify, dedup_list | |||||
from .exceptions import GrammarError, UnexpectedCharacters, UnexpectedToken, ParseError, UnexpectedInput | |||||
from .tree import Tree, SlottedTree as ST | from .tree import Tree, SlottedTree as ST | ||||
from .visitors import Transformer, Visitor, v_args, Transformer_InPlace, Transformer_NonRecursive | from .visitors import Transformer, Visitor, v_args, Transformer_InPlace, Transformer_NonRecursive | ||||
@@ -585,18 +585,7 @@ class PrepareLiterals(Transformer_InPlace): | |||||
def _make_joined_pattern(regexp, flags_set): | def _make_joined_pattern(regexp, flags_set): | ||||
# In Python 3.6, a new syntax for flags was introduced, that allows us to restrict the scope | |||||
# of flags to a specific regexp group. We are already using it in `lexer.Pattern._get_flags` | |||||
# However, for prior Python versions, we still need to use global flags, so we have to make sure | |||||
# that there are no flag collisions when we merge several terminals. | |||||
flags = () | |||||
if not Py36: | |||||
if len(flags_set) > 1: | |||||
raise GrammarError("Lark doesn't support joining terminals with conflicting flags in python <3.6!") | |||||
elif len(flags_set) == 1: | |||||
flags ,= flags_set | |||||
return PatternRE(regexp, flags) | |||||
return PatternRE(regexp, ()) | |||||
class TerminalTreeToPattern(Transformer_NonRecursive): | class TerminalTreeToPattern(Transformer_NonRecursive): | ||||
def pattern(self, ps): | def pattern(self, ps): | ||||
@@ -652,9 +641,9 @@ class PrepareSymbols(Transformer_InPlace): | |||||
if isinstance(v, Tree): | if isinstance(v, Tree): | ||||
return v | return v | ||||
elif v.type == 'RULE': | elif v.type == 'RULE': | ||||
return NonTerminal(Str(v.value)) | |||||
return NonTerminal(str(v.value)) | |||||
elif v.type == 'TERMINAL': | elif v.type == 'TERMINAL': | ||||
return Terminal(Str(v.value), filter_out=v.startswith('_')) | |||||
return Terminal(str(v.value), filter_out=v.startswith('_')) | |||||
assert False | assert False | ||||
@@ -664,7 +653,12 @@ def nr_deepcopy_tree(t): | |||||
class Grammar: | class Grammar: | ||||
def __init__(self, rule_defs, term_defs, ignore): | |||||
term_defs: List[Tuple[str, Tuple[Tree, int]]] | |||||
rule_defs: List[Tuple[str, Tuple[str, ...], Tree, RuleOptions]] | |||||
ignore: List[str] | |||||
def __init__(self, rule_defs: List[Tuple[str, Tuple[str, ...], Tree, RuleOptions]], term_defs: List[Tuple[str, Tuple[Tree, int]]], ignore: List[str]) -> None: | |||||
self.term_defs = term_defs | self.term_defs = term_defs | ||||
self.rule_defs = rule_defs | self.rule_defs = rule_defs | ||||
self.ignore = ignore | self.ignore = ignore | ||||
@@ -807,14 +801,18 @@ class FromPackageLoader(object): | |||||
pkg_name: The name of the package. You can probably provide `__name__` most of the time | pkg_name: The name of the package. You can probably provide `__name__` most of the time | ||||
search_paths: All the path that will be search on absolute imports. | search_paths: All the path that will be search on absolute imports. | ||||
""" | """ | ||||
def __init__(self, pkg_name, search_paths=("", )): | |||||
pkg_name: str | |||||
search_paths: Tuple[str, ...] | |||||
def __init__(self, pkg_name: str, search_paths: Tuple[str, ...]=("", )) -> None: | |||||
self.pkg_name = pkg_name | self.pkg_name = pkg_name | ||||
self.search_paths = search_paths | self.search_paths = search_paths | ||||
def __repr__(self): | def __repr__(self): | ||||
return "%s(%r, %r)" % (type(self).__name__, self.pkg_name, self.search_paths) | return "%s(%r, %r)" % (type(self).__name__, self.pkg_name, self.search_paths) | ||||
def __call__(self, base_path, grammar_path): | |||||
def __call__(self, base_path: Union[None, str, PackageResource], grammar_path: str) -> Tuple[PackageResource, str]: | |||||
if base_path is None: | if base_path is None: | ||||
to_try = self.search_paths | to_try = self.search_paths | ||||
else: | else: | ||||
@@ -991,7 +989,7 @@ def _search_interactive_parser(interactive_parser, predicate): | |||||
if predicate(p): | if predicate(p): | ||||
return path, p | return path, p | ||||
def find_grammar_errors(text, start='start'): | |||||
def find_grammar_errors(text: str, start: str='start') -> List[Tuple[UnexpectedInput, str]]: | |||||
errors = [] | errors = [] | ||||
def on_error(e): | def on_error(e): | ||||
errors.append((e, _error_repr(e))) | errors.append((e, _error_repr(e))) | ||||
@@ -1040,7 +1038,12 @@ def _mangle_exp(exp, mangle): | |||||
class GrammarBuilder: | class GrammarBuilder: | ||||
def __init__(self, global_keep_all_tokens=False, import_paths=None, used_files=None): | |||||
global_keep_all_tokens: bool | |||||
import_paths: List[Union[str, Callable]] | |||||
used_files: Dict[str, str] | |||||
def __init__(self, global_keep_all_tokens: bool=False, import_paths: Optional[List[Union[str, Callable]]]=None, used_files: Optional[Dict[str, str]]=None) -> None: | |||||
self.global_keep_all_tokens = global_keep_all_tokens | self.global_keep_all_tokens = global_keep_all_tokens | ||||
self.import_paths = import_paths or [] | self.import_paths = import_paths or [] | ||||
self.used_files = used_files or {} | self.used_files = used_files or {} | ||||
@@ -1066,8 +1069,7 @@ class GrammarBuilder: | |||||
if self._is_term(name): | if self._is_term(name): | ||||
if options is None: | if options is None: | ||||
options = 1 | options = 1 | ||||
# if we don't use Integral here, we run into python2.7/python3 problems with long vs int | |||||
elif not isinstance(options, Integral): | |||||
elif not isinstance(options, int): | |||||
raise GrammarError("Terminal require a single int as 'options' (e.g. priority), got %s" % (type(options),)) | raise GrammarError("Terminal require a single int as 'options' (e.g. priority), got %s" % (type(options),)) | ||||
else: | else: | ||||
if options is None: | if options is None: | ||||
@@ -1120,7 +1122,7 @@ class GrammarBuilder: | |||||
name = '__IGNORE_%d'% len(self._ignore_names) | name = '__IGNORE_%d'% len(self._ignore_names) | ||||
self._ignore_names.append(name) | self._ignore_names.append(name) | ||||
self._definitions[name] = ((), t, 1) | |||||
self._definitions[name] = ((), t, TOKEN_DEFAULT_PRIORITY) | |||||
def _declare(self, *names): | def _declare(self, *names): | ||||
for name in names: | for name in names: | ||||
@@ -1171,7 +1173,7 @@ class GrammarBuilder: | |||||
else: | else: | ||||
name = tree.children[0].value | name = tree.children[0].value | ||||
params = () # TODO terminal templates | params = () # TODO terminal templates | ||||
opts = int(tree.children[1]) if len(tree.children) == 3 else 1 # priority | |||||
opts = int(tree.children[1]) if len(tree.children) == 3 else TOKEN_DEFAULT_PRIORITY # priority | |||||
exp = tree.children[-1] | exp = tree.children[-1] | ||||
if mangle is not None: | if mangle is not None: | ||||
@@ -1182,7 +1184,7 @@ class GrammarBuilder: | |||||
return name, exp, params, opts | return name, exp, params, opts | ||||
def load_grammar(self, grammar_text, grammar_name="<?>", mangle=None): | |||||
def load_grammar(self, grammar_text: str, grammar_name: str="<?>", mangle: Optional[Callable[[str], str]]=None) -> None: | |||||
tree = _parse_grammar(grammar_text, grammar_name) | tree = _parse_grammar(grammar_text, grammar_name) | ||||
imports = {} | imports = {} | ||||
@@ -1245,7 +1247,7 @@ class GrammarBuilder: | |||||
self._definitions = {k: v for k, v in self._definitions.items() if k in _used} | self._definitions = {k: v for k, v in self._definitions.items() if k in _used} | ||||
def do_import(self, dotted_path, base_path, aliases, base_mangle=None): | |||||
def do_import(self, dotted_path: Tuple[str, ...], base_path: Optional[str], aliases: Dict[str, str], base_mangle: Optional[Callable[[str], str]]=None) -> None: | |||||
assert dotted_path | assert dotted_path | ||||
mangle = _get_mangle('__'.join(dotted_path), aliases, base_mangle) | mangle = _get_mangle('__'.join(dotted_path), aliases, base_mangle) | ||||
grammar_path = os.path.join(*dotted_path) + EXT | grammar_path = os.path.join(*dotted_path) + EXT | ||||
@@ -1281,7 +1283,7 @@ class GrammarBuilder: | |||||
assert False, "Couldn't import grammar %s, but a corresponding file was found at a place where lark doesn't search for it" % (dotted_path,) | assert False, "Couldn't import grammar %s, but a corresponding file was found at a place where lark doesn't search for it" % (dotted_path,) | ||||
def validate(self): | |||||
def validate(self) -> None: | |||||
for name, (params, exp, _options) in self._definitions.items(): | for name, (params, exp, _options) in self._definitions.items(): | ||||
for i, p in enumerate(params): | for i, p in enumerate(params): | ||||
if p in self._definitions: | if p in self._definitions: | ||||
@@ -1310,7 +1312,7 @@ class GrammarBuilder: | |||||
if not set(self._definitions).issuperset(self._ignore_names): | if not set(self._definitions).issuperset(self._ignore_names): | ||||
raise GrammarError("Terminals %s were marked to ignore but were not defined!" % (set(self._ignore_names) - set(self._definitions))) | raise GrammarError("Terminals %s were marked to ignore but were not defined!" % (set(self._ignore_names) - set(self._definitions))) | ||||
def build(self): | |||||
def build(self) -> Grammar: | |||||
self.validate() | self.validate() | ||||
rule_defs = [] | rule_defs = [] | ||||
term_defs = [] | term_defs = [] | ||||
@@ -1,7 +1,8 @@ | |||||
from typing import List | |||||
from .exceptions import GrammarError, ConfigurationError | from .exceptions import GrammarError, ConfigurationError | ||||
from .lexer import Token | from .lexer import Token | ||||
from .tree import Tree | from .tree import Tree | ||||
from .visitors import InlineTransformer # XXX Deprecated | |||||
from .visitors import Transformer_InPlace | from .visitors import Transformer_InPlace | ||||
from .visitors import _vargs_meta, _vargs_meta_inline | from .visitors import _vargs_meta, _vargs_meta_inline | ||||
@@ -152,7 +153,7 @@ def _should_expand(sym): | |||||
return not sym.is_term and sym.name.startswith('_') | return not sym.is_term and sym.name.startswith('_') | ||||
def maybe_create_child_filter(expansion, keep_all_tokens, ambiguous, _empty_indices): | |||||
def maybe_create_child_filter(expansion, keep_all_tokens, ambiguous, _empty_indices: List[bool]): | |||||
# Prepare empty_indices as: How many Nones to insert at each index? | # Prepare empty_indices as: How many Nones to insert at each index? | ||||
if _empty_indices: | if _empty_indices: | ||||
assert _empty_indices.count(False) == len(expansion) | assert _empty_indices.count(False) == len(expansion) | ||||
@@ -301,12 +302,6 @@ class AmbiguousIntermediateExpander: | |||||
return self.node_builder(children) | return self.node_builder(children) | ||||
def ptb_inline_args(func): | |||||
@wraps(func) | |||||
def f(children): | |||||
return func(*children) | |||||
return f | |||||
def inplace_transformer(func): | def inplace_transformer(func): | ||||
@wraps(func) | @wraps(func) | ||||
@@ -362,15 +357,11 @@ class ParseTreeBuilder: | |||||
user_callback_name = rule.alias or rule.options.template_source or rule.origin.name | user_callback_name = rule.alias or rule.options.template_source or rule.origin.name | ||||
try: | try: | ||||
f = getattr(transformer, user_callback_name) | f = getattr(transformer, user_callback_name) | ||||
# XXX InlineTransformer is deprecated! | |||||
wrapper = getattr(f, 'visit_wrapper', None) | wrapper = getattr(f, 'visit_wrapper', None) | ||||
if wrapper is not None: | if wrapper is not None: | ||||
f = apply_visit_wrapper(f, user_callback_name, wrapper) | f = apply_visit_wrapper(f, user_callback_name, wrapper) | ||||
else: | |||||
if isinstance(transformer, InlineTransformer): | |||||
f = ptb_inline_args(f) | |||||
elif isinstance(transformer, Transformer_InPlace): | |||||
f = inplace_transformer(f) | |||||
elif isinstance(transformer, Transformer_InPlace): | |||||
f = inplace_transformer(f) | |||||
except AttributeError: | except AttributeError: | ||||
f = partial(self.tree_class, user_callback_name) | f = partial(self.tree_class, user_callback_name) | ||||
@@ -7,7 +7,7 @@ from .parsers.lalr_parser import LALR_Parser | |||||
from .tree import Tree | from .tree import Tree | ||||
from .common import LexerConf, ParserConf | from .common import LexerConf, ParserConf | ||||
try: | try: | ||||
import regex | |||||
import regex # type: ignore | |||||
except ImportError: | except ImportError: | ||||
regex = None | regex = None | ||||
import re | import re | ||||
@@ -32,20 +32,13 @@ class MakeParsingFrontend: | |||||
self.parser_type = parser_type | self.parser_type = parser_type | ||||
self.lexer_type = lexer_type | self.lexer_type = lexer_type | ||||
def __call__(self, lexer_conf, parser_conf, options): | |||||
assert isinstance(lexer_conf, LexerConf) | |||||
assert isinstance(parser_conf, ParserConf) | |||||
parser_conf.parser_type = self.parser_type | |||||
lexer_conf.lexer_type = self.lexer_type | |||||
return ParsingFrontend(lexer_conf, parser_conf, options) | |||||
def deserialize(self, data, memo, lexer_conf, callbacks, options): | def deserialize(self, data, memo, lexer_conf, callbacks, options): | ||||
parser_conf = ParserConf.deserialize(data['parser_conf'], memo) | parser_conf = ParserConf.deserialize(data['parser_conf'], memo) | ||||
parser = LALR_Parser.deserialize(data['parser'], memo, callbacks, options.debug) | parser = LALR_Parser.deserialize(data['parser'], memo, callbacks, options.debug) | ||||
parser_conf.callbacks = callbacks | parser_conf.callbacks = callbacks | ||||
return ParsingFrontend(lexer_conf, parser_conf, options, parser=parser) | return ParsingFrontend(lexer_conf, parser_conf, options, parser=parser) | ||||
# ... Continued later in the module | |||||
class ParsingFrontend(Serialize): | class ParsingFrontend(Serialize): | ||||
@@ -169,7 +162,7 @@ class EarleyRegexpMatcher: | |||||
def __init__(self, lexer_conf): | def __init__(self, lexer_conf): | ||||
self.regexps = {} | self.regexps = {} | ||||
for t in lexer_conf.terminals: | for t in lexer_conf.terminals: | ||||
if t.priority != 1: | |||||
if t.priority: | |||||
raise GrammarError("Dynamic Earley doesn't support weights on terminals", t, t.priority) | raise GrammarError("Dynamic Earley doesn't support weights on terminals", t, t.priority) | ||||
regexp = t.pattern.to_regexp() | regexp = t.pattern.to_regexp() | ||||
try: | try: | ||||
@@ -237,3 +230,12 @@ class CYK_FrontEnd: | |||||
def _apply_callback(self, tree): | def _apply_callback(self, tree): | ||||
return self.callbacks[tree.rule](tree.children) | return self.callbacks[tree.rule](tree.children) | ||||
class MakeParsingFrontend(MakeParsingFrontend): | |||||
def __call__(self, lexer_conf, parser_conf, options): | |||||
assert isinstance(lexer_conf, LexerConf) | |||||
assert isinstance(parser_conf, ParserConf) | |||||
parser_conf.parser_type = self.parser_type | |||||
lexer_conf.lexer_type = self.lexer_type | |||||
return ParsingFrontend(lexer_conf, parser_conf, options) |
@@ -126,7 +126,3 @@ class ImmutableInteractiveParser(InteractiveParser): | |||||
p = copy(self) | p = copy(self) | ||||
return InteractiveParser(p.parser, p.parser_state, p.lexer_state) | return InteractiveParser(p.parser, p.parser_state, p.lexer_state) | ||||
# Deprecated class names for the interactive parser | |||||
ParserPuppet = InteractiveParser | |||||
ImmutableParserPuppet = ImmutableInteractiveParser |
@@ -1,3 +0,0 @@ | |||||
# Deprecated | |||||
from .lalr_interactive_parser import ParserPuppet, ImmutableParserPuppet |
@@ -1,11 +1,13 @@ | |||||
"""Reconstruct text from a tree, based on Lark grammar""" | """Reconstruct text from a tree, based on Lark grammar""" | ||||
from typing import List, Dict, Union, Callable, Iterable, Optional | |||||
import unicodedata | import unicodedata | ||||
from .lark import Lark | |||||
from .tree import Tree | from .tree import Tree | ||||
from .visitors import Transformer_InPlace | from .visitors import Transformer_InPlace | ||||
from .lexer import Token, PatternStr | |||||
from .grammar import Terminal, NonTerminal | |||||
from .lexer import Token, PatternStr, TerminalDef | |||||
from .grammar import Terminal, NonTerminal, Symbol | |||||
from .tree_matcher import TreeMatcher, is_discarded_terminal | from .tree_matcher import TreeMatcher, is_discarded_terminal | ||||
from .utils import is_id_continue | from .utils import is_id_continue | ||||
@@ -21,7 +23,10 @@ def is_iter_empty(i): | |||||
class WriteTokensTransformer(Transformer_InPlace): | class WriteTokensTransformer(Transformer_InPlace): | ||||
"Inserts discarded tokens into their correct place, according to the rules of grammar" | "Inserts discarded tokens into their correct place, according to the rules of grammar" | ||||
def __init__(self, tokens, term_subs): | |||||
tokens: Dict[str, TerminalDef] | |||||
term_subs: Dict[str, Callable[[Symbol], str]] | |||||
def __init__(self, tokens: Dict[str, TerminalDef], term_subs: Dict[str, Callable[[Symbol], str]]) -> None: | |||||
self.tokens = tokens | self.tokens = tokens | ||||
self.term_subs = term_subs | self.term_subs = term_subs | ||||
@@ -70,7 +75,9 @@ class Reconstructor(TreeMatcher): | |||||
term_subs: a dictionary of [Terminal name as str] to [output text as str] | term_subs: a dictionary of [Terminal name as str] to [output text as str] | ||||
""" | """ | ||||
def __init__(self, parser, term_subs=None): | |||||
write_tokens: WriteTokensTransformer | |||||
def __init__(self, parser: Lark, term_subs: Optional[Dict[str, Callable[[Symbol], str]]]=None) -> None: | |||||
TreeMatcher.__init__(self, parser) | TreeMatcher.__init__(self, parser) | ||||
self.write_tokens = WriteTokensTransformer({t.name:t for t in self.tokens}, term_subs or {}) | self.write_tokens = WriteTokensTransformer({t.name:t for t in self.tokens}, term_subs or {}) | ||||
@@ -87,7 +94,7 @@ class Reconstructor(TreeMatcher): | |||||
else: | else: | ||||
yield item | yield item | ||||
def reconstruct(self, tree, postproc=None, insert_spaces=True): | |||||
def reconstruct(self, tree: Tree, postproc: Optional[Callable[[Iterable[str]], Iterable[str]]]=None, insert_spaces: bool=True) -> str: | |||||
x = self._reconstruct(tree) | x = self._reconstruct(tree) | ||||
if postproc: | if postproc: | ||||
x = postproc(x) | x = postproc(x) | ||||
@@ -6,7 +6,7 @@ import codecs | |||||
import argparse | import argparse | ||||
from lark import Lark, InlineTransformer | |||||
from lark import Lark, Transformer, v_args | |||||
nearley_grammar = r""" | nearley_grammar = r""" | ||||
start: (ruledef|directive)+ | start: (ruledef|directive)+ | ||||
@@ -50,7 +50,8 @@ def _get_rulename(name): | |||||
name = {'_': '_ws_maybe', '__':'_ws'}.get(name, name) | name = {'_': '_ws_maybe', '__':'_ws'}.get(name, name) | ||||
return 'n_' + name.replace('$', '__DOLLAR__').lower() | return 'n_' + name.replace('$', '__DOLLAR__').lower() | ||||
class NearleyToLark(InlineTransformer): | |||||
@v_args(inline=True) | |||||
class NearleyToLark(Transformer): | |||||
def __init__(self): | def __init__(self): | ||||
self._count = 0 | self._count = 0 | ||||
self.extra_rules = {} | self.extra_rules = {} | ||||
@@ -1,5 +1,3 @@ | |||||
from __future__ import print_function | |||||
###{standalone | ###{standalone | ||||
# | # | ||||
# | # | ||||
@@ -26,7 +24,14 @@ from __future__ import print_function | |||||
# | # | ||||
# | # | ||||
from io import open | |||||
from abc import ABC, abstractmethod | |||||
from collections.abc import Sequence | |||||
from types import ModuleType | |||||
from typing import ( | |||||
TypeVar, Generic, Type, Tuple, List, Dict, Iterator, Collection, Callable, Optional, FrozenSet, Any, | |||||
Union, Iterable, IO, TYPE_CHECKING, | |||||
Pattern as REPattern, ClassVar, Set, | |||||
) | |||||
###} | ###} | ||||
import sys | import sys | ||||
@@ -35,15 +40,13 @@ import os | |||||
from os import path | from os import path | ||||
from collections import defaultdict | from collections import defaultdict | ||||
from functools import partial | from functools import partial | ||||
from argparse import ArgumentParser, SUPPRESS | |||||
from warnings import warn | |||||
from argparse import ArgumentParser | |||||
import lark | import lark | ||||
from lark import Lark | |||||
from lark.tools import lalr_argparser, build_lalr, make_warnings_comments | from lark.tools import lalr_argparser, build_lalr, make_warnings_comments | ||||
from lark.grammar import RuleOptions, Rule | |||||
from lark.grammar import Rule | |||||
from lark.lexer import TerminalDef | from lark.lexer import TerminalDef | ||||
_dir = path.dirname(__file__) | _dir = path.dirname(__file__) | ||||
@@ -118,11 +121,6 @@ def strip_docstrings(line_gen): | |||||
return ''.join(res) | return ''.join(res) | ||||
def main(fobj, start, print=print): | |||||
warn('`lark.tools.standalone.main` is being redesigned. Use `gen_standalone`', DeprecationWarning) | |||||
lark_inst = Lark(fobj, parser="lalr", lexer="contextual", start=start) | |||||
gen_standalone(lark_inst, print) | |||||
def gen_standalone(lark_inst, output=None, out=sys.stdout, compress=False): | def gen_standalone(lark_inst, output=None, out=sys.stdout, compress=False): | ||||
if output is None: | if output is None: | ||||
output = partial(print, file=out) | output = partial(print, file=out) | ||||
@@ -179,15 +177,11 @@ def main(): | |||||
make_warnings_comments() | make_warnings_comments() | ||||
parser = ArgumentParser(prog="prog='python -m lark.tools.standalone'", description="Lark Stand-alone Generator Tool", | parser = ArgumentParser(prog="prog='python -m lark.tools.standalone'", description="Lark Stand-alone Generator Tool", | ||||
parents=[lalr_argparser], epilog='Look at the Lark documentation for more info on the options') | parents=[lalr_argparser], epilog='Look at the Lark documentation for more info on the options') | ||||
parser.add_argument("old_start", nargs='?', help=SUPPRESS) | |||||
parser.add_argument('-c', '--compress', action='store_true', default=0, help="Enable compression") | parser.add_argument('-c', '--compress', action='store_true', default=0, help="Enable compression") | ||||
if len(sys.argv)==1: | if len(sys.argv)==1: | ||||
parser.print_help(sys.stderr) | parser.print_help(sys.stderr) | ||||
sys.exit(1) | sys.exit(1) | ||||
ns = parser.parse_args() | ns = parser.parse_args() | ||||
if ns.old_start is not None: | |||||
warn('The syntax `python -m lark.tools.standalone <grammar-file> <start>` is deprecated. Use the -s option') | |||||
ns.start.append(ns.old_start) | |||||
lark_inst, out = build_lalr(ns) | lark_inst, out = build_lalr(ns) | ||||
gen_standalone(lark_inst, out=out, compress=ns.compress) | gen_standalone(lark_inst, out=out, compress=ns.compress) | ||||
@@ -1,16 +1,36 @@ | |||||
try: | try: | ||||
from future_builtins import filter | |||||
from future_builtins import filter # type: ignore | |||||
except ImportError: | except ImportError: | ||||
pass | pass | ||||
import sys | |||||
from copy import deepcopy | from copy import deepcopy | ||||
from typing import List, Callable, Iterator, Union, Optional, TYPE_CHECKING | |||||
if TYPE_CHECKING: | |||||
from .lexer import TerminalDef | |||||
if sys.version_info >= (3, 8): | |||||
from typing import Literal | |||||
else: | |||||
from typing_extensions import Literal | |||||
###{standalone | ###{standalone | ||||
from collections import OrderedDict | from collections import OrderedDict | ||||
class Meta: | class Meta: | ||||
empty: bool | |||||
line: int | |||||
column: int | |||||
start_pos: int | |||||
end_line: int | |||||
end_column: int | |||||
end_pos: int | |||||
orig_expansion: 'List[TerminalDef]' | |||||
match_tree: bool | |||||
def __init__(self): | def __init__(self): | ||||
self.empty = True | self.empty = True | ||||
@@ -27,13 +47,17 @@ class Tree(object): | |||||
meta: Line & Column numbers (if ``propagate_positions`` is enabled). | meta: Line & Column numbers (if ``propagate_positions`` is enabled). | ||||
meta attributes: line, column, start_pos, end_line, end_column, end_pos | meta attributes: line, column, start_pos, end_line, end_column, end_pos | ||||
""" | """ | ||||
def __init__(self, data, children, meta=None): | |||||
data: str | |||||
children: 'List[Union[str, Tree]]' | |||||
def __init__(self, data: str, children: 'List[Union[str, Tree]]', meta: Optional[Meta]=None) -> None: | |||||
self.data = data | self.data = data | ||||
self.children = children | self.children = children | ||||
self._meta = meta | self._meta = meta | ||||
@property | @property | ||||
def meta(self): | |||||
def meta(self) -> Meta: | |||||
if self._meta is None: | if self._meta is None: | ||||
self._meta = Meta() | self._meta = Meta() | ||||
return self._meta | return self._meta | ||||
@@ -57,7 +81,7 @@ class Tree(object): | |||||
return l | return l | ||||
def pretty(self, indent_str=' '): | |||||
def pretty(self, indent_str: str=' ') -> str: | |||||
"""Returns an indented string representation of the tree. | """Returns an indented string representation of the tree. | ||||
Great for debugging. | Great for debugging. | ||||
@@ -73,10 +97,10 @@ class Tree(object): | |||||
def __ne__(self, other): | def __ne__(self, other): | ||||
return not (self == other) | return not (self == other) | ||||
def __hash__(self): | |||||
def __hash__(self) -> int: | |||||
return hash((self.data, tuple(self.children))) | return hash((self.data, tuple(self.children))) | ||||
def iter_subtrees(self): | |||||
def iter_subtrees(self) -> 'Iterator[Tree]': | |||||
"""Depth-first iteration. | """Depth-first iteration. | ||||
Iterates over all the subtrees, never returning to the same node twice (Lark's parse-tree is actually a DAG). | Iterates over all the subtrees, never returning to the same node twice (Lark's parse-tree is actually a DAG). | ||||
@@ -91,17 +115,17 @@ class Tree(object): | |||||
del queue | del queue | ||||
return reversed(list(subtrees.values())) | return reversed(list(subtrees.values())) | ||||
def find_pred(self, pred): | |||||
def find_pred(self, pred: 'Callable[[Tree], bool]') -> 'Iterator[Tree]': | |||||
"""Returns all nodes of the tree that evaluate pred(node) as true.""" | """Returns all nodes of the tree that evaluate pred(node) as true.""" | ||||
return filter(pred, self.iter_subtrees()) | return filter(pred, self.iter_subtrees()) | ||||
def find_data(self, data): | |||||
def find_data(self, data: str) -> 'Iterator[Tree]': | |||||
"""Returns all nodes of the tree whose data equals the given data.""" | """Returns all nodes of the tree whose data equals the given data.""" | ||||
return self.find_pred(lambda t: t.data == data) | return self.find_pred(lambda t: t.data == data) | ||||
###} | ###} | ||||
def expand_kids_by_index(self, *indices): | |||||
def expand_kids_by_index(self, *indices: int) -> None: | |||||
"""Expand (inline) children at the given indices""" | """Expand (inline) children at the given indices""" | ||||
for i in sorted(indices, reverse=True): # reverse so that changing tail won't affect indices | for i in sorted(indices, reverse=True): # reverse so that changing tail won't affect indices | ||||
kid = self.children[i] | kid = self.children[i] | ||||
@@ -118,7 +142,7 @@ class Tree(object): | |||||
return changed | return changed | ||||
def scan_values(self, pred): | |||||
def scan_values(self, pred: 'Callable[[Union[str, Tree]], bool]') -> Iterator[str]: | |||||
"""Return all values in the tree that evaluate pred(value) as true. | """Return all values in the tree that evaluate pred(value) as true. | ||||
This can be used to find all the tokens in the tree. | This can be used to find all the tokens in the tree. | ||||
@@ -151,36 +175,20 @@ class Tree(object): | |||||
def __deepcopy__(self, memo): | def __deepcopy__(self, memo): | ||||
return type(self)(self.data, deepcopy(self.children, memo), meta=self._meta) | return type(self)(self.data, deepcopy(self.children, memo), meta=self._meta) | ||||
def copy(self): | |||||
def copy(self) -> 'Tree': | |||||
return type(self)(self.data, self.children) | return type(self)(self.data, self.children) | ||||
def set(self, data, children): | |||||
def set(self, data: str, children: 'List[Union[str, Tree]]') -> None: | |||||
self.data = data | self.data = data | ||||
self.children = children | self.children = children | ||||
# XXX Deprecated! Here for backwards compatibility <0.6.0 | |||||
@property | |||||
def line(self): | |||||
return self.meta.line | |||||
@property | |||||
def column(self): | |||||
return self.meta.column | |||||
@property | |||||
def end_line(self): | |||||
return self.meta.end_line | |||||
@property | |||||
def end_column(self): | |||||
return self.meta.end_column | |||||
class SlottedTree(Tree): | class SlottedTree(Tree): | ||||
__slots__ = 'data', 'children', 'rule', '_meta' | __slots__ = 'data', 'children', 'rule', '_meta' | ||||
def pydot__tree_to_png(tree, filename, rankdir="LR", **kwargs): | |||||
def pydot__tree_to_png(tree: Tree, filename: str, rankdir: 'Literal["TB", "LR", "BT", "RL"]'="LR", **kwargs) -> None: | |||||
graph = pydot__tree_to_graph(tree, rankdir, **kwargs) | graph = pydot__tree_to_graph(tree, rankdir, **kwargs) | ||||
graph.write_png(filename) | graph.write_png(filename) | ||||
@@ -201,7 +209,7 @@ def pydot__tree_to_graph(tree, rankdir="LR", **kwargs): | |||||
possible attributes, see https://www.graphviz.org/doc/info/attrs.html. | possible attributes, see https://www.graphviz.org/doc/info/attrs.html. | ||||
""" | """ | ||||
import pydot | |||||
import pydot # type: ignore | |||||
graph = pydot.Dot(graph_type='digraph', rankdir=rankdir, **kwargs) | graph = pydot.Dot(graph_type='digraph', rankdir=rankdir, **kwargs) | ||||
i = [0] | i = [0] | ||||
@@ -1,4 +1,3 @@ | |||||
import hashlib | |||||
import unicodedata | import unicodedata | ||||
import os | import os | ||||
from functools import reduce | from functools import reduce | ||||
@@ -7,23 +6,12 @@ from collections import deque | |||||
###{standalone | ###{standalone | ||||
import sys, re | import sys, re | ||||
import logging | import logging | ||||
from io import open | |||||
logger = logging.getLogger("lark") | |||||
logger: logging.Logger = logging.getLogger("lark") | |||||
logger.addHandler(logging.StreamHandler()) | logger.addHandler(logging.StreamHandler()) | ||||
# Set to highest level, since we have some warnings amongst the code | # Set to highest level, since we have some warnings amongst the code | ||||
# By default, we should not output any log messages | # By default, we should not output any log messages | ||||
logger.setLevel(logging.CRITICAL) | logger.setLevel(logging.CRITICAL) | ||||
if sys.version_info[0]>2: | |||||
from abc import ABC, abstractmethod | |||||
else: | |||||
from abc import ABCMeta, abstractmethod | |||||
class ABC(object): # Provide Python27 compatibility | |||||
__slots__ = () | |||||
__metclass__ = ABCMeta | |||||
Py36 = (sys.version_info[:2] >= (3, 6)) | |||||
NO_VALUE = object() | NO_VALUE = object() | ||||
@@ -120,28 +108,16 @@ class SerializeMemoizer(Serialize): | |||||
return _deserialize(data, namespace, memo) | return _deserialize(data, namespace, memo) | ||||
try: | |||||
STRING_TYPE = basestring | |||||
except NameError: # Python 3 | |||||
STRING_TYPE = str | |||||
import types | import types | ||||
from functools import wraps, partial | from functools import wraps, partial | ||||
from contextlib import contextmanager | |||||
Str = type(u'') | |||||
try: | |||||
classtype = types.ClassType # Python2 | |||||
except AttributeError: | |||||
classtype = type # Python3 | |||||
def smart_decorator(f, create_decorator): | def smart_decorator(f, create_decorator): | ||||
if isinstance(f, types.FunctionType): | if isinstance(f, types.FunctionType): | ||||
return wraps(f)(create_decorator(f, True)) | return wraps(f)(create_decorator(f, True)) | ||||
elif isinstance(f, (classtype, type, types.BuiltinFunctionType)): | |||||
elif isinstance(f, (type, types.BuiltinFunctionType)): | |||||
return wraps(f)(create_decorator(f, False)) | return wraps(f)(create_decorator(f, False)) | ||||
elif isinstance(f, types.MethodType): | elif isinstance(f, types.MethodType): | ||||
@@ -156,7 +132,7 @@ def smart_decorator(f, create_decorator): | |||||
try: | try: | ||||
import regex | |||||
import regex # type: ignore | |||||
except ImportError: | except ImportError: | ||||
regex = None | regex = None | ||||
@@ -222,25 +198,6 @@ def dedup_list(l): | |||||
return [x for x in l if not (x in dedup or dedup.add(x))] | return [x for x in l if not (x in dedup or dedup.add(x))] | ||||
try: | |||||
from contextlib import suppress # Python 3 | |||||
except ImportError: | |||||
@contextmanager | |||||
def suppress(*excs): | |||||
'''Catch and dismiss the provided exception | |||||
>>> x = 'hello' | |||||
>>> with suppress(IndexError): | |||||
... x = x[10] | |||||
>>> x | |||||
'hello' | |||||
''' | |||||
try: | |||||
yield | |||||
except excs: | |||||
pass | |||||
class Enumerator(Serialize): | class Enumerator(Serialize): | ||||
def __init__(self): | def __init__(self): | ||||
self.enums = {} | self.enums = {} | ||||
@@ -284,7 +241,7 @@ def combine_alternatives(lists): | |||||
try: | try: | ||||
import atomicwrites | import atomicwrites | ||||
except ImportError: | except ImportError: | ||||
atomicwrites = None | |||||
atomicwrites = None # type: ignore | |||||
class FS: | class FS: | ||||
exists = staticmethod(os.path.exists) | exists = staticmethod(os.path.exists) | ||||
@@ -1,3 +1,5 @@ | |||||
from typing import TypeVar, Tuple, List, Callable, Generic, Type, Union, Optional | |||||
from abc import ABC | |||||
from functools import wraps | from functools import wraps | ||||
from .utils import smart_decorator, combine_alternatives | from .utils import smart_decorator, combine_alternatives | ||||
@@ -8,6 +10,10 @@ from .lexer import Token | |||||
###{standalone | ###{standalone | ||||
from inspect import getmembers, getmro | from inspect import getmembers, getmro | ||||
_T = TypeVar('_T') | |||||
_R = TypeVar('_R') | |||||
_FUNC = Callable[..., _T] | |||||
_DECORATED = Union[_FUNC, type] | |||||
class Discard(Exception): | class Discard(Exception): | ||||
"""When raising the Discard exception in a transformer callback, | """When raising the Discard exception in a transformer callback, | ||||
@@ -46,7 +52,7 @@ class _Decoratable: | |||||
return cls | return cls | ||||
class Transformer(_Decoratable): | |||||
class Transformer(_Decoratable, ABC, Generic[_T]): | |||||
"""Transformers visit each node of the tree, and run the appropriate method on it according to the node's data. | """Transformers visit each node of the tree, and run the appropriate method on it according to the node's data. | ||||
Methods are provided by the user via inheritance, and called according to ``tree.data``. | Methods are provided by the user via inheritance, and called according to ``tree.data``. | ||||
@@ -74,7 +80,7 @@ class Transformer(_Decoratable): | |||||
""" | """ | ||||
__visit_tokens__ = True # For backwards compatibility | __visit_tokens__ = True # For backwards compatibility | ||||
def __init__(self, visit_tokens=True): | |||||
def __init__(self, visit_tokens: bool=True) -> None: | |||||
self.__visit_tokens__ = visit_tokens | self.__visit_tokens__ = visit_tokens | ||||
def _call_userfunc(self, tree, new_children=None): | def _call_userfunc(self, tree, new_children=None): | ||||
@@ -125,11 +131,11 @@ class Transformer(_Decoratable): | |||||
children = list(self._transform_children(tree.children)) | children = list(self._transform_children(tree.children)) | ||||
return self._call_userfunc(tree, children) | return self._call_userfunc(tree, children) | ||||
def transform(self, tree): | |||||
def transform(self, tree: Tree) -> _T: | |||||
"Transform the given tree, and return the final result" | "Transform the given tree, and return the final result" | ||||
return self._transform_tree(tree) | return self._transform_tree(tree) | ||||
def __mul__(self, other): | |||||
def __mul__(self, other: 'Transformer[_T]') -> 'TransformerChain[_T]': | |||||
"""Chain two transformers together, returning a new transformer. | """Chain two transformers together, returning a new transformer. | ||||
""" | """ | ||||
return TransformerChain(self, other) | return TransformerChain(self, other) | ||||
@@ -213,17 +219,19 @@ class InlineTransformer(Transformer): # XXX Deprecated | |||||
else: | else: | ||||
return f(*children) | return f(*children) | ||||
class TransformerChain(Generic[_T]): | |||||
transformers: Tuple[Transformer[_T], ...] | |||||
class TransformerChain(object): | |||||
def __init__(self, *transformers): | |||||
def __init__(self, *transformers: Transformer[_T]) -> None: | |||||
self.transformers = transformers | self.transformers = transformers | ||||
def transform(self, tree): | |||||
def transform(self, tree: Tree) -> _T: | |||||
for t in self.transformers: | for t in self.transformers: | ||||
tree = t.transform(tree) | tree = t.transform(tree) | ||||
return tree | return tree | ||||
def __mul__(self, other): | |||||
def __mul__(self, other: Transformer[_T]) -> 'TransformerChain[_T]': | |||||
return TransformerChain(*self.transformers + (other,)) | return TransformerChain(*self.transformers + (other,)) | ||||
@@ -304,19 +312,19 @@ class VisitorBase: | |||||
return cls | return cls | ||||
class Visitor(VisitorBase): | |||||
class Visitor(VisitorBase, ABC, Generic[_T]): | |||||
"""Tree visitor, non-recursive (can handle huge trees). | """Tree visitor, non-recursive (can handle huge trees). | ||||
Visiting a node calls its methods (provided by the user via inheritance) according to ``tree.data`` | Visiting a node calls its methods (provided by the user via inheritance) according to ``tree.data`` | ||||
""" | """ | ||||
def visit(self, tree): | |||||
def visit(self, tree: Tree) -> Tree: | |||||
"Visits the tree, starting with the leaves and finally the root (bottom-up)" | "Visits the tree, starting with the leaves and finally the root (bottom-up)" | ||||
for subtree in tree.iter_subtrees(): | for subtree in tree.iter_subtrees(): | ||||
self._call_userfunc(subtree) | self._call_userfunc(subtree) | ||||
return tree | return tree | ||||
def visit_topdown(self,tree): | |||||
def visit_topdown(self, tree: Tree) -> Tree: | |||||
"Visit the tree, starting at the root, and ending at the leaves (top-down)" | "Visit the tree, starting at the root, and ending at the leaves (top-down)" | ||||
for subtree in tree.iter_subtrees_topdown(): | for subtree in tree.iter_subtrees_topdown(): | ||||
self._call_userfunc(subtree) | self._call_userfunc(subtree) | ||||
@@ -331,7 +339,7 @@ class Visitor_Recursive(VisitorBase): | |||||
Slightly faster than the non-recursive version. | Slightly faster than the non-recursive version. | ||||
""" | """ | ||||
def visit(self, tree): | |||||
def visit(self, tree: Tree) -> Tree: | |||||
"Visits the tree, starting with the leaves and finally the root (bottom-up)" | "Visits the tree, starting with the leaves and finally the root (bottom-up)" | ||||
for child in tree.children: | for child in tree.children: | ||||
if isinstance(child, Tree): | if isinstance(child, Tree): | ||||
@@ -340,7 +348,7 @@ class Visitor_Recursive(VisitorBase): | |||||
self._call_userfunc(tree) | self._call_userfunc(tree) | ||||
return tree | return tree | ||||
def visit_topdown(self,tree): | |||||
def visit_topdown(self,tree: Tree) -> Tree: | |||||
"Visit the tree, starting at the root, and ending at the leaves (top-down)" | "Visit the tree, starting at the root, and ending at the leaves (top-down)" | ||||
self._call_userfunc(tree) | self._call_userfunc(tree) | ||||
@@ -351,16 +359,7 @@ class Visitor_Recursive(VisitorBase): | |||||
return tree | return tree | ||||
def visit_children_decor(func): | |||||
"See Interpreter" | |||||
@wraps(func) | |||||
def inner(cls, tree): | |||||
values = cls.visit_children(tree) | |||||
return func(cls, values) | |||||
return inner | |||||
class Interpreter(_Decoratable): | |||||
class Interpreter(_Decoratable, ABC, Generic[_T]): | |||||
"""Interpreter walks the tree starting at the root. | """Interpreter walks the tree starting at the root. | ||||
Visits the tree, starting with the root and finally the leaves (top-down) | Visits the tree, starting with the root and finally the leaves (top-down) | ||||
@@ -372,7 +371,7 @@ class Interpreter(_Decoratable): | |||||
This allows the user to implement branching and loops. | This allows the user to implement branching and loops. | ||||
""" | """ | ||||
def visit(self, tree): | |||||
def visit(self, tree: Tree) -> _T: | |||||
f = getattr(self, tree.data) | f = getattr(self, tree.data) | ||||
wrapper = getattr(f, 'visit_wrapper', None) | wrapper = getattr(f, 'visit_wrapper', None) | ||||
if wrapper is not None: | if wrapper is not None: | ||||
@@ -380,7 +379,7 @@ class Interpreter(_Decoratable): | |||||
else: | else: | ||||
return f(tree) | return f(tree) | ||||
def visit_children(self, tree): | |||||
def visit_children(self, tree: Tree) -> List[_T]: | |||||
return [self.visit(child) if isinstance(child, Tree) else child | return [self.visit(child) if isinstance(child, Tree) else child | ||||
for child in tree.children] | for child in tree.children] | ||||
@@ -391,6 +390,16 @@ class Interpreter(_Decoratable): | |||||
return self.visit_children(tree) | return self.visit_children(tree) | ||||
_InterMethod = Callable[[Type[Interpreter], _T], _R] | |||||
def visit_children_decor(func: _InterMethod) -> _InterMethod: | |||||
"See Interpreter" | |||||
@wraps(func) | |||||
def inner(cls, tree): | |||||
values = cls.visit_children(tree) | |||||
return func(cls, values) | |||||
return inner | |||||
# Decorators | # Decorators | ||||
def _apply_decorator(obj, decorator, **kwargs): | def _apply_decorator(obj, decorator, **kwargs): | ||||
@@ -416,10 +425,6 @@ def _inline_args__func(func): | |||||
return smart_decorator(func, create_decorator) | return smart_decorator(func, create_decorator) | ||||
def inline_args(obj): # XXX Deprecated | |||||
return _apply_decorator(obj, _inline_args__func) | |||||
def _visitor_args_func_dec(func, visit_wrapper=None, static=False): | def _visitor_args_func_dec(func, visit_wrapper=None, static=False): | ||||
def create_decorator(_f, with_self): | def create_decorator(_f, with_self): | ||||
if with_self: | if with_self: | ||||
@@ -444,12 +449,12 @@ def _vargs_inline(f, _data, children, _meta): | |||||
def _vargs_meta_inline(f, _data, children, meta): | def _vargs_meta_inline(f, _data, children, meta): | ||||
return f(meta, *children) | return f(meta, *children) | ||||
def _vargs_meta(f, _data, children, meta): | def _vargs_meta(f, _data, children, meta): | ||||
return f(children, meta) # TODO swap these for consistency? Backwards incompatible! | |||||
return f(meta, children) | |||||
def _vargs_tree(f, data, children, meta): | def _vargs_tree(f, data, children, meta): | ||||
return f(Tree(data, children, meta)) | return f(Tree(data, children, meta)) | ||||
def v_args(inline=False, meta=False, tree=False, wrapper=None): | |||||
def v_args(inline: bool=False, meta: bool=False, tree: bool=False, wrapper: Optional[Callable]=None) -> Callable[[_DECORATED], _DECORATED]: | |||||
"""A convenience decorator factory for modifying the behavior of user-supplied visitor methods. | """A convenience decorator factory for modifying the behavior of user-supplied visitor methods. | ||||
By default, callback methods of transformers/visitors accept one argument - a list of the node's children. | By default, callback methods of transformers/visitors accept one argument - a list of the node's children. | ||||
@@ -48,18 +48,18 @@ Main Features: | |||||
- CYK parser, for highly ambiguous grammars | - CYK parser, for highly ambiguous grammars | ||||
- EBNF grammar | - EBNF grammar | ||||
- Unicode fully supported | - Unicode fully supported | ||||
- Python 2 & 3 compatible | |||||
- Automatic line & column tracking | - Automatic line & column tracking | ||||
- Standard library of terminals (strings, numbers, names, etc.) | - Standard library of terminals (strings, numbers, names, etc.) | ||||
- Import grammars from Nearley.js | - Import grammars from Nearley.js | ||||
- Extensive test suite | - Extensive test suite | ||||
- And much more! | - And much more! | ||||
Since version 1.0, only Python versions 3.6 and up are supported. | |||||
''', | ''', | ||||
classifiers=[ | classifiers=[ | ||||
"Development Status :: 5 - Production/Stable", | "Development Status :: 5 - Production/Stable", | ||||
"Intended Audience :: Developers", | "Intended Audience :: Developers", | ||||
"Programming Language :: Python :: 2.7", | |||||
"Programming Language :: Python :: 3", | "Programming Language :: Python :: 3", | ||||
"Topic :: Software Development :: Libraries :: Python Modules", | "Topic :: Software Development :: Libraries :: Python Modules", | ||||
"Topic :: Text Processing :: General", | "Topic :: Text Processing :: General", | ||||
@@ -1 +1 @@ | |||||
Subproject commit a46b37471db486db0f6e1ce6a2934fb238346b44 | |||||
Subproject commit 326831689826cb1b9a4d21d1ce0d5db9278e9636 |
@@ -8,7 +8,7 @@ import os | |||||
import sys | import sys | ||||
from copy import copy, deepcopy | from copy import copy, deepcopy | ||||
from lark.utils import Py36, isascii | |||||
from lark.utils import isascii | |||||
from lark import Token, Transformer_NonRecursive, LexError | from lark import Token, Transformer_NonRecursive, LexError | ||||
@@ -208,11 +208,11 @@ class TestParsers(unittest.TestCase): | |||||
@v_args(meta=True) | @v_args(meta=True) | ||||
class T1(Transformer): | class T1(Transformer): | ||||
def a(self, children, meta): | |||||
def a(self, meta, children): | |||||
assert not children | assert not children | ||||
return meta.line | return meta.line | ||||
def start(self, children, meta): | |||||
def start(self, meta, children): | |||||
return children | return children | ||||
@v_args(meta=True, inline=True) | @v_args(meta=True, inline=True) | ||||
@@ -1565,7 +1565,6 @@ def _make_parser_test(LEXER, PARSER): | |||||
self.assertEqual( g.parse('"hello"').children, ['"hello"']) | self.assertEqual( g.parse('"hello"').children, ['"hello"']) | ||||
self.assertEqual( g.parse("'hello'").children, ["'hello'"]) | self.assertEqual( g.parse("'hello'").children, ["'hello'"]) | ||||
@unittest.skipIf(not Py36, "Required re syntax only exists in python3.6+") | |||||
def test_join_regex_flags(self): | def test_join_regex_flags(self): | ||||
g = r""" | g = r""" | ||||
start: A | start: A | ||||
@@ -183,8 +183,8 @@ class TestReconstructor(TestCase): | |||||
keyword x += y | keyword x += y | ||||
""" | """ | ||||
l1 = Lark(g1, parser='lalr') | |||||
l2 = Lark(g2, parser='lalr') | |||||
l1 = Lark(g1, parser='lalr', maybe_placeholders=False) | |||||
l2 = Lark(g2, parser='lalr', maybe_placeholders=False) | |||||
r = Reconstructor(l2) | r = Reconstructor(l2) | ||||
tree = l1.parse(code) | tree = l1.parse(code) | ||||
@@ -24,7 +24,7 @@ class TestStandalone(TestCase): | |||||
standalone.gen_standalone(Lark(grammar, parser='lalr'), out=code_buf, compress=compress) | standalone.gen_standalone(Lark(grammar, parser='lalr'), out=code_buf, compress=compress) | ||||
code = code_buf.getvalue() | code = code_buf.getvalue() | ||||
context = {'__doc__': None} | |||||
context = {'__doc__': None, '__name__': 'test_standalone'} | |||||
exec(code, context) | exec(code, context) | ||||
return context | return context | ||||
@@ -1,5 +1,5 @@ | |||||
[tox] | [tox] | ||||
envlist = py27, py34, py35, py36, py37, py38, py39, py310, pypy, pypy3 | |||||
envlist = py36, py37, py38, py39, pypy, pypy3 | |||||
skip_missing_interpreters=true | skip_missing_interpreters=true | ||||
[testenv] | [testenv] | ||||