
Merge branch 'v1.0'

remotes/origin/gm/2021-09-23T00Z/github.com--lark-parser-lark/master
Erez Sh 3 years ago
parent commit efc881a00d
41 changed files with 473 additions and 1063 deletions
  1. .github/workflows/mypy.yml (+1, -1)
  2. .github/workflows/tests.yml (+1, -1)
  3. lark-stubs/__init__.pyi (+0, -12)
  4. lark-stubs/ast_utils.pyi (+0, -17)
  5. lark-stubs/exceptions.pyi (+0, -65)
  6. lark-stubs/grammar.pyi (+0, -14)
  7. lark-stubs/indenter.pyi (+0, -47)
  8. lark-stubs/lark.pyi (+0, -109)
  9. lark-stubs/lexer.pyi (+0, -161)
  10. lark-stubs/load_grammar.pyi (+0, -31)
  11. lark-stubs/parsers/__init__.pyi (+0, -0)
  12. lark-stubs/reconstruct.pyi (+0, -39)
  13. lark-stubs/tree.pyi (+0, -75)
  14. lark-stubs/visitors.pyi (+0, -108)
  15. lark/CHANGELOG.md (+9, -0)
  16. lark/__init__.py (+1, -2)
  17. lark/ast_utils.py (+6, -2)
  18. lark/common.py (+17, -8)
  19. lark/exceptions.py (+32, -17)
  20. lark/grammar.py (+16, -5)
  21. lark/grammars/python.lark (+2, -2)
  22. lark/indenter.py (+42, -5)
  23. lark/lark.py (+73, -61)
  24. lark/lexer.py (+107, -74)
  25. lark/load_grammar.py (+35, -33)
  26. lark/parse_tree_builder.py (+5, -14)
  27. lark/parser_frontends.py (+12, -10)
  28. lark/parsers/lalr_interactive_parser.py (+0, -4)
  29. lark/parsers/lalr_puppet.py (+0, -3)
  30. lark/reconstruct.py (+12, -5)
  31. lark/tools/nearley.py (+3, -2)
  32. lark/tools/standalone.py (+10, -16)
  33. lark/tree.py (+39, -31)
  34. lark/utils.py (+4, -47)
  35. lark/visitors.py (+36, -31)
  36. setup.py (+2, -2)
  37. tests/test_nearley/nearley (+1, -1)
  38. tests/test_parser.py (+3, -4)
  39. tests/test_reconstructor.py (+2, -2)
  40. tests/test_tools.py (+1, -1)
  41. tox.ini (+1, -1)

.github/workflows/mypy.yml (+1, -1)

@@ -16,4 +16,4 @@ jobs:
python -m pip install --upgrade pip
pip install mypy
- name: Lint with mypy
- run: mypy -p lark-stubs || true
+ run: mypy -p lark || true

.github/workflows/tests.yml (+1, -1)

@@ -6,7 +6,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
- python-version: [2.7, 3.5, 3.6, 3.7, 3.8, 3.9, 3.10.0-rc - 3.10, pypy2, pypy3]
+ python-version: [3.6, 3.7, 3.8, 3.9, 3.10.0-rc - 3.10, pypy3]

steps:
- uses: actions/checkout@v2


lark-stubs/__init__.pyi (+0, -12)

@@ -1,12 +0,0 @@
# -*- coding: utf-8 -*-

from .tree import *
from .visitors import *
from .exceptions import *
from .lexer import *
from .load_grammar import *
from .lark import *
from logging import Logger as _Logger

logger: _Logger
__version__: str = ...

lark-stubs/ast_utils.pyi (+0, -17)

@@ -1,17 +0,0 @@
import types
from typing import Optional

from .visitors import Transformer

class Ast(object):
pass

class AsList(object):
pass


def create_transformer(
ast_module: types.ModuleType,
transformer: Optional[Transformer]=None
) -> Transformer:
...

lark-stubs/exceptions.pyi (+0, -65)

@@ -1,65 +0,0 @@
# -*- coding: utf-8 -*-

from typing import Dict, Iterable, Callable, Union, TypeVar, Tuple, Any, List, Set
from .tree import Tree
from .lexer import Token
from .parsers.lalr_interactive_parser import InteractiveParser

class LarkError(Exception):
pass


class ConfigurationError(LarkError, ValueError):
pass


class GrammarError(LarkError):
pass


class ParseError(LarkError):
pass


class LexError(LarkError):
pass


T = TypeVar('T')

class UnexpectedEOF(ParseError):
expected: List[Token]

class UnexpectedInput(LarkError):
line: int
column: int
pos_in_stream: int
state: Any

def get_context(self, text: str, span: int = ...) -> str:
...

def match_examples(
self,
parse_fn: Callable[[str], Tree],
examples: Union[Dict[T, Iterable[str]], Iterable[Tuple[T, Iterable[str]]]],
token_type_match_fallback: bool = False,
use_accepts: bool = False,
) -> T:
...


class UnexpectedToken(ParseError, UnexpectedInput):
expected: Set[str]
considered_rules: Set[str]
interactive_parser: InteractiveParser
accepts: Set[str]

class UnexpectedCharacters(LexError, UnexpectedInput):
allowed: Set[str]
considered_tokens: Set[Any]


class VisitError(LarkError):
obj: Union[Tree, Token]
orig_exc: Exception

lark-stubs/grammar.pyi (+0, -14)

@@ -1,14 +0,0 @@
from typing import Optional, Tuple


class RuleOptions:
keep_all_tokens: bool
expand1: bool
priority: int
template_source: Optional[str]
empty_indices: Tuple[bool, ...]


class Symbol:
name: str
is_term: bool

lark-stubs/indenter.pyi (+0, -47)

@@ -1,47 +0,0 @@
# -*- coding: utf-8 -*-

from typing import Tuple, List, Iterator, Optional
from abc import ABC, abstractmethod
from .lexer import Token
from .lark import PostLex


class Indenter(PostLex, ABC):
paren_level: Optional[int]
indent_level: Optional[List[int]]

def __init__(self) -> None:
...

def handle_NL(self, token: Token) -> Iterator[Token]:
...

@property
@abstractmethod
def NL_type(self) -> str:
...

@property
@abstractmethod
def OPEN_PAREN_types(self) -> List[str]:
...

@property
@abstractmethod
def CLOSE_PAREN_types(self) -> List[str]:
...

@property
@abstractmethod
def INDENT_type(self) -> str:
...

@property
@abstractmethod
def DEDENT_type(self) -> str:
...

@property
@abstractmethod
def tab_len(self) -> int:
...

lark-stubs/lark.pyi (+0, -109)

@@ -1,109 +0,0 @@
# -*- coding: utf-8 -*-

from typing import (
TypeVar, Type, List, Dict, IO, Iterator, Callable, Union, Optional,
Literal, Protocol, Tuple, Iterable,
)

from .parsers.lalr_interactive_parser import InteractiveParser
from .visitors import Transformer
from .lexer import Token, Lexer, TerminalDef
from .tree import Tree
from .exceptions import UnexpectedInput
from .load_grammar import Grammar

_T = TypeVar('_T')


class PostLex(Protocol):

def process(self, stream: Iterator[Token]) -> Iterator[Token]:
...

always_accept: Iterable[str]


class LarkOptions:
start: List[str]
parser: str
lexer: str
transformer: Optional[Transformer]
postlex: Optional[PostLex]
ambiguity: str
regex: bool
debug: bool
keep_all_tokens: bool
propagate_positions: Union[bool, Callable]
maybe_placeholders: bool
lexer_callbacks: Dict[str, Callable[[Token], Token]]
cache: Union[bool, str]
g_regex_flags: int
use_bytes: bool
import_paths: List[Union[str, Callable[[Union[None, str, PackageResource], str], Tuple[str, str]]]]
source_path: Optional[str]


class PackageResource(object):
pkg_name: str
path: str

def __init__(self, pkg_name: str, path: str): ...


class FromPackageLoader:
def __init__(self, pkg_name: str, search_paths: Tuple[str, ...] = ...): ...

def __call__(self, base_path: Union[None, str, PackageResource], grammar_path: str) -> Tuple[PackageResource, str]: ...


class Lark:
source_path: str
source_grammar: str
grammar: Grammar
options: LarkOptions
lexer: Lexer
terminals: List[TerminalDef]

def __init__(
self,
grammar: Union[Grammar, str, IO[str]],
*,
start: Union[None, str, List[str]] = "start",
parser: Literal["earley", "lalr", "cyk", "auto"] = "auto",
lexer: Union[Literal["auto", "standard", "contextual", "dynamic", "dynamic_complete"], Type[Lexer]] = "auto",
transformer: Optional[Transformer] = None,
postlex: Optional[PostLex] = None,
ambiguity: Literal["explicit", "resolve"] = "resolve",
regex: bool = False,
debug: bool = False,
keep_all_tokens: bool = False,
propagate_positions: Union[bool, Callable] = False,
maybe_placeholders: bool = False,
lexer_callbacks: Optional[Dict[str, Callable[[Token], Token]]] = None,
cache: Union[bool, str] = False,
g_regex_flags: int = ...,
use_bytes: bool = False,
import_paths: List[Union[str, Callable[[Union[None, str, PackageResource], str], Tuple[str, str]]]] = ...,
source_path: Optional[str]=None,
):
...

def parse(self, text: str, start: Optional[str] = None, on_error: Callable[[UnexpectedInput], bool] = None) -> Tree:
...

def parse_interactive(self, text: str = None, start: Optional[str] = None) -> InteractiveParser:
...

@classmethod
def open(cls: Type[_T], grammar_filename: str, rel_to: Optional[str] = None, **options) -> _T:
...

@classmethod
def open_from_package(cls: Type[_T], package: str, grammar_path: str, search_paths: Tuple[str, ...] = ..., **options) -> _T:
...

def lex(self, text: str, dont_ignore: bool = False) -> Iterator[Token]:
...

def get_terminal(self, name: str) -> TerminalDef:
...

lark-stubs/lexer.pyi (+0, -161)

@@ -1,161 +0,0 @@
# -*- coding: utf-8 -*-
from types import ModuleType
from typing import (
TypeVar, Type, Tuple, List, Dict, Iterator, Collection, Callable, Optional, FrozenSet, Any,
Pattern as REPattern,
)
from abc import abstractmethod, ABC

_T = TypeVar('_T')


class Pattern(ABC):
value: str
flags: Collection[str]
raw: str
type: str

def __init__(self, value: str, flags: Collection[str] = (), raw: str = None) -> None:
...

@abstractmethod
def to_regexp(self) -> str:
...

@property
@abstractmethod
def min_width(self) -> int:
...

@property
@abstractmethod
def max_width(self) -> int:
...


class PatternStr(Pattern):
type: str = ...

def to_regexp(self) -> str:
...

@property
def min_width(self) -> int:
...

@property
def max_width(self) -> int:
...


class PatternRE(Pattern):
type: str = ...

def to_regexp(self) -> str:
...

@property
def min_width(self) -> int:
...

@property
def max_width(self) -> int:
...


class TerminalDef:
name: str
pattern: Pattern
priority: int

def __init__(self, name: str, pattern: Pattern, priority: int = ...) -> None:
...

def user_repr(self) -> str: ...


class Token(str):
type: str
start_pos: int
value: Any
line: int
column: int
end_line: int
end_column: int
end_pos: int

def __init__(self, type_: str, value: Any, start_pos: int = None, line: int = None, column: int = None, end_line: int = None, end_column: int = None, end_pos: int = None) -> None:
...

def update(self, type_: Optional[str] = None, value: Optional[Any] = None) -> Token:
...

@classmethod
def new_borrow_pos(cls: Type[_T], type_: str, value: Any, borrow_t: Token) -> _T:
...


_Callback = Callable[[Token], Token]


class Lexer(ABC):
lex: Callable[..., Iterator[Token]]


class LexerConf:
tokens: Collection[TerminalDef]
re_module: ModuleType
ignore: Collection[str] = ()
postlex: Any =None
callbacks: Optional[Dict[str, _Callback]] = None
g_regex_flags: int = 0
skip_validation: bool = False
use_bytes: bool = False



class TraditionalLexer(Lexer):
terminals: Collection[TerminalDef]
ignore_types: FrozenSet[str]
newline_types: FrozenSet[str]
user_callbacks: Dict[str, _Callback]
callback: Dict[str, _Callback]
mres: List[Tuple[REPattern, Dict[int, str]]]
re: ModuleType

def __init__(
self,
conf: LexerConf
) -> None:
...

def build(self) -> None:
...

def match(self, stream: str, pos: int) -> Optional[Tuple[str, str]]:
...

def lex(self, stream: str) -> Iterator[Token]:
...

def next_token(self, lex_state: Any, parser_state: Any = None) -> Token:
...

class ContextualLexer(Lexer):
lexers: Dict[str, TraditionalLexer]
root_lexer: TraditionalLexer

def __init__(
self,
terminals: Collection[TerminalDef],
states: Dict[str, Collection[str]],
re_: ModuleType,
ignore: Collection[str] = ...,
always_accept: Collection[str] = ...,
user_callbacks: Dict[str, _Callback] = ...,
g_regex_flags: int = ...
) -> None:
...

def lex(self, stream: str, get_parser_state: Callable[[], str]) -> Iterator[Token]:
...

lark-stubs/load_grammar.pyi (+0, -31)

@@ -1,31 +0,0 @@
from typing import List, Tuple, Union, Callable, Dict, Optional

from .tree import Tree
from .grammar import RuleOptions
from .exceptions import UnexpectedInput


class Grammar:
rule_defs: List[Tuple[str, Tuple[str, ...], Tree, RuleOptions]]
term_defs: List[Tuple[str, Tuple[Tree, int]]]
ignore: List[str]


class GrammarBuilder:
global_keep_all_tokens: bool
import_paths: List[Union[str, Callable]]
used_files: Dict[str, str]

def __init__(self, global_keep_all_tokens: bool = False, import_paths: List[Union[str, Callable]] = None, used_files: Dict[str, str]=None) -> None: ...

def load_grammar(self, grammar_text: str, grammar_name: str = ..., mangle: Callable[[str], str] = None) -> None: ...

def do_import(self, dotted_path: Tuple[str, ...], base_path: Optional[str], aliases: Dict[str, str],
base_mangle: Callable[[str], str] = None) -> None: ...

def validate(self) -> None: ...

def build(self) -> Grammar: ...


def find_grammar_errors(text: str, start: str='start') -> List[Tuple[UnexpectedInput, str]]: ...

lark-stubs/parsers/__init__.pyi (+0, -0)


lark-stubs/reconstruct.pyi (+0, -39)

@@ -1,39 +0,0 @@
# -*- coding: utf-8 -*-

from typing import List, Dict, Union, Callable, Iterable

from .grammar import Symbol
from .lark import Lark
from .tree import Tree
from .visitors import Transformer_InPlace
from .lexer import TerminalDef


class WriteTokensTransformer(Transformer_InPlace):

def __init__(self, tokens: Dict[str, TerminalDef], term_subs: Dict[str, Callable[[Symbol], str]] = ...): ...


class MatchTree(Tree):
pass


class MakeMatchTree:
name: str
expansion: List[TerminalDef]

def __init__(self, name: str, expansion: List[TerminalDef]):
...

def __call__(self, args: List[Union[str, Tree]]):
...


class Reconstructor:

def __init__(self, parser: Lark, term_subs: Dict[str, Callable[[Symbol], str]] = ...):
...

def reconstruct(self, tree: Tree, postproc: Callable[[Iterable[str]], Iterable[str]]=None,
insert_spaces: bool = True) -> str:
...

lark-stubs/tree.pyi (+0, -75)

@@ -1,75 +0,0 @@
# -*- coding: utf-8 -*-

from typing import List, Callable, Iterator, Union, Optional, Literal, Any
from .lexer import TerminalDef

class Meta:
empty: bool
line: int
column: int
start_pos: int
end_line: int
end_column: int
end_pos: int
orig_expansion: List[TerminalDef]
match_tree: bool


class Tree:
data: str
children: List[Union[str, Tree]]
meta: Meta

def __init__(
self,
data: str,
children: List[Union[str, Tree]],
meta: Optional[Meta] = None
) -> None:
...

def pretty(self, indent_str: str = ...) -> str:
...

def find_pred(self, pred: Callable[[Tree], bool]) -> Iterator[Tree]:
...

def find_data(self, data: str) -> Iterator[Tree]:
...

def expand_kids_by_index(self, *indices: int) -> None:
...

def expand_kids_by_data(self, *data_values: str) -> bool:
...

def scan_values(self, pred: Callable[[Union[str, Tree]], bool]) -> Iterator[str]:
...

def iter_subtrees(self) -> Iterator[Tree]:
...

def iter_subtrees_topdown(self) -> Iterator[Tree]:
...

def copy(self) -> Tree:
...

def set(self, data: str, children: List[Union[str, Tree]]) -> None:
...

def __hash__(self) -> int:
...


class SlottedTree(Tree):
pass


def pydot__tree_to_png(
tree: Tree,
filename: str,
rankdir: Literal["TB", "LR", "BT", "RL"] = ...,
**kwargs
) -> None:
...

lark-stubs/visitors.pyi (+0, -108)

@@ -1,108 +0,0 @@
# -*- coding: utf-8 -*-

from typing import TypeVar, Tuple, List, Callable, Generic, Type, Union
from abc import ABC
from .tree import Tree

_T = TypeVar('_T')
_R = TypeVar('_R')
_FUNC = Callable[..., _T]
_DECORATED = Union[_FUNC, type]


class Transformer(ABC, Generic[_T]):

def __init__(self, visit_tokens: bool = True) -> None:
...

def transform(self, tree: Tree) -> _T:
...

def __mul__(self, other: Transformer[_T]) -> TransformerChain[_T]:
...


class TransformerChain(Generic[_T]):
transformers: Tuple[Transformer[_T], ...]

def __init__(self, *transformers: Transformer[_T]) -> None:
...

def transform(self, tree: Tree) -> _T:
...

def __mul__(self, other: Transformer[_T]) -> TransformerChain[_T]:
...


class Transformer_InPlace(Transformer):
pass


class Transformer_NonRecursive(Transformer):
pass


class Transformer_InPlaceRecursive(Transformer):
pass


class VisitorBase:
pass


class Visitor(VisitorBase, ABC, Generic[_T]):

def visit(self, tree: Tree) -> Tree:
...

def visit_topdown(self, tree: Tree) -> Tree:
...


class Visitor_Recursive(VisitorBase):

def visit(self, tree: Tree) -> Tree:
...

def visit_topdown(self, tree: Tree) -> Tree:
...


class Interpreter(ABC, Generic[_T]):

def visit(self, tree: Tree) -> _T:
...

def visit_children(self, tree: Tree) -> List[_T]:
...


_InterMethod = Callable[[Type[Interpreter], _T], _R]


def v_args(
inline: bool = False,
meta: bool = False,
tree: bool = False,
wrapper: Callable = None
) -> Callable[[_DECORATED], _DECORATED]:
...


def visit_children_decor(func: _InterMethod) -> _InterMethod:
...


class Discard(Exception):
pass


# Deprecated
class InlineTransformer:
pass


# Deprecated
def inline_args(obj: _FUNC) -> _FUNC:
...

lark/CHANGELOG.md (+9, -0)

@@ -0,0 +1,9 @@
+ v1.0
+ - `maybe_placeholders` is now True by default
+ - `use_accepts` in `UnexpectedInput.match_examples()` is now True by default
+ - Token priority is now 0 by default
+ - `v_args(meta=True)` now gives meta as the first argument. i.e. `(meta, children)`
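
A quick sketch of what these new defaults mean in practice (the grammar and transformer below are illustrative, not part of this commit):

```python
from lark import Lark, Transformer, v_args

# maybe_placeholders=True (new default): an unmatched [] yields None
parser = Lark(r'''
    start: "a" [SEP] "b"
    SEP: ","
''', parser="lalr")
assert parser.parse("ab").children == [None]
assert parser.parse("a,b").children == [","]

# v_args(meta=True) now passes meta first, i.e. (meta, children)
@v_args(meta=True)
class CountChildren(Transformer):
    def start(self, meta, children):
        return len(children)
```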

lark/__init__.py (+1, -2)

@@ -1,10 +1,9 @@
from .utils import logger
from .tree import Tree
from .visitors import Transformer, Visitor, v_args, Discard, Transformer_NonRecursive
- from .visitors import InlineTransformer, inline_args # XXX Deprecated
from .exceptions import (ParseError, LexError, GrammarError, UnexpectedToken,
UnexpectedInput, UnexpectedCharacters, UnexpectedEOF, LarkError)
from .lexer import Token
from .lark import Lark

__version__ = "0.12.0"
__version__: str = "1.0.0a"

lark/ast_utils.py (+6, -2)

@@ -3,6 +3,8 @@
"""

import inspect, re
import types
from typing import Optional, Callable

from lark import Transformer, v_args

@@ -29,7 +31,9 @@ class WithMeta(object):
def camel_to_snake(name):
return re.sub(r'(?<!^)(?=[A-Z])', '_', name).lower()

def create_transformer(ast_module, transformer=None, decorator_factory=v_args):
def create_transformer(ast_module: types.ModuleType,
transformer: Optional[Transformer]=None,
decorator_factory: Callable=v_args) -> Transformer:
"""Collects `Ast` subclasses from the given module, and creates a Lark transformer that builds the AST.

For each class, we create a corresponding rule in the transformer, with a matching name.
@@ -52,4 +56,4 @@ def create_transformer(ast_module, transformer=None, decorator_factory=v_args):
obj = wrapper(obj).__get__(t)
setattr(t, camel_to_snake(name), obj)

- return t
\ No newline at end of file
+ return t
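
For context, a minimal usage sketch of the now-annotated `create_transformer` (the AST module here is the current module; the grammar is hypothetical):

```python
import sys
from dataclasses import dataclass
from lark import Lark, Transformer, ast_utils

@dataclass
class Name(ast_utils.Ast):
    # camel_to_snake maps this class to the rule `name`
    value: str

parser = Lark(r'''
    start: name
    name: /\w+/
''', parser="lalr")

transformer = ast_utils.create_transformer(sys.modules[__name__], Transformer())
tree = transformer.transform(parser.parse("foo"))  # Tree('start', [Name(value='foo')])
```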

lark/common.py (+17, -8)

@@ -1,17 +1,31 @@
from warnings import warn
from copy import deepcopy
from types import ModuleType
from typing import Callable, Collection, Dict, Optional, TYPE_CHECKING

if TYPE_CHECKING:
from .lark import PostLex

from .utils import Serialize
from .lexer import TerminalDef
from .lexer import TerminalDef, Token

###{standalone

_Callback = Callable[[Token], Token]

class LexerConf(Serialize):
__serialize_fields__ = 'terminals', 'ignore', 'g_regex_flags', 'use_bytes', 'lexer_type'
__serialize_namespace__ = TerminalDef,

def __init__(self, terminals, re_module, ignore=(), postlex=None, callbacks=None, g_regex_flags=0, skip_validation=False, use_bytes=False):
terminals: Collection[TerminalDef]
re_module: ModuleType
ignore: Collection[str]
postlex: 'Optional[PostLex]'
callbacks: Dict[str, _Callback]
g_regex_flags: int
skip_validation: bool
use_bytes: bool

def __init__(self, terminals: Collection[TerminalDef], re_module: ModuleType, ignore: Collection[str]=(), postlex: 'Optional[PostLex]'=None, callbacks: Optional[Dict[str, _Callback]]=None, g_regex_flags: int=0, skip_validation: bool=False, use_bytes: bool=False):
self.terminals = terminals
self.terminals_by_name = {t.name: t for t in self.terminals}
assert len(self.terminals) == len(self.terminals_by_name)
@@ -24,11 +38,6 @@ class LexerConf(Serialize):
self.use_bytes = use_bytes
self.lexer_type = None

@property
def tokens(self):
warn("LexerConf.tokens is deprecated. Use LexerConf.terminals instead", DeprecationWarning)
return self.terminals

def _deserialize(self):
self.terminals_by_name = {t.name: t for t in self.terminals}



lark/exceptions.py (+32, -17)

@@ -1,11 +1,13 @@
from warnings import warn

- from .utils import STRING_TYPE, logger, NO_VALUE
+ from .utils import logger, NO_VALUE
from typing import Dict, Iterable, Callable, Union, TypeVar, Tuple, Any, List, Set, Optional, Collection, TYPE_CHECKING

if TYPE_CHECKING:
from .lexer import Token
from .parsers.lalr_interactive_parser import InteractiveParser
from .tree import Tree

###{standalone


class LarkError(Exception):
pass

@@ -14,7 +16,7 @@ class ConfigurationError(LarkError, ValueError):
pass


def assert_config(value, options, msg='Got %r, expected one of %s'):
def assert_config(value, options: Collection, msg='Got %r, expected one of %s'):
if value not in options:
raise ConfigurationError(msg % (value, options))

@@ -30,6 +32,7 @@ class ParseError(LarkError):
class LexError(LarkError):
pass

T = TypeVar('T')

class UnexpectedInput(LarkError):
"""UnexpectedInput Error.
@@ -42,10 +45,13 @@ class UnexpectedInput(LarkError):

After catching one of these exceptions, you may call the following helper methods to create a nicer error message.
"""
line: int
column: int
pos_in_stream = None
state: Any
_terminals_by_name = None

def get_context(self, text, span=40):
def get_context(self, text: str, span: int=40) -> str:
"""Returns a pretty string pinpointing the error in the text,
with span amount of context characters around it.

@@ -66,7 +72,11 @@ class UnexpectedInput(LarkError):
after = text[pos:end].split(b'\n', 1)[0]
return (before + after + b'\n' + b' ' * len(before.expandtabs()) + b'^\n').decode("ascii", "backslashreplace")

- def match_examples(self, parse_fn, examples, token_type_match_fallback=False, use_accepts=False):
+ def match_examples(self, parse_fn: 'Callable[[str], Tree]',
+ examples: Union[Dict[T, Iterable[str]], Iterable[Tuple[T, Iterable[str]]]],
+ token_type_match_fallback: bool=False,
+ use_accepts: bool=True
+ ) -> Optional[T]:
"""Allows you to detect what's wrong in the input text by matching
against example errors.

@@ -81,8 +91,7 @@ class UnexpectedInput(LarkError):
Parameters:
parse_fn: parse function (usually ``lark_instance.parse``)
examples: dictionary of ``{'example_string': value}``.
- use_accepts: Recommended to call this with ``use_accepts=True``.
- The default is ``False`` for backwards compatibility.
+ use_accepts: Recommended to keep this as ``use_accepts=True``.
"""
assert self.state is not None, "Not supported for this exception"

@@ -91,14 +100,14 @@ class UnexpectedInput(LarkError):

candidate = (None, False)
for i, (label, example) in enumerate(examples):
- assert not isinstance(example, STRING_TYPE)
+ assert not isinstance(example, str), "Expecting a list"

for j, malformed in enumerate(example):
try:
parse_fn(malformed)
except UnexpectedInput as ut:
if ut.state == self.state:
- if use_accepts and hasattr(self, 'accepts') and ut.accepts != self.accepts:
+ if use_accepts and hasattr(self, 'accepts') and hasattr(ut, 'accepts') and ut.accepts != self.accepts:
logger.debug("Different accepts with same state[%d]: %s != %s at example [%s][%s]" %
(self.state, self.accepts, ut.accepts, i, j))
continue
@@ -131,6 +140,7 @@ class UnexpectedInput(LarkError):
class UnexpectedEOF(ParseError, UnexpectedInput):
"""An exception that is raised by the parser, when the input ends while it still expects a token.
"""
expected: 'List[Token]'

def __init__(self, expected, state=None, terminals_by_name=None):
super(UnexpectedEOF, self).__init__()
@@ -156,6 +166,9 @@ class UnexpectedCharacters(LexError, UnexpectedInput):
string of characters to any of its terminals.
"""

allowed: Set[str]
considered_tokens: Set[Any]

def __init__(self, seq, lex_pos, line, column, allowed=None, considered_tokens=None, state=None, token_history=None,
terminals_by_name=None, considered_rules=None):
super(UnexpectedCharacters, self).__init__()
@@ -204,6 +217,10 @@ class UnexpectedToken(ParseError, UnexpectedInput):
Note: These parameters are available as attributes of the instance.
"""

expected: Set[str]
considered_rules: Set[str]
interactive_parser: 'InteractiveParser'

def __init__(self, token, expected, considered_rules=None, state=None, interactive_parser=None, terminals_by_name=None, token_history=None):
super(UnexpectedToken, self).__init__()
@@ -223,7 +240,7 @@ class UnexpectedToken(ParseError, UnexpectedInput):


@property
def accepts(self):
def accepts(self) -> Set[str]:
if self._accepts is NO_VALUE:
self._accepts = self.interactive_parser and self.interactive_parser.accepts()
return self._accepts
@@ -236,11 +253,6 @@ class UnexpectedToken(ParseError, UnexpectedInput):

return message

@property
def puppet(self):
warn("UnexpectedToken.puppet attribute has been renamed to interactive_parser", DeprecationWarning)
return self.interactive_parser


class VisitError(LarkError):
@@ -256,6 +268,9 @@ class VisitError(LarkError):
Note: These parameters are available as attributes
"""

obj: 'Union[Tree, Token]'
orig_exc: Exception

def __init__(self, rule, obj, orig_exc):
message = 'Error trying to process rule "%s":\n\n%s' % (rule, orig_exc)
super(VisitError, self).__init__(message)
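
A minimal sketch of the `match_examples` flow with the new `use_accepts=True` default (grammar and labels are illustrative):

```python
from lark import Lark, UnexpectedInput

parser = Lark(r'''start: "{" "}" ''', parser="lalr")

error_examples = {
    "unclosed brace": ["{"],
    "unexpected closing brace": ["}"],
}

def parse_with_diagnosis(text):
    try:
        return parser.parse(text)
    except UnexpectedInput as u:
        # use_accepts now defaults to True, so the parser's accepted
        # terminals are compared in addition to its state
        label = u.match_examples(parser.parse, error_examples)
        raise SyntaxError(label or "unknown error") from u
```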


lark/grammar.py (+16, -5)

@@ -1,13 +1,18 @@
from typing import Optional, Tuple, ClassVar

from .utils import Serialize

###{standalone
TOKEN_DEFAULT_PRIORITY = 0


class Symbol(Serialize):
__slots__ = ('name',)

is_term = NotImplemented
name: str
is_term: ClassVar[bool] = NotImplemented

def __init__(self, name):
def __init__(self, name: str) -> None:
self.name = name

def __eq__(self, other):
@@ -29,7 +34,7 @@ class Symbol(Serialize):
class Terminal(Symbol):
__serialize_fields__ = 'name', 'filter_out'

is_term = True
is_term: ClassVar[bool] = True

def __init__(self, name, filter_out=False):
self.name = name
@@ -43,13 +48,19 @@ class Terminal(Symbol):
class NonTerminal(Symbol):
__serialize_fields__ = 'name',

is_term = False
is_term: ClassVar[bool] = False


class RuleOptions(Serialize):
__serialize_fields__ = 'keep_all_tokens', 'expand1', 'priority', 'template_source', 'empty_indices'

def __init__(self, keep_all_tokens=False, expand1=False, priority=None, template_source=None, empty_indices=()):
keep_all_tokens: bool
expand1: bool
priority: Optional[int]
template_source: Optional[str]
empty_indices: Tuple[bool, ...]

def __init__(self, keep_all_tokens: bool=False, expand1: bool=False, priority: Optional[int]=None, template_source: Optional[str]=None, empty_indices: Tuple[bool, ...]=()) -> None:
self.keep_all_tokens = keep_all_tokens
self.expand1 = expand1
self.priority = priority


lark/grammars/python.lark (+2, -2)

@@ -10,8 +10,8 @@ DEC_NUMBER: /0|[1-9][\d_]*/i
HEX_NUMBER.2: /0x[\da-f]*/i
OCT_NUMBER.2: /0o[0-7]*/i
BIN_NUMBER.2 : /0b[0-1]*/i
- FLOAT_NUMBER.2: /((\d+\.[\d_]*|\.[\d_]+)([eE][-+]?\d+)?|\d+([eE][-+]?\d+))/
- IMAG_NUMBER.2: /\d+[jJ]/ | FLOAT_NUMBER /[jJ]/
+ FLOAT_NUMBER.2: /((\d+\.[\d_]*|\.[\d_]+)([Ee][-+]?\d+)?|\d+([Ee][-+]?\d+))/
+ IMAG_NUMBER.2: /\d+[Jj]/ | FLOAT_NUMBER /[Jj]/


// Comma-separated list (with an optional trailing comma)


lark/indenter.py (+42, -5)

@@ -1,20 +1,27 @@
"Provides Indentation services for languages with indentation similar to Python"

from abc import ABC, abstractmethod
from typing import List, Iterator

from .exceptions import LarkError
from .lark import PostLex
from .lexer import Token

###{standalone

class DedentError(LarkError):
pass

class Indenter(PostLex):
def __init__(self):
self.paren_level = None
self.indent_level = None
class Indenter(PostLex, ABC):
paren_level: int
indent_level: List[int]

def __init__(self) -> None:
self.paren_level = 0
self.indent_level = [0]
assert self.tab_len > 0

def handle_NL(self, token):
def handle_NL(self, token: Token) -> Iterator[Token]:
if self.paren_level > 0:
return

@@ -64,4 +71,34 @@ class Indenter(PostLex):
def always_accept(self):
return (self.NL_type,)

@property
@abstractmethod
def NL_type(self) -> str:
raise NotImplementedError()

@property
@abstractmethod
def OPEN_PAREN_types(self) -> List[str]:
raise NotImplementedError()

@property
@abstractmethod
def CLOSE_PAREN_types(self) -> List[str]:
raise NotImplementedError()

@property
@abstractmethod
def INDENT_type(self) -> str:
raise NotImplementedError()

@property
@abstractmethod
def DEDENT_type(self) -> str:
raise NotImplementedError()

@property
@abstractmethod
def tab_len(self) -> int:
raise NotImplementedError()

###}
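
Since `Indenter` is now an ABC, a concrete subclass must supply the terminal names and tab length; plain class attributes satisfy the abstract properties, as in the Python-style indenter from Lark's examples:

```python
from lark.indenter import Indenter

class PythonIndenter(Indenter):
    NL_type = "_NEWLINE"
    OPEN_PAREN_types = ["LPAR", "LSQB", "LBRACE"]
    CLOSE_PAREN_types = ["RPAR", "RSQB", "RBRACE"]
    INDENT_type = "_INDENT"
    DEDENT_type = "_DEDENT"
    tab_len = 8

# Used via the postlex option: Lark(grammar, parser="lalr", postlex=PythonIndenter())
```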

lark/lark.py (+73, -61)

@@ -1,26 +1,32 @@
from __future__ import absolute_import


from lark.exceptions import ConfigurationError, assert_config

from abc import ABC, abstractmethod
import sys, os, pickle, hashlib
from io import open
import tempfile
from warnings import warn

from .utils import STRING_TYPE, Serialize, SerializeMemoizer, FS, isascii, logger, ABC, abstractmethod
from .load_grammar import load_grammar, FromPackageLoader, Grammar, verify_used_files
from typing import (
TypeVar, Type, List, Dict, Iterator, Callable, Union, Optional,
Tuple, Iterable, IO, Any, TYPE_CHECKING
)
if TYPE_CHECKING:
from .parsers.lalr_interactive_parser import InteractiveParser
from .visitors import Transformer
if sys.version_info >= (3, 8):
from typing import Literal
else:
from typing_extensions import Literal
from .exceptions import ConfigurationError, assert_config, UnexpectedInput
from .utils import Serialize, SerializeMemoizer, FS, isascii, logger
from .load_grammar import load_grammar, FromPackageLoader, Grammar, verify_used_files, PackageResource
from .tree import Tree
from .common import LexerConf, ParserConf

- from .lexer import Lexer, TraditionalLexer, TerminalDef, LexerThread
+ from .lexer import Lexer, TraditionalLexer, TerminalDef, LexerThread, Token
from .parse_tree_builder import ParseTreeBuilder
from .parser_frontends import get_frontend, _get_lexer_callbacks
from .grammar import Rule

import re
try:
- import regex
+ import regex # type: ignore
except ImportError:
regex = None

@@ -28,10 +34,39 @@ except ImportError:
###{standalone


class PostLex(ABC):
@abstractmethod
def process(self, stream: Iterator[Token]) -> Iterator[Token]:
return stream

always_accept: Iterable[str] = ()

class LarkOptions(Serialize):
"""Specifies the options for Lark

"""

start: List[str]
debug: bool
transformer: 'Optional[Transformer]'
propagate_positions: Union[bool, str]
maybe_placeholders: bool
cache: Union[bool, str]
regex: bool
g_regex_flags: int
keep_all_tokens: bool
tree_class: Any
parser: 'Literal["earley", "lalr", "cyk", "auto"]'
lexer: 'Union[Literal["auto", "standard", "contextual", "dynamic", "dynamic_complete"], Type[Lexer]]'
ambiguity: 'Literal["auto", "resolve", "explicit", "forest"]'
postlex: Optional[PostLex]
priority: 'Optional[Literal["auto", "normal", "invert"]]'
lexer_callbacks: Dict[str, Callable[[Token], Token]]
use_bytes: bool
edit_terminals: Optional[Callable[[TerminalDef], TerminalDef]]
import_paths: 'List[Union[str, Callable[[Union[None, str, PackageResource], str], Tuple[str, str]]]]'
source_path: Optional[str]

OPTIONS_DOC = """
**=== General Options ===**

@@ -47,9 +82,8 @@ class LarkOptions(Serialize):
Accepts ``False``, ``True``, or a callable, which will filter which nodes to ignore when propagating.
maybe_placeholders
When ``True``, the ``[]`` operator returns ``None`` when not matched.

When ``False``, ``[]`` behaves like the ``?`` operator, and returns no value at all.
- (default= ``False``. Recommended to set to ``True``)
+ (default= ``True``)
cache
Cache the results of the Lark grammar analysis, for x2 to x3 faster loading. LALR only for now.

@@ -111,12 +145,10 @@ class LarkOptions(Serialize):
# Adding a new option needs to be done in multiple places:
# - In the dictionary below. This is the primary truth of which options `Lark.__init__` accepts
# - In the docstring above. It is used both for the docstring of `LarkOptions` and `Lark`, and in readthedocs
# - In `lark-stubs/lark.pyi`:
# - As attribute to `LarkOptions`
# - As parameter to `Lark.__init__`
# - As an attribute of `LarkOptions` above
# - Potentially in `_LOAD_ALLOWED_OPTIONS` below this class, when the option doesn't change how the grammar is loaded
# - Potentially in `lark.tools.__init__`, if it makes sense, and it can easily be passed as a cmd argument
_defaults = {
_defaults: Dict[str, Any] = {
'debug': False,
'keep_all_tokens': False,
'tree_class': None,
@@ -131,7 +163,7 @@ class LarkOptions(Serialize):
'regex': False,
'propagate_positions': False,
'lexer_callbacks': {},
- 'maybe_placeholders': False,
+ 'maybe_placeholders': True,
'edit_terminals': None,
'g_regex_flags': 0,
'use_bytes': False,
@@ -153,7 +185,7 @@ class LarkOptions(Serialize):

options[name] = value

- if isinstance(options['start'], STRING_TYPE):
+ if isinstance(options['start'], str):
options['start'] = [options['start']]

self.__dict__['options'] = options
@@ -194,13 +226,7 @@ _VALID_PRIORITY_OPTIONS = ('auto', 'normal', 'invert', None)
_VALID_AMBIGUITY_OPTIONS = ('auto', 'resolve', 'explicit', 'forest')


class PostLex(ABC):
@abstractmethod
def process(self, stream):
return stream

always_accept = ()

_T = TypeVar('_T')

class Lark(Serialize):
"""Main interface for the library.
@@ -215,7 +241,15 @@ class Lark(Serialize):
>>> Lark(r'''start: "foo" ''')
Lark(...)
"""
def __init__(self, grammar, **options):

source_path: str
source_grammar: str
grammar: 'Grammar'
options: LarkOptions
lexer: Lexer
terminals: List[TerminalDef]

def __init__(self, grammar: 'Union[Grammar, str, IO[str]]', **options) -> None:
self.options = LarkOptions(options)

# Set regex or re module
@@ -247,14 +281,11 @@ class Lark(Serialize):

cache_fn = None
cache_md5 = None
- if isinstance(grammar, STRING_TYPE):
+ if isinstance(grammar, str):
self.source_grammar = grammar
if self.options.use_bytes:
if not isascii(grammar):
raise ConfigurationError("Grammar must be ascii only, when use_bytes=True")
- if sys.version_info[0] == 2 and self.options.use_bytes != 'force':
- raise ConfigurationError("`use_bytes=True` may have issues on python2."
- "Use `use_bytes='force'` to use it at your own risk.")

if self.options.cache:
if self.options.parser != 'lalr':
@@ -266,13 +297,13 @@ class Lark(Serialize):
s = grammar + options_str + __version__ + str(sys.version_info[:2])
cache_md5 = hashlib.md5(s.encode('utf8')).hexdigest()

- if isinstance(self.options.cache, STRING_TYPE):
+ if isinstance(self.options.cache, str):
cache_fn = self.options.cache
else:
if self.options.cache is not True:
raise ConfigurationError("cache argument must be bool or str")
- # Python2.7 doesn't support * syntax in tuples
- cache_fn = tempfile.gettempdir() + '/.lark_cache_%s_%s_%s.tmp' % ((cache_md5,) + sys.version_info[:2])
+ cache_fn = tempfile.gettempdir() + '/.lark_cache_%s_%s_%s.tmp' % (cache_md5, *sys.version_info[:2])

if FS.exists(cache_fn):
logger.debug('Loading grammar from cache: %s', cache_fn)
@@ -336,7 +367,6 @@ class Lark(Serialize):

if self.options.priority not in _VALID_PRIORITY_OPTIONS:
raise ConfigurationError("invalid priority option: %r. Must be one of %r" % (self.options.priority, _VALID_PRIORITY_OPTIONS))
assert self.options.ambiguity not in ('resolve__antiscore_sum', ), 'resolve__antiscore_sum has been replaced with the option priority="invert"'
if self.options.ambiguity not in _VALID_AMBIGUITY_OPTIONS:
raise ConfigurationError("invalid ambiguity option: %r. Must be one of %r" % (self.options.ambiguity, _VALID_AMBIGUITY_OPTIONS))

@@ -357,7 +387,6 @@ class Lark(Serialize):
self._terminals_dict = {t.name: t for t in self.terminals}

# If the user asked to invert the priorities, negate them all here.
# This replaces the old 'resolve__antiscore_sum' option.
if self.options.priority == 'invert':
for rule in self.rules:
if rule.options.priority is not None:
@@ -384,6 +413,7 @@ class Lark(Serialize):
if cache_fn:
logger.debug('Saving grammar to cache: %s', cache_fn)
with FS.open(cache_fn, 'wb') as f:
assert cache_md5 is not None
f.write(cache_md5.encode('utf8') + b'\n')
pickle.dump(used_files, f)
self.save(f)
@@ -486,7 +516,7 @@ class Lark(Serialize):
return inst._load({'data': data, 'memo': memo}, **kwargs)

@classmethod
def open(cls, grammar_filename, rel_to=None, **options):
def open(cls: Type[_T], grammar_filename: str, rel_to: Optional[str]=None, **options) -> _T:
"""Create an instance of Lark with the grammar given by its filename

If ``rel_to`` is provided, the function will find the grammar filename in relation to it.
@@ -504,7 +534,7 @@ class Lark(Serialize):
return cls(f, **options)

@classmethod
def open_from_package(cls, package, grammar_path, search_paths=("",), **options):
def open_from_package(cls: Type[_T], package: str, grammar_path: str, search_paths: Tuple[str, ...]=("",), **options) -> _T:
"""Create an instance of Lark with the grammar loaded from within the package `package`.
This allows grammar loading from zipapps.

@@ -525,7 +555,7 @@ class Lark(Serialize):
return 'Lark(open(%r), parser=%r, lexer=%r, ...)' % (self.source_path, self.options.parser, self.options.lexer)


def lex(self, text, dont_ignore=False):
def lex(self, text: str, dont_ignore: bool=False) -> Iterator[Token]:
"""Only lex (and postlex) the text, without parsing it. Only relevant when lexer='standard'

When dont_ignore=True, the lexer will return all tokens, even those marked for %ignore.
@@ -542,11 +572,11 @@ class Lark(Serialize):
return self.options.postlex.process(stream)
return stream

def get_terminal(self, name):
def get_terminal(self, name: str) -> TerminalDef:
"""Get information about a terminal"""
return self._terminals_dict[name]
def parse_interactive(self, text=None, start=None):
def parse_interactive(self, text: Optional[str]=None, start: Optional[str]=None) -> 'InteractiveParser':
"""Start an interactive parsing session.

Parameters:
@@ -560,7 +590,7 @@ class Lark(Serialize):
"""
return self.parser.parse_interactive(text, start=start)

def parse(self, text, start=None, on_error=None):
def parse(self, text: str, start: Optional[str]=None, on_error: 'Optional[Callable[[UnexpectedInput], bool]]'=None) -> Tree:
"""Parse the given text, according to the options provided.

Parameters:
@@ -580,23 +610,5 @@ class Lark(Serialize):
"""
return self.parser.parse(text, start=start, on_error=on_error)

@property
def source(self):
warn("Attribute Lark.source was renamed to Lark.source_path", DeprecationWarning)
return self.source_path

@source.setter
def source(self, value):
self.source_path = value

@property
def grammar_source(self):
warn("Attribute Lark.grammar_source was renamed to Lark.source_grammar", DeprecationWarning)
return self.source_grammar

@grammar_source.setter
def grammar_source(self, value):
self.source_grammar = value


###}
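
A small usage sketch of the newly annotated entry points (the grammar is illustrative):

```python
from lark import Lark

parser = Lark(r'''
    start: WORD ("," WORD)*
    %import common.WORD
    %ignore " "
''', parser="lalr")

tree = parser.parse("hello, world")
print(tree.pretty())

# LALR only: drive the parse step by step and inspect the valid next terminals
ip = parser.parse_interactive("hello, world")
ip.exhaust_lexer()
print(ip.accepts())  # a set of terminal names, e.g. {'COMMA', '$END'}
```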

lark/lexer.py (+107, -74)

@@ -1,20 +1,32 @@
# Lexer Implementation

from abc import abstractmethod, ABC
import re

from .utils import Str, classify, get_regexp_width, Py36, Serialize, suppress
from contextlib import suppress
from typing import (
TypeVar, Type, List, Dict, Iterator, Collection, Callable, Optional, FrozenSet, Any,
Pattern as REPattern, ClassVar, TYPE_CHECKING
)
from types import ModuleType
if TYPE_CHECKING:
from .common import LexerConf

from .utils import classify, get_regexp_width, Serialize
from .exceptions import UnexpectedCharacters, LexError, UnexpectedToken
from .grammar import TOKEN_DEFAULT_PRIORITY

###{standalone
from warnings import warn
from copy import copy


class Pattern(Serialize):
raw = None
type = None
class Pattern(Serialize, ABC):

value: str
flags: Collection[str]
raw: Optional[str]
type: ClassVar[str]

def __init__(self, value, flags=(), raw=None):
def __init__(self, value: str, flags: Collection[str]=(), raw: Optional[str]=None) -> None:
self.value = value
self.flags = frozenset(flags)
self.raw = raw
@@ -29,50 +41,49 @@ class Pattern(Serialize):
def __eq__(self, other):
return type(self) == type(other) and self.value == other.value and self.flags == other.flags

def to_regexp(self):
@abstractmethod
def to_regexp(self) -> str:
raise NotImplementedError()

def min_width(self):
@property
@abstractmethod
def min_width(self) -> int:
raise NotImplementedError()

def max_width(self):
@property
@abstractmethod
def max_width(self) -> int:
raise NotImplementedError()

- if Py36:
- # Python 3.6 changed syntax for flags in regular expression
- def _get_flags(self, value):
- for f in self.flags:
- value = ('(?%s:%s)' % (f, value))
- return value
- 
- else:
- def _get_flags(self, value):
- for f in self.flags:
- value = ('(?%s)' % f) + value
- return value
- 
+ def _get_flags(self, value):
+ for f in self.flags:
+ value = ('(?%s:%s)' % (f, value))
+ return value


class PatternStr(Pattern):
__serialize_fields__ = 'value', 'flags'

type = "str"
type: ClassVar[str] = "str"

def to_regexp(self):
def to_regexp(self) -> str:
return self._get_flags(re.escape(self.value))

@property
def min_width(self):
def min_width(self) -> int:
return len(self.value)

@property
def max_width(self) -> int:
return len(self.value)
max_width = min_width


class PatternRE(Pattern):
__serialize_fields__ = 'value', 'flags', '_width'

type = "re"
type: ClassVar[str] = "re"

def to_regexp(self):
def to_regexp(self) -> str:
return self._get_flags(self.value)

_width = None
@@ -82,11 +93,11 @@ class PatternRE(Pattern):
return self._width

@property
def min_width(self):
def min_width(self) -> int:
return self._get_width()[0]

@property
def max_width(self):
def max_width(self) -> int:
return self._get_width()[1]


@@ -94,7 +105,11 @@ class TerminalDef(Serialize):
__serialize_fields__ = 'name', 'pattern', 'priority'
__serialize_namespace__ = PatternStr, PatternRE

def __init__(self, name, pattern, priority=1):
name: str
pattern: Pattern
priority: int

def __init__(self, name: str, pattern: Pattern, priority: int=TOKEN_DEFAULT_PRIORITY) -> None:
assert isinstance(pattern, Pattern), pattern
self.name = name
self.pattern = pattern
@@ -103,14 +118,15 @@ class TerminalDef(Serialize):
def __repr__(self):
return '%s(%r, %r)' % (type(self).__name__, self.name, self.pattern)

def user_repr(self):
def user_repr(self) -> str:
if self.name.startswith('__'): # We represent a generated terminal
return self.pattern.raw or self.name
else:
return self.name

_T = TypeVar('_T')

class Token(Str):
class Token(str):
"""A string with meta-information, that is produced by the lexer.

When parsing text, the resulting chunks of the input that haven't been discarded,
@@ -131,7 +147,16 @@ class Token(Str):
"""
__slots__ = ('type', 'start_pos', 'value', 'line', 'column', 'end_line', 'end_column', 'end_pos')

def __new__(cls, type_, value, start_pos=None, line=None, column=None, end_line=None, end_column=None, end_pos=None, pos_in_stream=None):
type: str
start_pos: int
value: Any
line: int
column: int
end_line: int
end_column: int
end_pos: int

def __new__(cls, type_, value, start_pos=None, line=None, column=None, end_line=None, end_column=None, end_pos=None):
try:
inst = super(Token, cls).__new__(cls, value)
except UnicodeDecodeError:
@@ -139,7 +164,7 @@ class Token(Str):
inst = super(Token, cls).__new__(cls, value)

inst.type = type_
- inst.start_pos = start_pos if start_pos is not None else pos_in_stream
+ inst.start_pos = start_pos
inst.value = value
inst.line = line
inst.column = column
@@ -148,12 +173,7 @@ class Token(Str):
inst.end_pos = end_pos
return inst

@property
def pos_in_stream(self):
warn("Attribute Token.pos_in_stream was renamed to Token.start_pos", DeprecationWarning, 2)
return self.start_pos

def update(self, type_=None, value=None):
def update(self, type_: Optional[str]=None, value: Optional[Any]=None) -> 'Token':
return Token.new_borrow_pos(
type_ if type_ is not None else self.type,
value if value is not None else self.value,
@@ -161,7 +181,7 @@ class Token(Str):
)

@classmethod
def new_borrow_pos(cls, type_, value, borrow_t):
def new_borrow_pos(cls: Type[_T], type_: str, value: Any, borrow_t: 'Token') -> _T:
return cls(type_, value, borrow_t.start_pos, borrow_t.line, borrow_t.column, borrow_t.end_line, borrow_t.end_column, borrow_t.end_pos)

def __reduce__(self):
@@ -177,9 +197,9 @@ class Token(Str):
if isinstance(other, Token) and self.type != other.type:
return False

- return Str.__eq__(self, other)
+ return str.__eq__(self, other)

- __hash__ = Str.__hash__
+ __hash__ = str.__hash__


class LineCounter:
@@ -198,7 +218,7 @@ class LineCounter:

return self.char_pos == other.char_pos and self.newline_char == other.newline_char

def feed(self, token, test_newline=True):
def feed(self, token: Token, test_newline=True):
"""Consume a token and calculate the new line & column.

As an optional optimization, set test_newline=False if token doesn't contain a newline.
@@ -262,7 +282,6 @@ def _create_unless(terminals, g_regex_flags, re_, use_bytes):
return new_terminals, callback



class Scanner:
def __init__(self, terminals, g_regex_flags, re_, use_bytes, match_whole=False):
self.terminals = terminals
@@ -301,7 +320,7 @@ class Scanner:
return m.group(0), type_from_index[m.lastindex]


def _regexp_has_newline(r):
def _regexp_has_newline(r: str):
r"""Expressions that may indicate newlines in a regexp:
- newlines (\n)
- escaped newline (\\n)
@@ -312,13 +331,35 @@ def _regexp_has_newline(r):
return '\n' in r or '\\n' in r or '\\s' in r or '[^' in r or ('(?s' in r and '.' in r)


class Lexer(object):
class LexerState(object):
__slots__ = 'text', 'line_ctr', 'last_token'

def __init__(self, text, line_ctr, last_token=None):
self.text = text
self.line_ctr = line_ctr
self.last_token = last_token

def __eq__(self, other):
if not isinstance(other, LexerState):
return NotImplemented

return self.text is other.text and self.line_ctr == other.line_ctr and self.last_token == other.last_token

def __copy__(self):
return type(self)(self.text, copy(self.line_ctr), self.last_token)


_Callback = Callable[[Token], Token]

class Lexer(ABC):
"""Lexer interface

Method Signatures:
lex(self, text) -> Iterator[Token]
lex(self, lexer_state, parser_state) -> Iterator[Token]
"""
lex = NotImplemented
@abstractmethod
def lex(self, lexer_state: LexerState, parser_state: Any) -> Iterator[Token]:
return NotImplemented

def make_lexer_state(self, text):
line_ctr = LineCounter(b'\n' if isinstance(text, bytes) else '\n')
@@ -327,7 +368,14 @@ class Lexer(object):

class TraditionalLexer(Lexer):

def __init__(self, conf):
terminals: Collection[TerminalDef]
ignore_types: FrozenSet[str]
newline_types: FrozenSet[str]
user_callbacks: Dict[str, _Callback]
callback: Dict[str, _Callback]
re: ModuleType

def __init__(self, conf: 'LexerConf') -> None:
terminals = list(conf.terminals)
assert all(isinstance(t, TerminalDef) for t in terminals), terminals

@@ -382,12 +430,12 @@ class TraditionalLexer(Lexer):
def match(self, text, pos):
return self.scanner.match(text, pos)

def lex(self, state, parser_state):
def lex(self, state: LexerState, parser_state: Any) -> Iterator[Token]:
with suppress(EOFError):
while True:
yield self.next_token(state, parser_state)

def next_token(self, lex_state, parser_state=None):
def next_token(self, lex_state: LexerState, parser_state: Any=None) -> Token:
line_ctr = lex_state.line_ctr
while line_ctr.char_pos < len(lex_state.text):
res = self.match(lex_state.text, line_ctr.char_pos)
@@ -423,27 +471,12 @@ class TraditionalLexer(Lexer):
raise EOFError(self)


class LexerState(object):
__slots__ = 'text', 'line_ctr', 'last_token'

def __init__(self, text, line_ctr, last_token=None):
self.text = text
self.line_ctr = line_ctr
self.last_token = last_token

def __eq__(self, other):
if not isinstance(other, LexerState):
return NotImplemented

return self.text is other.text and self.line_ctr == other.line_ctr and self.last_token == other.last_token

def __copy__(self):
return type(self)(self.text, copy(self.line_ctr), self.last_token)


class ContextualLexer(Lexer):

def __init__(self, conf, states, always_accept=()):
lexers: Dict[str, TraditionalLexer]
root_lexer: TraditionalLexer

def __init__(self, conf: 'LexerConf', states: Dict[str, Collection[str]], always_accept: Collection[str]=()) -> None:
terminals = list(conf.terminals)
terminals_by_name = conf.terminals_by_name

@@ -471,7 +504,7 @@ class ContextualLexer(Lexer):
def make_lexer_state(self, text):
return self.root_lexer.make_lexer_state(text)

def lex(self, lexer_state, parser_state):
def lex(self, lexer_state: LexerState, parser_state: Any) -> Iterator[Token]:
try:
while True:
lexer = self.lexers[parser_state.position]
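
`Token` now subclasses `str` directly (the old `Str` alias is gone), so it behaves as a plain string that carries metadata; a small sketch:

```python
from lark import Token

tok = Token("NAME", "foo", start_pos=0, line=1, column=1)
assert tok == "foo" and tok.type == "NAME"

renamed = tok.update(type_="IDENT")               # same text and positions, new type
borrowed = Token.new_borrow_pos("KW", "if", tok)  # new token, positions copied from tok
```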


lark/load_grammar.py (+35, -33)

@@ -4,20 +4,20 @@ import os.path
import sys
from collections import namedtuple
from copy import copy, deepcopy
from io import open
import pkgutil
from ast import literal_eval
from numbers import Integral
from contextlib import suppress
from typing import List, Tuple, Union, Callable, Dict, Optional

from .utils import bfs, Py36, logger, classify_bool, is_id_continue, is_id_start, bfs_all_unique, small_factors
from .utils import bfs, logger, classify_bool, is_id_continue, is_id_start, bfs_all_unique, small_factors
from .lexer import Token, TerminalDef, PatternStr, PatternRE

from .parse_tree_builder import ParseTreeBuilder
from .parser_frontends import ParsingFrontend
from .common import LexerConf, ParserConf
from .grammar import RuleOptions, Rule, Terminal, NonTerminal, Symbol
from .utils import classify, suppress, dedup_list, Str
from .exceptions import GrammarError, UnexpectedCharacters, UnexpectedToken, ParseError
from .grammar import RuleOptions, Rule, Terminal, NonTerminal, Symbol, TOKEN_DEFAULT_PRIORITY
from .utils import classify, dedup_list
from .exceptions import GrammarError, UnexpectedCharacters, UnexpectedToken, ParseError, UnexpectedInput

from .tree import Tree, SlottedTree as ST
from .visitors import Transformer, Visitor, v_args, Transformer_InPlace, Transformer_NonRecursive
@@ -585,18 +585,7 @@ class PrepareLiterals(Transformer_InPlace):


def _make_joined_pattern(regexp, flags_set):
- # In Python 3.6, a new syntax for flags was introduced, that allows us to restrict the scope
- # of flags to a specific regexp group. We are already using it in `lexer.Pattern._get_flags`
- # However, for prior Python versions, we still need to use global flags, so we have to make sure
- # that there are no flag collisions when we merge several terminals.
- flags = ()
- if not Py36:
- if len(flags_set) > 1:
- raise GrammarError("Lark doesn't support joining terminals with conflicting flags in python <3.6!")
- elif len(flags_set) == 1:
- flags ,= flags_set
- 
- return PatternRE(regexp, flags)
+ return PatternRE(regexp, ())

class TerminalTreeToPattern(Transformer_NonRecursive):
def pattern(self, ps):
@@ -652,9 +641,9 @@ class PrepareSymbols(Transformer_InPlace):
if isinstance(v, Tree):
return v
elif v.type == 'RULE':
- return NonTerminal(Str(v.value))
+ return NonTerminal(str(v.value))
elif v.type == 'TERMINAL':
- return Terminal(Str(v.value), filter_out=v.startswith('_'))
+ return Terminal(str(v.value), filter_out=v.startswith('_'))
assert False


@@ -664,7 +653,12 @@ def nr_deepcopy_tree(t):


class Grammar:
def __init__(self, rule_defs, term_defs, ignore):

term_defs: List[Tuple[str, Tuple[Tree, int]]]
rule_defs: List[Tuple[str, Tuple[str, ...], Tree, RuleOptions]]
ignore: List[str]

def __init__(self, rule_defs: List[Tuple[str, Tuple[str, ...], Tree, RuleOptions]], term_defs: List[Tuple[str, Tuple[Tree, int]]], ignore: List[str]) -> None:
self.term_defs = term_defs
self.rule_defs = rule_defs
self.ignore = ignore
@@ -807,14 +801,18 @@ class FromPackageLoader(object):
pkg_name: The name of the package. You can probably provide `__name__` most of the time
search_paths: All the path that will be search on absolute imports.
"""
def __init__(self, pkg_name, search_paths=("", )):

pkg_name: str
search_paths: Tuple[str, ...]

def __init__(self, pkg_name: str, search_paths: Tuple[str, ...]=("", )) -> None:
self.pkg_name = pkg_name
self.search_paths = search_paths

def __repr__(self):
return "%s(%r, %r)" % (type(self).__name__, self.pkg_name, self.search_paths)

def __call__(self, base_path, grammar_path):
def __call__(self, base_path: Union[None, str, PackageResource], grammar_path: str) -> Tuple[PackageResource, str]:
if base_path is None:
to_try = self.search_paths
else:
@@ -991,7 +989,7 @@ def _search_interactive_parser(interactive_parser, predicate):
if predicate(p):
return path, p

def find_grammar_errors(text, start='start'):
def find_grammar_errors(text: str, start: str='start') -> List[Tuple[UnexpectedInput, str]]:
errors = []
def on_error(e):
errors.append((e, _error_repr(e)))
@@ -1040,7 +1038,12 @@ def _mangle_exp(exp, mangle):


class GrammarBuilder:
def __init__(self, global_keep_all_tokens=False, import_paths=None, used_files=None):

global_keep_all_tokens: bool
import_paths: List[Union[str, Callable]]
used_files: Dict[str, str]

def __init__(self, global_keep_all_tokens: bool=False, import_paths: Optional[List[Union[str, Callable]]]=None, used_files: Optional[Dict[str, str]]=None) -> None:
self.global_keep_all_tokens = global_keep_all_tokens
self.import_paths = import_paths or []
self.used_files = used_files or {}
@@ -1066,8 +1069,7 @@ class GrammarBuilder:
if self._is_term(name):
if options is None:
options = 1
- # if we don't use Integral here, we run into python2.7/python3 problems with long vs int
- elif not isinstance(options, Integral):
+ elif not isinstance(options, int):
raise GrammarError("Terminal require a single int as 'options' (e.g. priority), got %s" % (type(options),))
else:
if options is None:
@@ -1120,7 +1122,7 @@ class GrammarBuilder:

name = '__IGNORE_%d'% len(self._ignore_names)
self._ignore_names.append(name)
- self._definitions[name] = ((), t, 1)
+ self._definitions[name] = ((), t, TOKEN_DEFAULT_PRIORITY)

def _declare(self, *names):
for name in names:
@@ -1171,7 +1173,7 @@ class GrammarBuilder:
else:
name = tree.children[0].value
params = () # TODO terminal templates
- opts = int(tree.children[1]) if len(tree.children) == 3 else 1 # priority
+ opts = int(tree.children[1]) if len(tree.children) == 3 else TOKEN_DEFAULT_PRIORITY # priority
exp = tree.children[-1]

if mangle is not None:
@@ -1182,7 +1184,7 @@ class GrammarBuilder:
return name, exp, params, opts


def load_grammar(self, grammar_text, grammar_name="<?>", mangle=None):
def load_grammar(self, grammar_text: str, grammar_name: str="<?>", mangle: Optional[Callable[[str], str]]=None) -> None:
tree = _parse_grammar(grammar_text, grammar_name)

imports = {}
@@ -1245,7 +1247,7 @@ class GrammarBuilder:
self._definitions = {k: v for k, v in self._definitions.items() if k in _used}


def do_import(self, dotted_path, base_path, aliases, base_mangle=None):
def do_import(self, dotted_path: Tuple[str, ...], base_path: Optional[str], aliases: Dict[str, str], base_mangle: Optional[Callable[[str], str]]=None) -> None:
assert dotted_path
mangle = _get_mangle('__'.join(dotted_path), aliases, base_mangle)
grammar_path = os.path.join(*dotted_path) + EXT
@@ -1281,7 +1283,7 @@ class GrammarBuilder:
assert False, "Couldn't import grammar %s, but a corresponding file was found at a place where lark doesn't search for it" % (dotted_path,)


def validate(self):
def validate(self) -> None:
for name, (params, exp, _options) in self._definitions.items():
for i, p in enumerate(params):
if p in self._definitions:
@@ -1310,7 +1312,7 @@ class GrammarBuilder:
if not set(self._definitions).issuperset(self._ignore_names):
raise GrammarError("Terminals %s were marked to ignore but were not defined!" % (set(self._ignore_names) - set(self._definitions)))

def build(self):
def build(self) -> Grammar:
self.validate()
rule_defs = []
term_defs = []
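
A short sketch of the now-annotated `find_grammar_errors` helper (the broken grammar text is illustrative):

```python
from lark.load_grammar import find_grammar_errors

bad_grammar = '''
start: a b
a: "x"
b "y"
'''  # `b` is missing its colon

for err, description in find_grammar_errors(bad_grammar):
    # each entry pairs an UnexpectedInput with a human-readable message
    print(err.line, description)
```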


lark/parse_tree_builder.py (+5, -14)

@@ -1,7 +1,8 @@
from typing import List

from .exceptions import GrammarError, ConfigurationError
from .lexer import Token
from .tree import Tree
- from .visitors import InlineTransformer # XXX Deprecated
from .visitors import Transformer_InPlace
from .visitors import _vargs_meta, _vargs_meta_inline

@@ -152,7 +153,7 @@ def _should_expand(sym):
return not sym.is_term and sym.name.startswith('_')


def maybe_create_child_filter(expansion, keep_all_tokens, ambiguous, _empty_indices):
def maybe_create_child_filter(expansion, keep_all_tokens, ambiguous, _empty_indices: List[bool]):
# Prepare empty_indices as: How many Nones to insert at each index?
if _empty_indices:
assert _empty_indices.count(False) == len(expansion)
@@ -301,12 +302,6 @@ class AmbiguousIntermediateExpander:
return self.node_builder(children)


def ptb_inline_args(func):
@wraps(func)
def f(children):
return func(*children)
return f


def inplace_transformer(func):
@wraps(func)
@@ -362,15 +357,11 @@ class ParseTreeBuilder:
user_callback_name = rule.alias or rule.options.template_source or rule.origin.name
try:
f = getattr(transformer, user_callback_name)
- # XXX InlineTransformer is deprecated!
wrapper = getattr(f, 'visit_wrapper', None)
if wrapper is not None:
f = apply_visit_wrapper(f, user_callback_name, wrapper)
- else:
- if isinstance(transformer, InlineTransformer):
- f = ptb_inline_args(f)
- elif isinstance(transformer, Transformer_InPlace):
- f = inplace_transformer(f)
+ elif isinstance(transformer, Transformer_InPlace):
+ f = inplace_transformer(f)
except AttributeError:
f = partial(self.tree_class, user_callback_name)



+ 12
- 10
lark/parser_frontends.py View File

@@ -7,7 +7,7 @@ from .parsers.lalr_parser import LALR_Parser
from .tree import Tree
from .common import LexerConf, ParserConf
try:
import regex
import regex # type: ignore
except ImportError:
regex = None
import re
@@ -32,20 +32,13 @@ class MakeParsingFrontend:
self.parser_type = parser_type
self.lexer_type = lexer_type

def __call__(self, lexer_conf, parser_conf, options):
assert isinstance(lexer_conf, LexerConf)
assert isinstance(parser_conf, ParserConf)
parser_conf.parser_type = self.parser_type
lexer_conf.lexer_type = self.lexer_type
return ParsingFrontend(lexer_conf, parser_conf, options)

def deserialize(self, data, memo, lexer_conf, callbacks, options):
parser_conf = ParserConf.deserialize(data['parser_conf'], memo)
parser = LALR_Parser.deserialize(data['parser'], memo, callbacks, options.debug)
parser_conf.callbacks = callbacks
return ParsingFrontend(lexer_conf, parser_conf, options, parser=parser)

# ... Continued later in the module


class ParsingFrontend(Serialize):
@@ -169,7 +162,7 @@ class EarleyRegexpMatcher:
def __init__(self, lexer_conf):
self.regexps = {}
for t in lexer_conf.terminals:
if t.priority != 1:
if t.priority:
raise GrammarError("Dynamic Earley doesn't support weights on terminals", t, t.priority)
regexp = t.pattern.to_regexp()
try:
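
Note: the check above changed because the default terminal priority is now 0, so only explicitly weighted terminals trigger the error under the dynamic Earley lexer. A sketch with a hypothetical grammar:

from lark import Lark

grammar = r'''
start: NAME
NAME.2: /\w+/
'''
# The weighted NAME terminal would raise GrammarError here:
# Lark(grammar, parser='earley', lexer='dynamic')
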
@@ -237,3 +230,12 @@ class CYK_FrontEnd:

def _apply_callback(self, tree):
return self.callbacks[tree.rule](tree.children)


class MakeParsingFrontend(MakeParsingFrontend):
def __call__(self, lexer_conf, parser_conf, options):
assert isinstance(lexer_conf, LexerConf)
assert isinstance(parser_conf, ParserConf)
parser_conf.parser_type = self.parser_type
lexer_conf.lexer_type = self.lexer_type
return ParsingFrontend(lexer_conf, parser_conf, options)
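
Note: the self-inheriting definition above is a forward-reference workaround, not a typo: it rebinds MakeParsingFrontend to a subclass of itself so that __call__ can use ParsingFrontend, which is only defined further down the module. The trick in isolation, with hypothetical names:

class Builder:
    def base(self):
        return "base"

class Builder(Builder):              # rebinds the name to a subclass of itself
    def extended(self):
        return self.base() + "+later"    # may use names defined between the two declarations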

+ 0
- 4
lark/parsers/lalr_interactive_parser.py View File

@@ -126,7 +126,3 @@ class ImmutableInteractiveParser(InteractiveParser):
p = copy(self)
return InteractiveParser(p.parser, p.parser_state, p.lexer_state)


# Deprecated class names for the interactive parser
ParserPuppet = InteractiveParser
ImmutableParserPuppet = ImmutableInteractiveParser
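
Note: with the ParserPuppet aliases gone, code should target InteractiveParser directly. A minimal sketch, assuming the lark >= 1.0 API:

from lark import Lark

parser = Lark('start: "a"+', parser='lalr')
ip = parser.parse_interactive("aaa")    # returns an InteractiveParser
ip.exhaust_lexer()                      # feed all remaining tokens
tree = ip.resume_parse()                # run the parser to completion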

+ 0
- 3
lark/parsers/lalr_puppet.py View File

@@ -1,3 +0,0 @@
# Deprecated
from .lalr_interactive_parser import ParserPuppet, ImmutableParserPuppet

+ 12
- 5
lark/reconstruct.py View File

@@ -1,11 +1,13 @@
"""Reconstruct text from a tree, based on Lark grammar"""

from typing import List, Dict, Union, Callable, Iterable, Optional
import unicodedata

from .lark import Lark
from .tree import Tree
from .visitors import Transformer_InPlace
from .lexer import Token, PatternStr
from .grammar import Terminal, NonTerminal
from .lexer import Token, PatternStr, TerminalDef
from .grammar import Terminal, NonTerminal, Symbol

from .tree_matcher import TreeMatcher, is_discarded_terminal
from .utils import is_id_continue
@@ -21,7 +23,10 @@ def is_iter_empty(i):
class WriteTokensTransformer(Transformer_InPlace):
"Inserts discarded tokens into their correct place, according to the rules of grammar"

def __init__(self, tokens, term_subs):
tokens: Dict[str, TerminalDef]
term_subs: Dict[str, Callable[[Symbol], str]]

def __init__(self, tokens: Dict[str, TerminalDef], term_subs: Dict[str, Callable[[Symbol], str]]) -> None:
self.tokens = tokens
self.term_subs = term_subs

@@ -70,7 +75,9 @@ class Reconstructor(TreeMatcher):
term_subs: a dictionary of [Terminal name as str] to [output text as str]
"""

def __init__(self, parser, term_subs=None):
write_tokens: WriteTokensTransformer

def __init__(self, parser: Lark, term_subs: Optional[Dict[str, Callable[[Symbol], str]]]=None) -> None:
TreeMatcher.__init__(self, parser)

self.write_tokens = WriteTokensTransformer({t.name:t for t in self.tokens}, term_subs or {})
@@ -87,7 +94,7 @@ class Reconstructor(TreeMatcher):
else:
yield item

def reconstruct(self, tree, postproc=None, insert_spaces=True):
def reconstruct(self, tree: Tree, postproc: Optional[Callable[[Iterable[str]], Iterable[str]]]=None, insert_spaces: bool=True) -> str:
x = self._reconstruct(tree)
if postproc:
x = postproc(x)
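
Note: a round-trip sketch of the newly annotated API; maybe_placeholders=False matches what the updated reconstructor tests below pass (the grammar is a hypothetical example):

from lark import Lark
from lark.reconstruct import Reconstructor

grammar = r'''
start: NAME ";"
%import common.CNAME -> NAME
%ignore " "
'''
parser = Lark(grammar, parser='lalr', maybe_placeholders=False)
tree = parser.parse("hello ;")
print(Reconstructor(parser).reconstruct(tree))   # ignored whitespace is not restored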


+ 3
- 2
lark/tools/nearley.py View File

@@ -6,7 +6,7 @@ import codecs
import argparse


from lark import Lark, InlineTransformer
from lark import Lark, Transformer, v_args

nearley_grammar = r"""
start: (ruledef|directive)+
@@ -50,7 +50,8 @@ def _get_rulename(name):
name = {'_': '_ws_maybe', '__':'_ws'}.get(name, name)
return 'n_' + name.replace('$', '__DOLLAR__').lower()

class NearleyToLark(InlineTransformer):
@v_args(inline=True)
class NearleyToLark(Transformer):
def __init__(self):
self._count = 0
self.extra_rules = {}


+ 10
- 16
lark/tools/standalone.py View File

@@ -1,5 +1,3 @@
from __future__ import print_function

###{standalone
#
#
@@ -26,7 +24,14 @@ from __future__ import print_function
#
#

from io import open
from abc import ABC, abstractmethod
from collections.abc import Sequence
from types import ModuleType
from typing import (
TypeVar, Generic, Type, Tuple, List, Dict, Iterator, Collection, Callable, Optional, FrozenSet, Any,
Union, Iterable, IO, TYPE_CHECKING,
Pattern as REPattern, ClassVar, Set,
)
###}

import sys
@@ -35,15 +40,13 @@ import os
from os import path
from collections import defaultdict
from functools import partial
from argparse import ArgumentParser, SUPPRESS
from warnings import warn
from argparse import ArgumentParser

import lark
from lark import Lark
from lark.tools import lalr_argparser, build_lalr, make_warnings_comments


from lark.grammar import RuleOptions, Rule
from lark.grammar import Rule
from lark.lexer import TerminalDef

_dir = path.dirname(__file__)
@@ -118,11 +121,6 @@ def strip_docstrings(line_gen):
return ''.join(res)


def main(fobj, start, print=print):
warn('`lark.tools.standalone.main` is being redesigned. Use `gen_standalone`', DeprecationWarning)
lark_inst = Lark(fobj, parser="lalr", lexer="contextual", start=start)
gen_standalone(lark_inst, print)

def gen_standalone(lark_inst, output=None, out=sys.stdout, compress=False):
if output is None:
output = partial(print, file=out)
@@ -179,15 +177,11 @@ def main():
make_warnings_comments()
parser = ArgumentParser(prog="prog='python -m lark.tools.standalone'", description="Lark Stand-alone Generator Tool",
parents=[lalr_argparser], epilog='Look at the Lark documentation for more info on the options')
parser.add_argument("old_start", nargs='?', help=SUPPRESS)
parser.add_argument('-c', '--compress', action='store_true', default=0, help="Enable compression")
if len(sys.argv)==1:
parser.print_help(sys.stderr)
sys.exit(1)
ns = parser.parse_args()
if ns.old_start is not None:
warn('The syntax `python -m lark.tools.standalone <grammar-file> <start>` is deprecated. Use the -s option')
ns.start.append(ns.old_start)

lark_inst, out = build_lalr(ns)
gen_standalone(lark_inst, out=out, compress=ns.compress)
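
Note: with main(fobj, start, print) removed, gen_standalone is the programmatic entry point. A minimal sketch, assuming a local grammar file named my_grammar.lark:

import sys
from lark import Lark
from lark.tools.standalone import gen_standalone

with open('my_grammar.lark') as f:            # hypothetical grammar file
    lark_inst = Lark(f, parser='lalr')        # the standalone generator requires LALR
gen_standalone(lark_inst, out=sys.stdout)     # writes the generated module to stdout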


+ 39
- 31
lark/tree.py View File

@@ -1,16 +1,36 @@

try:
from future_builtins import filter
from future_builtins import filter # type: ignore
except ImportError:
pass

import sys
from copy import deepcopy

from typing import List, Callable, Iterator, Union, Optional, TYPE_CHECKING

if TYPE_CHECKING:
from .lexer import TerminalDef
if sys.version_info >= (3, 8):
from typing import Literal
else:
from typing_extensions import Literal

###{standalone
from collections import OrderedDict


class Meta:

empty: bool
line: int
column: int
start_pos: int
end_line: int
end_column: int
end_pos: int
orig_expansion: 'List[TerminalDef]'
match_tree: bool

def __init__(self):
self.empty = True

@@ -27,13 +47,17 @@ class Tree(object):
meta: Line & Column numbers (if ``propagate_positions`` is enabled).
meta attributes: line, column, start_pos, end_line, end_column, end_pos
"""
def __init__(self, data, children, meta=None):

data: str
children: 'List[Union[str, Tree]]'

def __init__(self, data: str, children: 'List[Union[str, Tree]]', meta: Optional[Meta]=None) -> None:
self.data = data
self.children = children
self._meta = meta

@property
def meta(self):
def meta(self) -> Meta:
if self._meta is None:
self._meta = Meta()
return self._meta
@@ -57,7 +81,7 @@ class Tree(object):

return l

def pretty(self, indent_str=' '):
def pretty(self, indent_str: str=' ') -> str:
"""Returns an indented string representation of the tree.

Great for debugging.
@@ -73,10 +97,10 @@ class Tree(object):
def __ne__(self, other):
return not (self == other)

def __hash__(self):
def __hash__(self) -> int:
return hash((self.data, tuple(self.children)))

def iter_subtrees(self):
def iter_subtrees(self) -> 'Iterator[Tree]':
"""Depth-first iteration.

Iterates over all the subtrees, never returning to the same node twice (Lark's parse-tree is actually a DAG).
@@ -91,17 +115,17 @@ class Tree(object):
del queue
return reversed(list(subtrees.values()))

def find_pred(self, pred):
def find_pred(self, pred: 'Callable[[Tree], bool]') -> 'Iterator[Tree]':
"""Returns all nodes of the tree that evaluate pred(node) as true."""
return filter(pred, self.iter_subtrees())

def find_data(self, data):
def find_data(self, data: str) -> 'Iterator[Tree]':
"""Returns all nodes of the tree whose data equals the given data."""
return self.find_pred(lambda t: t.data == data)

###}

def expand_kids_by_index(self, *indices):
def expand_kids_by_index(self, *indices: int) -> None:
"""Expand (inline) children at the given indices"""
for i in sorted(indices, reverse=True): # reverse so that changing tail won't affect indices
kid = self.children[i]
@@ -118,7 +142,7 @@ class Tree(object):
return changed


def scan_values(self, pred):
def scan_values(self, pred: 'Callable[[Union[str, Tree]], bool]') -> Iterator[str]:
"""Return all values in the tree that evaluate pred(value) as true.

This can be used to find all the tokens in the tree.
@@ -151,36 +175,20 @@ class Tree(object):
def __deepcopy__(self, memo):
return type(self)(self.data, deepcopy(self.children, memo), meta=self._meta)

def copy(self):
def copy(self) -> 'Tree':
return type(self)(self.data, self.children)

def set(self, data, children):
def set(self, data: str, children: 'List[Union[str, Tree]]') -> None:
self.data = data
self.children = children

# XXX Deprecated! Here for backwards compatibility <0.6.0
@property
def line(self):
return self.meta.line

@property
def column(self):
return self.meta.column

@property
def end_line(self):
return self.meta.end_line

@property
def end_column(self):
return self.meta.end_column


class SlottedTree(Tree):
__slots__ = 'data', 'children', 'rule', '_meta'


def pydot__tree_to_png(tree, filename, rankdir="LR", **kwargs):
def pydot__tree_to_png(tree: Tree, filename: str, rankdir: 'Literal["TB", "LR", "BT", "RL"]'="LR", **kwargs) -> None:
graph = pydot__tree_to_graph(tree, rankdir, **kwargs)
graph.write_png(filename)

@@ -201,7 +209,7 @@ def pydot__tree_to_graph(tree, rankdir="LR", **kwargs):
possible attributes, see https://www.graphviz.org/doc/info/attrs.html.
"""

import pydot
import pydot # type: ignore
graph = pydot.Dot(graph_type='digraph', rankdir=rankdir, **kwargs)

i = [0]
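
Note: a quick sketch exercising the newly annotated Tree API (the tree shape is a hypothetical example):

from lark import Token, Tree

t = Tree('start', [Tree('word', [Token('WORD', 'hi')]), Token('PUNCT', '!')])
print(t.pretty())                                   # indent_str defaults to two spaces
words = list(t.find_data('word'))                   # Iterator[Tree], per the annotation
tokens = list(t.scan_values(lambda v: isinstance(v, Token)))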


+ 4
- 47
lark/utils.py View File

@@ -1,4 +1,3 @@
import hashlib
import unicodedata
import os
from functools import reduce
@@ -7,23 +6,12 @@ from collections import deque
###{standalone
import sys, re
import logging
from io import open
logger = logging.getLogger("lark")
logger: logging.Logger = logging.getLogger("lark")
logger.addHandler(logging.StreamHandler())
# Set to highest level, since we have some warnings amongst the code
# By default, we should not output any log messages
logger.setLevel(logging.CRITICAL)

if sys.version_info[0]>2:
from abc import ABC, abstractmethod
else:
from abc import ABCMeta, abstractmethod
class ABC(object): # Provide Python27 compatibility
__slots__ = ()
__metclass__ = ABCMeta


Py36 = (sys.version_info[:2] >= (3, 6))

NO_VALUE = object()
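
Note: the logger keeps its CRITICAL default, so lark stays silent unless opted in. A usage sketch:

import logging
from lark import logger

logger.setLevel(logging.WARNING)   # surface lark's internal warnings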

@@ -120,28 +108,16 @@ class SerializeMemoizer(Serialize):
return _deserialize(data, namespace, memo)


try:
STRING_TYPE = basestring
except NameError: # Python 3
STRING_TYPE = str


import types
from functools import wraps, partial
from contextlib import contextmanager

Str = type(u'')
try:
classtype = types.ClassType # Python2
except AttributeError:
classtype = type # Python3


def smart_decorator(f, create_decorator):
if isinstance(f, types.FunctionType):
return wraps(f)(create_decorator(f, True))

elif isinstance(f, (classtype, type, types.BuiltinFunctionType)):
elif isinstance(f, (type, types.BuiltinFunctionType)):
return wraps(f)(create_decorator(f, False))

elif isinstance(f, types.MethodType):
@@ -156,7 +132,7 @@ def smart_decorator(f, create_decorator):


try:
import regex
import regex # type: ignore
except ImportError:
regex = None

@@ -222,25 +198,6 @@ def dedup_list(l):
return [x for x in l if not (x in dedup or dedup.add(x))]


try:
from contextlib import suppress # Python 3
except ImportError:
@contextmanager
def suppress(*excs):
'''Catch and dismiss the provided exception

>>> x = 'hello'
>>> with suppress(IndexError):
... x = x[10]
>>> x
'hello'
'''
try:
yield
except excs:
pass


class Enumerator(Serialize):
def __init__(self):
self.enums = {}
@@ -284,7 +241,7 @@ def combine_alternatives(lists):
try:
import atomicwrites
except ImportError:
atomicwrites = None
atomicwrites = None # type: ignore

class FS:
exists = staticmethod(os.path.exists)


+ 36
- 31
lark/visitors.py View File

@@ -1,3 +1,5 @@
from typing import TypeVar, Tuple, List, Callable, Generic, Type, Union, Optional
from abc import ABC
from functools import wraps

from .utils import smart_decorator, combine_alternatives
@@ -8,6 +10,10 @@ from .lexer import Token
###{standalone
from inspect import getmembers, getmro

_T = TypeVar('_T')
_R = TypeVar('_R')
_FUNC = Callable[..., _T]
_DECORATED = Union[_FUNC, type]

class Discard(Exception):
"""When raising the Discard exception in a transformer callback,
@@ -46,7 +52,7 @@ class _Decoratable:
return cls


class Transformer(_Decoratable):
class Transformer(_Decoratable, ABC, Generic[_T]):
"""Transformers visit each node of the tree, and run the appropriate method on it according to the node's data.

Methods are provided by the user via inheritance, and called according to ``tree.data``.
@@ -74,7 +80,7 @@ class Transformer(_Decoratable):
"""
__visit_tokens__ = True # For backwards compatibility

def __init__(self, visit_tokens=True):
def __init__(self, visit_tokens: bool=True) -> None:
self.__visit_tokens__ = visit_tokens

def _call_userfunc(self, tree, new_children=None):
@@ -125,11 +131,11 @@ class Transformer(_Decoratable):
children = list(self._transform_children(tree.children))
return self._call_userfunc(tree, children)

def transform(self, tree):
def transform(self, tree: Tree) -> _T:
"Transform the given tree, and return the final result"
return self._transform_tree(tree)

def __mul__(self, other):
def __mul__(self, other: 'Transformer[_T]') -> 'TransformerChain[_T]':
"""Chain two transformers together, returning a new transformer.
"""
return TransformerChain(self, other)
@@ -213,17 +219,19 @@ class InlineTransformer(Transformer): # XXX Deprecated
else:
return f(*children)

class TransformerChain(object):
class TransformerChain(Generic[_T]):

transformers: Tuple[Transformer[_T], ...]

def __init__(self, *transformers):
def __init__(self, *transformers: Transformer[_T]) -> None:
self.transformers = transformers

def transform(self, tree):
def transform(self, tree: Tree) -> _T:
for t in self.transformers:
tree = t.transform(tree)
return tree

def __mul__(self, other):
def __mul__(self, other: Transformer[_T]) -> 'TransformerChain[_T]':
return TransformerChain(*self.transformers + (other,))
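
Note: Transformer and TransformerChain are now generic in their result type, and * composes them. A typing sketch with hypothetical transformer names (the subscript also works at runtime, via Generic):

from lark import Transformer

class Desugar(Transformer[str]): ...
class Render(Transformer[str]): ...

pipeline = Desugar() * Render()    # a TransformerChain[str], per the annotated __mul__
# result: str = pipeline.transform(tree)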


@@ -304,19 +312,19 @@ class VisitorBase:
return cls


class Visitor(VisitorBase):
class Visitor(VisitorBase, ABC, Generic[_T]):
"""Tree visitor, non-recursive (can handle huge trees).

Visiting a node calls its methods (provided by the user via inheritance) according to ``tree.data``
"""

def visit(self, tree):
def visit(self, tree: Tree) -> Tree:
"Visits the tree, starting with the leaves and finally the root (bottom-up)"
for subtree in tree.iter_subtrees():
self._call_userfunc(subtree)
return tree

def visit_topdown(self,tree):
def visit_topdown(self, tree: Tree) -> Tree:
"Visit the tree, starting at the root, and ending at the leaves (top-down)"
for subtree in tree.iter_subtrees_topdown():
self._call_userfunc(subtree)
@@ -331,7 +339,7 @@ class Visitor_Recursive(VisitorBase):
Slightly faster than the non-recursive version.
"""

def visit(self, tree):
def visit(self, tree: Tree) -> Tree:
"Visits the tree, starting with the leaves and finally the root (bottom-up)"
for child in tree.children:
if isinstance(child, Tree):
@@ -340,7 +348,7 @@ class Visitor_Recursive(VisitorBase):
self._call_userfunc(tree)
return tree

def visit_topdown(self,tree):
def visit_topdown(self,tree: Tree) -> Tree:
"Visit the tree, starting at the root, and ending at the leaves (top-down)"
self._call_userfunc(tree)

@@ -351,16 +359,7 @@ class Visitor_Recursive(VisitorBase):
return tree


def visit_children_decor(func):
"See Interpreter"
@wraps(func)
def inner(cls, tree):
values = cls.visit_children(tree)
return func(cls, values)
return inner


class Interpreter(_Decoratable):
class Interpreter(_Decoratable, ABC, Generic[_T]):
"""Interpreter walks the tree starting at the root.

Visits the tree, starting with the root and finally the leaves (top-down)
@@ -372,7 +371,7 @@ class Interpreter(_Decoratable):
This allows the user to implement branching and loops.
"""

def visit(self, tree):
def visit(self, tree: Tree) -> _T:
f = getattr(self, tree.data)
wrapper = getattr(f, 'visit_wrapper', None)
if wrapper is not None:
@@ -380,7 +379,7 @@ class Interpreter(_Decoratable):
else:
return f(tree)

def visit_children(self, tree):
def visit_children(self, tree: Tree) -> List[_T]:
return [self.visit(child) if isinstance(child, Tree) else child
for child in tree.children]

@@ -391,6 +390,16 @@ class Interpreter(_Decoratable):
return self.visit_children(tree)


_InterMethod = Callable[[Type[Interpreter], _T], _R]

def visit_children_decor(func: _InterMethod) -> _InterMethod:
"See Interpreter"
@wraps(func)
def inner(cls, tree):
values = cls.visit_children(tree)
return func(cls, values)
return inner

# Decorators

def _apply_decorator(obj, decorator, **kwargs):
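
Note: visit_children_decor moved below Interpreter so its _InterMethod annotation can reference the class; behavior is unchanged. A usage sketch (Collect is a hypothetical name):

from lark.visitors import Interpreter, visit_children_decor

class Collect(Interpreter):
    @visit_children_decor
    def start(self, values):    # receives the already-visited children
        return values
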
@@ -416,10 +425,6 @@ def _inline_args__func(func):
return smart_decorator(func, create_decorator)


def inline_args(obj): # XXX Deprecated
return _apply_decorator(obj, _inline_args__func)


def _visitor_args_func_dec(func, visit_wrapper=None, static=False):
def create_decorator(_f, with_self):
if with_self:
@@ -444,12 +449,12 @@ def _vargs_inline(f, _data, children, _meta):
def _vargs_meta_inline(f, _data, children, meta):
return f(meta, *children)
def _vargs_meta(f, _data, children, meta):
return f(children, meta) # TODO swap these for consistency? Backwards incompatible!
return f(meta, children)
def _vargs_tree(f, data, children, meta):
return f(Tree(data, children, meta))


def v_args(inline=False, meta=False, tree=False, wrapper=None):
def v_args(inline: bool=False, meta: bool=False, tree: bool=False, wrapper: Optional[Callable]=None) -> Callable[[_DECORATED], _DECORATED]:
"""A convenience decorator factory for modifying the behavior of user-supplied visitor methods.

By default, callback methods of transformers/visitors accept one argument - a list of the node's children.
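
Note: the _vargs_meta change above is the breaking one that the updated tests below exercise: with v_args(meta=True), callbacks now receive meta before children. A migration sketch (LineOf is a hypothetical name):

from lark import Transformer, v_args

@v_args(meta=True)
class LineOf(Transformer):
    def start(self, meta, children):    # pre-1.0 order was (children, meta)
        return meta.line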


+ 2
- 2
setup.py View File

@@ -48,18 +48,18 @@ Main Features:
- CYK parser, for highly ambiguous grammars
- EBNF grammar
- Unicode fully supported
- Python 2 & 3 compatible
- Automatic line & column tracking
- Standard library of terminals (strings, numbers, names, etc.)
- Import grammars from Nearley.js
- Extensive test suite
- And much more!

Since version 1.0, only Python versions 3.6 and up are supported.
''',

classifiers=[
"Development Status :: 5 - Production/Stable",
"Intended Audience :: Developers",
"Programming Language :: Python :: 2.7",
"Programming Language :: Python :: 3",
"Topic :: Software Development :: Libraries :: Python Modules",
"Topic :: Text Processing :: General",


+ 1
- 1
tests/test_nearley/nearley

@@ -1 +1 @@
Subproject commit a46b37471db486db0f6e1ce6a2934fb238346b44
Subproject commit 326831689826cb1b9a4d21d1ce0d5db9278e9636

+ 3
- 4
tests/test_parser.py View File

@@ -8,7 +8,7 @@ import os
import sys
from copy import copy, deepcopy

from lark.utils import Py36, isascii
from lark.utils import isascii

from lark import Token, Transformer_NonRecursive, LexError

@@ -208,11 +208,11 @@ class TestParsers(unittest.TestCase):

@v_args(meta=True)
class T1(Transformer):
def a(self, children, meta):
def a(self, meta, children):
assert not children
return meta.line

def start(self, children, meta):
def start(self, meta, children):
return children

@v_args(meta=True, inline=True)
@@ -1565,7 +1565,6 @@ def _make_parser_test(LEXER, PARSER):
self.assertEqual( g.parse('"hello"').children, ['"hello"'])
self.assertEqual( g.parse("'hello'").children, ["'hello'"])

@unittest.skipIf(not Py36, "Required re syntax only exists in python3.6+")
def test_join_regex_flags(self):
g = r"""
start: A


+ 2
- 2
tests/test_reconstructor.py View File

@@ -183,8 +183,8 @@ class TestReconstructor(TestCase):
keyword x += y
"""

l1 = Lark(g1, parser='lalr')
l2 = Lark(g2, parser='lalr')
l1 = Lark(g1, parser='lalr', maybe_placeholders=False)
l2 = Lark(g2, parser='lalr', maybe_placeholders=False)
r = Reconstructor(l2)

tree = l1.parse(code)


+ 1
- 1
tests/test_tools.py View File

@@ -24,7 +24,7 @@ class TestStandalone(TestCase):
standalone.gen_standalone(Lark(grammar, parser='lalr'), out=code_buf, compress=compress)
code = code_buf.getvalue()

context = {'__doc__': None}
context = {'__doc__': None, '__name__': 'test_standalone'}
exec(code, context)
return context



+ 1
- 1
tox.ini View File

@@ -1,5 +1,5 @@
[tox]
envlist = py27, py34, py35, py36, py37, py38, py39, py310, pypy, pypy3
envlist = py36, py37, py38, py39, pypy, pypy3
skip_missing_interpreters=true

[testenv]

