
Merge branch 'master' into 0.7c

Update from latest 0.6
Tag: tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.6.6
Author: Erez Shinan
Commit: 085eba3b8e
13 changed files with 234 additions and 32 deletions:

  .gitignore                 | +1  -0
  docs/classes.md            | +5  -1
  docs/grammar.md            | +2  -2
  docs/how_to_use.md         | +16 -0
  examples/python3.lark      | +1  -1
  lark/grammar.py            | +8  -0
  lark/lark.py               | +8  -3
  lark/load_grammar.py       | +32 -8
  lark/parse_tree_builder.py | +74 -13
  lark/tree.py               | +12 -0
  lark/visitors.py           | +5  -0
  tests/test_parser.py       | +43 -4
  tests/test_trees.py        | +27 -0

.gitignore (+1, -0)

@@ -1,4 +1,5 @@
*.pyc
*.pyo
/lark_parser.egg-info/**
tags
.vscode


docs/classes.md (+5, -1)

@@ -76,7 +76,11 @@ Returns all nodes of the tree whose data equals the given data.

#### iter_subtrees(self)

Iterates over all the subtrees, never returning to the same node twice (Lark's parse-tree is actually a DAG)
Iterates over all the subtrees, never returning to the same node twice (Lark's parse-tree is actually a DAG).

#### iter_subtrees_topdown(self)

Iterates over all the subtrees, returning nodes in the same order as pretty() does.
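
A minimal sketch of the difference between the two iterators, mirroring the order asserted by the new tests in tests/test_trees.py:

```python
from lark import Tree

t = Tree('a', [Tree('b', ['x']), Tree('c', ['y'])])

# iter_subtrees is bottom-up: children are yielded before their parent
assert [st.data for st in t.iter_subtrees()] == ['b', 'c', 'a']

# iter_subtrees_topdown is top-down, matching the order pretty() prints in
assert [st.data for st in t.iter_subtrees_topdown()] == ['a', 'b', 'c']
```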

#### \_\_eq\_\_, \_\_hash\_\_



docs/grammar.md (+2, -2)

@@ -43,7 +43,7 @@ Literals can be one of:
* `/regular expression+/`
* `"case-insensitive string"i`
* `/re with flags/imulx`
* Literal range: `"a".."z"`, `"1..9"`, etc.
* Literal range: `"a".."z"`, `"1".."9"`, etc.

#### Notes for when using a lexer:

@@ -145,4 +145,4 @@ If the module path is relative, such as `.path.to.file`, Lark will attempt to lo

### %declare

Declare a terminal without defining it. Useful for plugins.
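
For instance, a terminal written with the corrected range syntax (a minimal sketch, not part of this diff):

```python
from lark import Lark

# "1".."9" is a literal range: it matches any single character from 1 through 9
p = Lark('''start: DIGIT+
            DIGIT: "1".."9"''')
print(p.parse("371").pretty())
```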

docs/how_to_use.md (+16, -0)

@@ -52,3 +52,19 @@ class MyTransformer(Transformer):
new_tree = MyTransformer().transform(tree)
```

## LALR usage

By default, Lark silently resolves Shift/Reduce conflicts as Shift. To enable warnings, pass `debug=True`. To get the messages printed, you have to configure the `logging` module beforehand. For example:

```python
from lark import Lark
import logging
logging.basicConfig(level=logging.DEBUG)

collision_grammar = '''
start: as as
as: a*
a: "a"
'''
p = Lark(collision_grammar, parser='lalr', debug=True)
```

examples/python3.lark (+1, -1)

@@ -161,7 +161,7 @@ yield_expr: "yield" [yield_arg]
yield_arg: "from" test | testlist


number: DEC_NUMBER | HEX_NUMBER | OCT_NUMBER | FLOAT_NUMBER | IMAG_NUMBER
number: DEC_NUMBER | HEX_NUMBER | BIN_NUMBER | OCT_NUMBER | FLOAT_NUMBER | IMAG_NUMBER
string: STRING | LONG_STRING
// Tokens



lark/grammar.py (+8, -0)

@@ -51,12 +51,20 @@ class Rule(object):
def __repr__(self):
return 'Rule(%r, %r, %r, %r)' % (self.origin, self.expansion, self.alias, self.options)

def __hash__(self):
return hash((self.origin, tuple(self.expansion)))
def __eq__(self, other):
if not isinstance(other, Rule):
return False
return self.origin == other.origin and self.expansion == other.expansion


class RuleOptions:
def __init__(self, keep_all_tokens=False, expand1=False, priority=None):
self.keep_all_tokens = keep_all_tokens
self.expand1 = expand1
self.priority = priority
self.empty_indices = ()

def __repr__(self):
return 'RuleOptions(%r, %r, %r)' % (
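
The new __hash__/__eq__ pair follows the standard Python contract: objects that compare equal must hash equal, and the expansion list has to be converted to a tuple before hashing, since lists are unhashable. A self-contained sketch of the same pattern, using a hypothetical Pair class:

```python
class Pair:
    """Hypothetical stand-in for Rule: identity is (origin, expansion)."""
    def __init__(self, origin, expansion):
        self.origin = origin
        self.expansion = expansion

    def __hash__(self):
        # lists are unhashable, so hash an immutable view of the expansion
        return hash((self.origin, tuple(self.expansion)))

    def __eq__(self, other):
        if not isinstance(other, Pair):
            return False
        return self.origin == other.origin and self.expansion == other.expansion

# equal values now collapse in sets and dicts
assert len({Pair('start', ['A']), Pair('start', ['A'])}) == 1
```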


lark/lark.py (+8, -3)

@@ -45,8 +45,11 @@ class LarkOptions(object):
profile - Measure run-time usage in Lark. Read results from the profiler property (Default: False)
propagate_positions - Propagates [line, column, end_line, end_column] attributes into all tree branches.
lexer_callbacks - Dictionary of callbacks for the lexer. May alter tokens during lexing. Use with caution.
maybe_placeholders - Experimental feature. Instead of omitting optional rules (i.e. rule?), replace them with None
"""
__doc__ += OPTIONS_DOC
if __doc__:
__doc__ += OPTIONS_DOC

def __init__(self, options_dict):
o = dict(options_dict)

@@ -63,6 +66,7 @@ class LarkOptions(object):
self.ambiguity = o.pop('ambiguity', 'auto')
self.propagate_positions = o.pop('propagate_positions', False)
self.lexer_callbacks = o.pop('lexer_callbacks', {})
self.maybe_placeholders = o.pop('maybe_placeholders', False)

assert self.parser in ('earley', 'lalr', 'cyk', None)

@@ -167,7 +171,8 @@ class Lark:

if self.profiler: self.profiler.enter_section('outside_lark')

__init__.__doc__ += "\nOPTIONS:" + LarkOptions.OPTIONS_DOC
if __init__.__doc__:
__init__.__doc__ += "\nOPTIONS:" + LarkOptions.OPTIONS_DOC

def _build_lexer(self):
return TraditionalLexer(self.lexer_conf.tokens, ignore=self.lexer_conf.ignore, user_callbacks=self.lexer_conf.callbacks)
@@ -175,7 +180,7 @@ class Lark:
def _build_parser(self):
self.parser_class = get_frontend(self.options.parser, self.options.lexer)

self._parse_tree_builder = ParseTreeBuilder(self.rules, self.options.tree_class, self.options.propagate_positions, self.options.keep_all_tokens, self.options.parser!='lalr' and self.options.ambiguity=='explicit')
self._parse_tree_builder = ParseTreeBuilder(self.rules, self.options.tree_class, self.options.propagate_positions, self.options.keep_all_tokens, self.options.parser!='lalr' and self.options.ambiguity=='explicit', self.options.maybe_placeholders)
callback = self._parse_tree_builder.create_callback(self.options.transformer)
if self.profiler:
for f in dir(callback):
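
What the new maybe_placeholders option buys, mirroring one of the new tests in tests/test_parser.py: a missing optional becomes an explicit None, so the children keep a fixed shape.

```python
from lark import Lark

# '!' keeps all tokens; without it, anonymous tokens would be filtered out
p = Lark('!start: "a"? "b"? "c"?', maybe_placeholders=True)
print(p.parse("ac").children)   # ['a', None, 'c']
```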


lark/load_grammar.py (+32, -8)

@@ -3,7 +3,7 @@
import os.path
import sys
from ast import literal_eval
from copy import deepcopy
from copy import copy, deepcopy

from .utils import bfs
from .lexer import Token, TerminalDef, PatternStr, PatternRE
@@ -26,6 +26,8 @@ EXT = '.lark'

_RE_FLAGS = 'imslux'

_EMPTY = Symbol('__empty__')

_TERMINAL_NAMES = {
'.' : 'DOT',
',' : 'COMMA',
@@ -151,7 +153,6 @@ RULES = {
'literal': ['REGEXP', 'STRING'],
}


@inline_args
class EBNF_to_BNF(Transformer_InPlace):
def __init__(self):
@@ -175,7 +176,14 @@ class EBNF_to_BNF(Transformer_InPlace):

def expr(self, rule, op, *args):
if op.value == '?':
return ST('expansions', [rule, ST('expansion', [])])
if isinstance(rule, Terminal) and rule.filter_out and not (
self.rule_options and self.rule_options.keep_all_tokens):
empty = ST('expansion', [])
elif isinstance(rule, NonTerminal) and rule.name.startswith('_'):
empty = ST('expansion', [])
else:
empty = _EMPTY
return ST('expansions', [rule, empty])
elif op.value == '+':
# a : b c+ d
# -->
@@ -481,7 +489,8 @@ class Grammar:
for name, rule_tree, options in rule_defs:
ebnf_to_bnf.rule_options = RuleOptions(keep_all_tokens=True) if options and options.keep_all_tokens else None
tree = transformer.transform(rule_tree)
rules.append((name, ebnf_to_bnf.transform(tree), options))
res = ebnf_to_bnf.transform(tree)
rules.append((name, res, options))
rules += ebnf_to_bnf.new_rules

assert len(rules) == len({name for name, _t, _o in rules}), "Whoops, name collision"
@@ -499,9 +508,17 @@ class Grammar:
if alias and name.startswith('_'):
raise GrammarError("Rule %s is marked for expansion (it starts with an underscore) and isn't allowed to have aliases (alias=%s)" % (name, alias))

assert all(isinstance(x, Symbol) for x in expansion), expansion
empty_indices = [x==_EMPTY for x in expansion]
if any(empty_indices):
assert options
exp_options = copy(options)
exp_options.empty_indices = empty_indices
expansion = [x for x in expansion if x!=_EMPTY]
else:
exp_options = options

rule = Rule(NonTerminal(name), expansion, alias, options)
assert all(isinstance(x, Symbol) for x in expansion), expansion
rule = Rule(NonTerminal(name), expansion, alias, exp_options)
compiled_rules.append(rule)

return terminals, compiled_rules, self.ignore
@@ -526,8 +543,12 @@ def import_grammar(grammar_path, base_paths=[]):
return _imported_grammars[grammar_path]

def import_from_grammar_into_namespace(grammar, namespace, aliases):
"""Returns all rules and terminals of grammar, prepended
with a 'namespace' prefix, except for those which are aliased.
"""

imported_terms = dict(grammar.term_defs)
imported_rules = {n:(n,t,o) for n,t,o in grammar.rule_defs}
imported_rules = {n:(n,deepcopy(t),o) for n,t,o in grammar.rule_defs}
term_defs = []
rule_defs = []
@@ -535,7 +556,10 @@ def import_from_grammar_into_namespace(grammar, namespace, aliases):
def rule_dependencies(symbol):
if symbol.type != 'RULE':
return []
_, tree, _ = imported_rules[symbol]
try:
_, tree, _ = imported_rules[symbol]
except KeyError:
raise GrammarError("Missing symbol '%s' in grammar %s" % (symbol, namespace))
return tree.scan_values(lambda x: x.type in ('RULE', 'TERMINAL'))

def get_namespace_name(name):
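
The branches added to EBNF_to_BNF.expr above decide which optionals earn a placeholder: filtered-out anonymous tokens and _-prefixed (inlined) rules expand to a plain empty alternative, while everything else expands to _EMPTY and later becomes a None slot. Mirroring the new test:

```python
from lark import Lark

# anonymous "b"? and inlined _c? leave no trace; the named terminal A? does
p = Lark('''start: A? "b"? _c?
            A: "a"
            _c: "c"''', maybe_placeholders=True)
print(p.parse("").children)   # [None]
```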


lark/parse_tree_builder.py (+74, -13)

@@ -1,7 +1,5 @@
from .exceptions import GrammarError
from .utils import suppress
from .lexer import Token
from .grammar import Rule
from .tree import Tree
from .visitors import InlineTransformer # XXX Deprecated

@@ -20,7 +18,6 @@ class ExpandSingleChild:
else:
return self.node_builder(children)


class PropagatePositions:
def __init__(self, node_builder):
self.node_builder = node_builder
@@ -63,7 +60,50 @@ class PropagatePositions:


class ChildFilter:
"Optimized childfilter (assumes no duplication in parse tree, so it's safe to change it)"
def __init__(self, to_include, append_none, node_builder):
self.node_builder = node_builder
self.to_include = to_include
self.append_none = append_none

def __call__(self, children):
filtered = []

for i, to_expand, add_none in self.to_include:
if add_none:
filtered += [None] * add_none
if to_expand:
filtered += children[i].children
else:
filtered.append(children[i])

if self.append_none:
filtered += [None] * self.append_none

return self.node_builder(filtered)

class ChildFilterLALR(ChildFilter):
"Optimized childfilter for LALR (assumes no duplication in parse tree, so it's safe to change it)"

def __call__(self, children):
filtered = []
for i, to_expand, add_none in self.to_include:
if add_none:
filtered += [None] * add_none
if to_expand:
if filtered:
filtered += children[i].children
else: # Optimize for left-recursion
filtered = children[i].children
else:
filtered.append(children[i])

if self.append_none:
filtered += [None] * self.append_none

return self.node_builder(filtered)

class ChildFilterLALR_NoPlaceholders(ChildFilter):
"Optimized childfilter for LALR (assumes no duplication in parse tree, so it's safe to change it)"
def __init__(self, to_include, node_builder):
self.node_builder = node_builder
self.to_include = to_include
@@ -83,13 +123,6 @@ class ChildFilter:
def _should_expand(sym):
return not sym.is_term and sym.name.startswith('_')

def maybe_create_child_filter(expansion, keep_all_tokens):
to_include = [(i, _should_expand(sym)) for i, sym in enumerate(expansion)
if keep_all_tokens or not (sym.is_term and sym.filter_out)]

if len(to_include) < len(expansion) or any(to_expand for i, to_expand in to_include):
return partial(ChildFilter, to_include)

class AmbiguousExpander:
"""Deal with the case where we're expanding children ('_rule') into a parent but the children
are ambiguous. i.e. (parent->_ambig->_expand_this_rule). In this case, make the parent itself
@@ -100,6 +133,33 @@ class AmbiguousExpander:
self.tree_class = tree_class
self.to_expand = to_expand

def maybe_create_child_filter(expansion, keep_all_tokens, ambiguous, _empty_indices):
# Prepare empty_indices as: How many Nones to insert at each index?
if _empty_indices:
assert _empty_indices.count(False) == len(expansion)
s = ''.join(str(int(b)) for b in _empty_indices)
empty_indices = [len(ones) for ones in s.split('0')]
assert len(empty_indices) == len(expansion)+1, (empty_indices, len(expansion))
else:
empty_indices = [0] * (len(expansion)+1)

to_include = []
nones_to_add = 0
for i, sym in enumerate(expansion):
nones_to_add += empty_indices[i]
if keep_all_tokens or not (sym.is_term and sym.filter_out):
to_include.append((i, _should_expand(sym), nones_to_add))
nones_to_add = 0

nones_to_add += empty_indices[len(expansion)]

if _empty_indices or len(to_include) < len(expansion) or any(to_expand for i, to_expand,_ in to_include):
if _empty_indices or ambiguous:
return partial(ChildFilter if ambiguous else ChildFilterLALR, to_include, nones_to_add)
else:
# LALR without placeholders
return partial(ChildFilterLALR_NoPlaceholders, [(i, x) for i,x,_ in to_include])

def __call__(self, children):
def _is_ambig_tree(child):
return hasattr(child, 'data') and child.data == '_ambig'
@@ -127,11 +187,12 @@ def ptb_inline_args(func):
return f

class ParseTreeBuilder:
def __init__(self, rules, tree_class, propagate_positions=False, keep_all_tokens=False, ambiguous=False):
def __init__(self, rules, tree_class, propagate_positions=False, keep_all_tokens=False, ambiguous=False, maybe_placeholders=False):
self.tree_class = tree_class
self.propagate_positions = propagate_positions
self.always_keep_all_tokens = keep_all_tokens
self.ambiguous = ambiguous
self.maybe_placeholders = maybe_placeholders

self.rule_builders = list(self._init_builders(rules))

@@ -145,7 +206,7 @@ class ParseTreeBuilder:

wrapper_chain = filter(None, [
(expand_single_child and not rule.alias) and ExpandSingleChild,
maybe_create_child_filter(rule.expansion, keep_all_tokens),
maybe_create_child_filter(rule.expansion, keep_all_tokens, self.ambiguous, options.empty_indices if self.maybe_placeholders and options else None),
self.propagate_positions and PropagatePositions,
self.ambiguous and maybe_create_ambiguous_expander(self.tree_class, rule.expansion, keep_all_tokens),
])
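
The string trick in maybe_create_child_filter run-length-encodes the boolean mask: each True (a removed _EMPTY slot) becomes '1', each kept child '0', and splitting on '0' yields how many Nones to insert before each kept child, plus one trailing count. A worked example:

```python
# mask over an original 5-symbol expansion; symbols 0, 2 and 3 were _EMPTY
_empty_indices = [True, False, True, True, False]

s = ''.join(str(int(b)) for b in _empty_indices)      # '10110'
empty_indices = [len(ones) for ones in s.split('0')]  # [1, 2, 0]

# one None before kept child 0, two before kept child 1, none appended
assert empty_indices == [1, 2, 0]
# the filtered expansion keeps exactly the False entries: 2 symbols
assert _empty_indices.count(False) == 2
```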


lark/tree.py (+12, -0)

@@ -5,6 +5,7 @@ except ImportError:

from copy import deepcopy


###{standalone
class Meta:
pass
@@ -42,6 +43,7 @@ class Tree(object):

def pretty(self, indent_str=' '):
return ''.join(self._pretty(0, indent_str))

def __eq__(self, other):
try:
return self.data == other.data and self.children == other.children
@@ -99,12 +101,22 @@ class Tree(object):
yield x
seen.add(id(x))

def iter_subtrees_topdown(self):
stack = [self]
while stack:
node = stack.pop()
if not isinstance(node, Tree):
continue
yield node
for n in reversed(node.children):
stack.append(n)

def __deepcopy__(self, memo):
return type(self)(self.data, deepcopy(self.children, memo))

def copy(self):
return type(self)(self.data, self.children)

def set(self, data, children):
self.data = data
self.children = children
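
Note the asymmetry between the two new copy methods: copy() hands the very same children list to the new Tree, while __deepcopy__ recursively clones it. A quick sketch:

```python
from copy import deepcopy
from lark import Tree

t = Tree('a', [Tree('b', [])])

shallow = t.copy()   # new Tree, but the same children list object
deep = deepcopy(t)   # new Tree with recursively copied children

shallow.children.append('x')
assert len(t.children) == 2     # the shallow copy shares the list
assert len(deep.children) == 1  # the deep copy does not
```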


lark/visitors.py (+5, -0)

@@ -69,6 +69,10 @@ class Transformer:
if name.startswith('_') or name in libmembers:
continue

# Skip if v_args already applied (at the function level)
if hasattr(cls.__dict__[name], 'vargs_applied'):
continue

static = isinstance(cls.__dict__[name], (staticmethod, classmethod))
setattr(cls, name, decorator(value, static=static, **kwargs))
return cls
@@ -241,6 +245,7 @@ def _visitor_args_func_dec(func, inline=False, meta=False, whole_tree=False, sta
f = wraps(func)(create_decorator(func, False))
else:
f = smart_decorator(func, create_decorator)
f.vargs_applied = True
f.inline = inline
f.meta = meta
f.whole_tree = whole_tree
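
What the new vargs_applied flag enables, mirroring test_vargs_override below: a method-level @v_args is no longer clobbered when the class-level decorator re-wraps every member.

```python
from lark import Tree, Transformer, v_args

@v_args(inline=True)           # class level: children are passed as *args
class T(Transformer):
    i = int

    @v_args(inline=False)      # method level wins: receives the children list
    def add(self, values):
        return sum(values)

t = Tree('add', [Tree('i', ['2']), Tree('i', ['3'])])
print(T().transform(t))        # 5
```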


tests/test_parser.py (+43, -4)

@@ -1069,7 +1069,7 @@ def _make_parser_test(LEXER, PARSER):
bb_.1: "bb"
"""

l = _Lark(grammar, ambiguity='resolve__antiscore_sum')
l = Lark(grammar, ambiguity='resolve__antiscore_sum')
res = l.parse('abba')
self.assertEqual(''.join(child.data for child in res.children), 'ab_b_a_')

@@ -1082,7 +1082,7 @@ def _make_parser_test(LEXER, PARSER):
bb_: "bb"
"""

l = Lark(grammar, parser='earley', ambiguity='resolve__antiscore_sum')
l = Lark(grammar, ambiguity='resolve__antiscore_sum')
res = l.parse('abba')
self.assertEqual(''.join(child.data for child in res.children), 'indirection')

@@ -1095,7 +1095,7 @@ def _make_parser_test(LEXER, PARSER):
bb_.3: "bb"
"""

l = Lark(grammar, parser='earley', ambiguity='resolve__antiscore_sum')
l = Lark(grammar, ambiguity='resolve__antiscore_sum')
res = l.parse('abba')
self.assertEqual(''.join(child.data for child in res.children), 'ab_b_a_')

@@ -1108,7 +1108,7 @@ def _make_parser_test(LEXER, PARSER):
bb_.3: "bb"
"""

l = Lark(grammar, parser='earley', ambiguity='resolve__antiscore_sum')
l = Lark(grammar, ambiguity='resolve__antiscore_sum')
res = l.parse('abba')
self.assertEqual(''.join(child.data for child in res.children), 'indirection')

@@ -1282,6 +1282,45 @@ def _make_parser_test(LEXER, PARSER):
res = p.parse('B')
self.assertEqual(len(res.children), 3)

@unittest.skipIf(PARSER=='cyk', "Empty rules")
def test_maybe_placeholders(self):
# Anonymous tokens shouldn't count
p = _Lark("""start: "a"? "b"? "c"? """, maybe_placeholders=True)
self.assertEqual(p.parse("").children, [])

# Anonymous tokens shouldn't count, other constructs should
p = _Lark("""start: A? "b"? _c?
A: "a"
_c: "c" """, maybe_placeholders=True)
self.assertEqual(p.parse("").children, [None])

p = _Lark("""!start: "a"? "b"? "c"? """, maybe_placeholders=True)
self.assertEqual(p.parse("").children, [None, None, None])
self.assertEqual(p.parse("a").children, ['a', None, None])
self.assertEqual(p.parse("b").children, [None, 'b', None])
self.assertEqual(p.parse("c").children, [None, None, 'c'])
self.assertEqual(p.parse("ab").children, ['a', 'b', None])
self.assertEqual(p.parse("ac").children, ['a', None, 'c'])
self.assertEqual(p.parse("bc").children, [None, 'b', 'c'])
self.assertEqual(p.parse("abc").children, ['a', 'b', 'c'])

p = _Lark("""!start: ("a"? "b" "c"?)+ """, maybe_placeholders=True)
self.assertEqual(p.parse("b").children, [None, 'b', None])
self.assertEqual(p.parse("bb").children, [None, 'b', None, None, 'b', None])
self.assertEqual(p.parse("abbc").children, ['a', 'b', None, None, 'b', 'c'])
self.assertEqual(p.parse("babbcabcb").children,
[None, 'b', None,
'a', 'b', None,
None, 'b', 'c',
'a', 'b', 'c',
None, 'b', None])

p = _Lark("""!start: "a"? "c"? "b"+ "a"? "d"? """, maybe_placeholders=True)
self.assertEqual(p.parse("bb").children, [None, None, 'b', 'b', None, None])
self.assertEqual(p.parse("bd").children, [None, None, 'b', None, 'd'])
self.assertEqual(p.parse("abba").children, ['a', None, 'b', 'b', 'a', None])
self.assertEqual(p.parse("cbbbb").children, [None, 'c', 'b', 'b', 'b', 'b', None, None])



_NAME = "Test" + PARSER.capitalize() + LEXER.capitalize()


tests/test_trees.py (+27, -0)

@@ -21,6 +21,17 @@ class TestTrees(TestCase):
data = pickle.dumps(s)
assert pickle.loads(data) == s

def test_iter_subtrees(self):
expected = [Tree('b', 'x'), Tree('c', 'y'), Tree('d', 'z'),
Tree('a', [Tree('b', 'x'), Tree('c', 'y'), Tree('d', 'z')])]
nodes = list(self.tree1.iter_subtrees())
self.assertEqual(nodes, expected)

def test_iter_subtrees_topdown(self):
expected = [Tree('a', [Tree('b', 'x'), Tree('c', 'y'), Tree('d', 'z')]),
Tree('b', 'x'), Tree('c', 'y'), Tree('d', 'z')]
nodes = list(self.tree1.iter_subtrees_topdown())
self.assertEqual(nodes, expected)

def test_interp(self):
t = Tree('a', [Tree('b', []), Tree('c', []), 'd'])
@@ -117,6 +128,22 @@ class TestTrees(TestCase):
x = MyTransformer().transform( Tree('hello', [2]))
self.assertEqual(x, 'hello')

def test_vargs_override(self):
t = Tree('add', [Tree('sub', [Tree('i', ['3']), Tree('f', ['1.1'])]), Tree('i', ['1'])])

@v_args(inline=True)
class T(Transformer):
i = int
f = float
sub = lambda self, a, b: a-b

@v_args(inline=False)
def add(self, values):
return sum(values)

res = T().transform(t)
self.assertEqual(res, 2.9)


if __name__ == '__main__':
unittest.main()

