Merge pull request #713 from lark-parser/lalr_rule_priority

4 years ago · 16d41d1774
--- a/lark/lark.py
+++ b/lark/lark.py
@@ -176,6 +176,9 @@ class LarkOptions(Serialize):
 # These options are only used outside of `load_grammar`.
 _LOAD_ALLOWED_OPTIONS = {'postlex', 'transformer', 'use_bytes', 'debug', 'g_regex_flags', 'regex', 'propagate_positions', 'tree_class'}

 _VALID_PRIORITY_OPTIONS = ('auto', 'normal', 'invert', None)
 _VALID_AMBIGUITY_OPTIONS = ('auto', 'resolve', 'explicit', 'forest')


 class Lark(Serialize):
    """Main interface for the library.
@@ -272,16 +275,13 @@ class Lark(Serialize):
                'Only %s supports disambiguation right now') % ', '.join(disambig_parsers)

        if self.options.priority == 'auto':
            if self.options.parser in ('earley', 'cyk', ):
                self.options.priority = 'normal'
            elif self.options.parser in ('lalr', ):
                self.options.priority = None
        elif self.options.priority in ('invert', 'normal'):
            assert self.options.parser in ('earley', 'cyk'), "priorities are not supported for LALR at this time"

        assert self.options.priority in ('auto', None, 'normal', 'invert'), 'invalid priority option specified: {}. options are auto, none, normal, invert.'.format(self.options.priority)
            self.options.priority = 'normal'

        if self.options.priority not in _VALID_PRIORITY_OPTIONS:
            raise ValueError("invalid priority option: %r. Must be one of %r" % (self.options.priority, _VALID_PRIORITY_OPTIONS))
        assert self.options.ambiguity not in ('resolve__antiscore_sum', ), 'resolve__antiscore_sum has been replaced with the option priority="invert"'
        assert self.options.ambiguity in ('resolve', 'explicit', 'forest', 'auto', )
        if self.options.ambiguity not in _VALID_AMBIGUITY_OPTIONS:
            raise ValueError("invalid ambiguity option: %r. Must be one of %r" % (self.options.ambiguity, _VALID_AMBIGUITY_OPTIONS))

        # Parse the grammar file and compose the grammars (TODO)
        self.grammar = load_grammar(grammar, self.source, re_module, self.options.keep_all_tokens)
--- a/lark/parsers/lalr_analysis.py
+++ b/lark/parsers/lalr_analysis.py
@@ -253,7 +253,14 @@ class LALR_Analyzer(GrammarAnalyzer):
                actions[la] = (Shift, next_state.closure)
            for la, rules in state.lookaheads.items():
                if len(rules) > 1:
                    reduce_reduce.append((la, rules))
                    # Try to resolve conflict based on priority
                    p = [(r.options.priority or 0, r) for r in rules]
                    p.sort(key=lambda r: r[0], reverse=True)
                    best, second_best = p[:2]
                    if best[0] > second_best[0]:
                        rules = [best[1]]
                    else:
                        reduce_reduce.append((state, la, rules))
                if la in actions:
                    if self.debug:
                        logger.warning('Shift/Reduce conflict for terminal %s: (resolving as shift)', la.name)
@@ -263,9 +270,12 @@ class LALR_Analyzer(GrammarAnalyzer):
            m[state] = { k.name: v for k, v in actions.items() }

        if reduce_reduce:
            msgs = [ 'Reduce/Reduce collision in %s between the following rules: %s'
                     % (la, ''.join([ '\n\t\t- ' + str(r) for r in rules ]))
                for la, rules in reduce_reduce]
            msgs = []
            for state, la, rules in reduce_reduce:
                msg = 'Reduce/Reduce collision in %s between the following rules: %s' % (la, ''.join([ '\n\t- ' + str(r) for r in rules ]))
                if self.debug:
                    msg += '\n    collision occured in state: {%s\n    }' % ''.join(['\n\t' + str(x) for x in state.closure])
                msgs.append(msg)
            raise GrammarError('\n\n'.join(msgs))

        states = { k.closure: v for k, v in m.items() }
--- a/lark/parsers/lalr_parser.py
+++ b/lark/parsers/lalr_parser.py
@@ -13,7 +13,6 @@ from .lalr_puppet import ParserPuppet

 class LALR_Parser(object):
    def __init__(self, parser_conf, debug=False):
        assert all(r.options.priority is None for r in parser_conf.rules), "LALR doesn't yet support prioritization"
        analysis = LALR_Analyzer(parser_conf, debug=debug)
        analysis.compute_lalr()
        callbacks = parser_conf.callbacks
--- a/tests/test_parser.py
+++ b/tests/test_parser.py
@@ -1781,7 +1781,7 @@ def _make_parser_test(LEXER, PARSER):
            %import bad_test.NUMBER
            """
            self.assertRaises(IOError, _Lark, grammar)
        

        @unittest.skipIf(LEXER=='dynamic', "%declare/postlex doesn't work with dynamic")
        def test_postlex_declare(self): # Note: this test does a lot. maybe split it up?
            class TestPostLexer:
@@ -1805,8 +1805,8 @@ def _make_parser_test(LEXER, PARSER):
            tree = parser.parse(test_file)
            self.assertEqual(tree.children, [Token('B', 'A')])

        @unittest.skipIf(PARSER != 'earley', "Currently only Earley supports priority in rules")
        def test_earley_prioritization(self):
        @unittest.skipIf(PARSER == 'cyk', "Doesn't work for CYK")
        def test_prioritization(self):
            "Tests effect of priority on result"

            grammar = """
@@ -1815,7 +1815,6 @@ def _make_parser_test(LEXER, PARSER):
            b.2: "a"
            """

            # l = Lark(grammar, parser='earley', lexer='standard')
            l = _Lark(grammar)
            res = l.parse("a")
            self.assertEqual(res.children[0].data, 'b')
@@ -1827,14 +1826,31 @@ def _make_parser_test(LEXER, PARSER):
            """

            l = _Lark(grammar)
            # l = Lark(grammar, parser='earley', lexer='standard')
            res = l.parse("a")
            self.assertEqual(res.children[0].data, 'a')

            grammar = """
            start: a | b
            a.2: "A"+
            b.1: "A"+ "B"?
            """

            l = _Lark(grammar)
            res = l.parse("AAAA")
            self.assertEqual(res.children[0].data, 'a')

            l = _Lark(grammar)
            res = l.parse("AAAB")
            self.assertEqual(res.children[0].data, 'b')

            l = _Lark(grammar, priority="invert")
            res = l.parse("AAAA")
            self.assertEqual(res.children[0].data, 'b')

        @unittest.skipIf(PARSER != 'earley', "Currently only Earley supports priority in rules")
        def test_earley_prioritization_sum(self):


        @unittest.skipIf(PARSER != 'earley' or LEXER == 'standard', "Currently only Earley supports priority sum in rules")
        def test_prioritization_sum(self):
            "Tests effect of priority on result"

            grammar = """
@@ -1846,7 +1862,7 @@ def _make_parser_test(LEXER, PARSER):
            bb_.1: "bb"
            """

            l = Lark(grammar, priority="invert")
            l = _Lark(grammar, priority="invert")
            res = l.parse('abba')
            self.assertEqual(''.join(child.data for child in res.children), 'ab_b_a_')

@@ -1859,7 +1875,7 @@ def _make_parser_test(LEXER, PARSER):
            bb_: "bb"
            """

            l = Lark(grammar, priority="invert")
            l = _Lark(grammar, priority="invert")
            res = l.parse('abba')
            self.assertEqual(''.join(child.data for child in res.children), 'indirection')

@@ -1872,7 +1888,7 @@ def _make_parser_test(LEXER, PARSER):
            bb_.3: "bb"
            """

            l = Lark(grammar, priority="invert")
            l = _Lark(grammar, priority="invert")
            res = l.parse('abba')
            self.assertEqual(''.join(child.data for child in res.children), 'ab_b_a_')

@@ -1885,7 +1901,7 @@ def _make_parser_test(LEXER, PARSER):
            bb_.3: "bb"
            """

            l = Lark(grammar, priority="invert")
            l = _Lark(grammar, priority="invert")
            res = l.parse('abba')
            self.assertEqual(''.join(child.data for child in res.children), 'indirection')

@@ -2064,7 +2080,7 @@ def _make_parser_test(LEXER, PARSER):
            # Anonymous tokens shouldn't count
            p = _Lark("""start: ["a"] ["b"] ["c"] """, maybe_placeholders=True)
            self.assertEqual(p.parse("").children, [])
            

            # Unless keep_all_tokens=True
            p = _Lark("""start: ["a"] ["b"] ["c"] """, maybe_placeholders=True, keep_all_tokens=True)
            self.assertEqual(p.parse("").children, [None, None, None])