BUGFIX: Fixes a subtle bug in the LALR(1) parser. See the new test for details.

7 anos atrás · 64d141e486
--- a/lark/parsers/lalr_parser.py
+++ b/lark/parsers/lalr_parser.py
@@ -34,7 +34,7 @@ class Parser(object):

                raise UnexpectedToken(token, expected, seq, i)

        def reduce(rule, size):
        def reduce(rule, size, end=False):
            if size:
                s = value_stack[-size:]
                del state_stack[-size:]
@@ -44,7 +44,7 @@ class Parser(object):

            res = self.callbacks[rule](s)

            if len(state_stack) == 1 and rule.origin == self.analysis.start_symbol:
            if end and len(state_stack) == 1 and rule.origin == self.analysis.start_symbol:
                return res

            _action, new_state = get_action(rule.origin)
@@ -73,7 +73,7 @@ class Parser(object):
        while True:
            _action, rule = get_action('$end')
            assert _action == 'reduce'
            res = reduce(*rule)
            res = reduce(*rule, end=True)
            if res:
                assert state_stack == [self.analysis.init_state_idx] and not value_stack, len(state_stack)
                return res
--- a/tests/test_parser.py
+++ b/tests/test_parser.py
@@ -584,6 +584,22 @@ def _make_parser_test(LEXER, PARSER):
            self.assertEqual(tree.children, ['a', 'A'])


        def test_reduce_cycle(self):
            """Regression test for an LALR edge case with a self-recursive start rule.

            During parsing, a transition state can look exactly like the end
            state. The apparent fix is for reduce() to be told explicitly when
            it is finalizing the parse.
            """
            grammar = """
                term: A
                    | term term

                A: "a"

            """
            parser = _Lark(grammar, start='term')

            parsed = parser.parse("aa")
            # The root node is expected to have exactly two children.
            self.assertEqual(len(parsed.children), 2)