diff --git a/lark/lark.py b/lark/lark.py
index c7bdfa0..05ad9b1 100644
--- a/lark/lark.py
+++ b/lark/lark.py
@@ -176,6 +176,9 @@ class LarkOptions(Serialize):
 # These option are only used outside of `load_grammar`.
 _LOAD_ALLOWED_OPTIONS = {'postlex', 'transformer', 'use_bytes', 'debug', 'g_regex_flags', 'regex', 'propagate_positions', 'tree_class'}
 
+_VALID_PRIORITY_OPTIONS = ('auto', 'normal', 'invert', None)
+_VALID_AMBIGUITY_OPTIONS = ('auto', 'resolve', 'explicit', 'forest')
+
 class Lark(Serialize):
     """Main interface for the library.
 
@@ -272,16 +275,13 @@ class Lark(Serialize):
                 'Only %s supports disambiguation right now') % ', '.join(disambig_parsers)
 
         if self.options.priority == 'auto':
-            if self.options.parser in ('earley', 'cyk', ):
-                self.options.priority = 'normal'
-            elif self.options.parser in ('lalr', ):
-                self.options.priority = None
-        elif self.options.priority in ('invert', 'normal'):
-            assert self.options.parser in ('earley', 'cyk'), "priorities are not supported for LALR at this time"
-
-        assert self.options.priority in ('auto', None, 'normal', 'invert'), 'invalid priority option specified: {}. options are auto, none, normal, invert.'.format(self.options.priority)
+            self.options.priority = 'normal'
+
+        if self.options.priority not in _VALID_PRIORITY_OPTIONS:
+            raise ValueError("invalid priority option: %r. Must be one of %r" % (self.options.priority, _VALID_PRIORITY_OPTIONS))
         assert self.options.ambiguity not in ('resolve__antiscore_sum', ), 'resolve__antiscore_sum has been replaced with the option priority="invert"'
-        assert self.options.ambiguity in ('resolve', 'explicit', 'forest', 'auto', )
+        if self.options.ambiguity not in _VALID_AMBIGUITY_OPTIONS:
+            raise ValueError("invalid ambiguity option: %r. Must be one of %r" % (self.options.ambiguity, _VALID_AMBIGUITY_OPTIONS))
 
         # Parse the grammar file and compose the grammars (TODO)
         self.grammar = load_grammar(grammar, self.source, re_module, self.options.keep_all_tokens)
diff --git a/lark/parsers/lalr_analysis.py b/lark/parsers/lalr_analysis.py
index 8745f46..57a71bf 100644
--- a/lark/parsers/lalr_analysis.py
+++ b/lark/parsers/lalr_analysis.py
@@ -253,7 +253,14 @@ class LALR_Analyzer(GrammarAnalyzer):
                 actions[la] = (Shift, next_state.closure)
             for la, rules in state.lookaheads.items():
                 if len(rules) > 1:
-                    reduce_reduce.append((la, rules))
+                    # Try to resolve conflict based on priority
+                    p = [(r.options.priority or 0, r) for r in rules]
+                    p.sort(key=lambda r: r[0], reverse=True)
+                    best, second_best = p[:2]
+                    if best[0] > second_best[0]:
+                        rules = [best[1]]
+                    else:
+                        reduce_reduce.append((state, la, rules))
                 if la in actions:
                     if self.debug:
                         logger.warning('Shift/Reduce conflict for terminal %s: (resolving as shift)', la.name)
@@ -263,9 +270,12 @@ class LALR_Analyzer(GrammarAnalyzer):
             m[state] = { k.name: v for k, v in actions.items() }
 
         if reduce_reduce:
-            msgs = [ 'Reduce/Reduce collision in %s between the following rules: %s'
-                    % (la, ''.join([ '\n\t\t- ' + str(r) for r in rules ]))
-                    for la, rules in reduce_reduce]
+            msgs = []
+            for state, la, rules in reduce_reduce:
+                msg = 'Reduce/Reduce collision in %s between the following rules: %s' % (la, ''.join([ '\n\t- ' + str(r) for r in rules ]))
+                if self.debug:
+                    msg += '\n collision occured in state: {%s\n }' % ''.join(['\n\t' + str(x) for x in state.closure])
+                msgs.append(msg)
             raise GrammarError('\n\n'.join(msgs))
 
         states = { k.closure: v for k, v in m.items() }
diff --git a/lark/parsers/lalr_parser.py b/lark/parsers/lalr_parser.py
index 433f3ef..1c98f3a 100644
--- a/lark/parsers/lalr_parser.py
+++ b/lark/parsers/lalr_parser.py
@@ -13,7 +13,6 @@ from .lalr_puppet import ParserPuppet
 
 class LALR_Parser(object):
     def __init__(self, parser_conf, debug=False):
-        assert all(r.options.priority is None for r in parser_conf.rules), "LALR doesn't yet support prioritization"
         analysis = LALR_Analyzer(parser_conf, debug=debug)
         analysis.compute_lalr()
         callbacks = parser_conf.callbacks
diff --git a/tests/test_parser.py b/tests/test_parser.py
index c38b81e..49e661e 100644
--- a/tests/test_parser.py
+++ b/tests/test_parser.py
@@ -1781,7 +1781,7 @@ def _make_parser_test(LEXER, PARSER):
                           %import bad_test.NUMBER
                        """
             self.assertRaises(IOError, _Lark, grammar)
-        
+
         @unittest.skipIf(LEXER=='dynamic', "%declare/postlex doesn't work with dynamic")
         def test_postlex_declare(self): # Note: this test does a lot. maybe split it up?
             class TestPostLexer:
@@ -1805,8 +1805,8 @@ def _make_parser_test(LEXER, PARSER):
             tree = parser.parse(test_file)
             self.assertEqual(tree.children, [Token('B', 'A')])
 
-        @unittest.skipIf(PARSER != 'earley', "Currently only Earley supports priority in rules")
-        def test_earley_prioritization(self):
+        @unittest.skipIf(PARSER == 'cyk', "Doesn't work for CYK")
+        def test_prioritization(self):
             "Tests effect of priority on result"
 
             grammar = """
@@ -1815,7 +1815,6 @@
             b.2: "a"
             """
 
-            # l = Lark(grammar, parser='earley', lexer='standard')
             l = _Lark(grammar)
             res = l.parse("a")
             self.assertEqual(res.children[0].data, 'b')
@@ -1827,14 +1826,31 @@
             """
 
             l = _Lark(grammar)
-            # l = Lark(grammar, parser='earley', lexer='standard')
             res = l.parse("a")
             self.assertEqual(res.children[0].data, 'a')
 
+            grammar = """
+            start: a | b
+            a.2: "A"+
+            b.1: "A"+ "B"?
+            """
+
+            l = _Lark(grammar)
+            res = l.parse("AAAA")
+            self.assertEqual(res.children[0].data, 'a')
+
+            l = _Lark(grammar)
+            res = l.parse("AAAB")
+            self.assertEqual(res.children[0].data, 'b')
+            l = _Lark(grammar, priority="invert")
+            res = l.parse("AAAA")
+            self.assertEqual(res.children[0].data, 'b')
 
 
-        @unittest.skipIf(PARSER != 'earley', "Currently only Earley supports priority in rules")
-        def test_earley_prioritization_sum(self):
+
+
+        @unittest.skipIf(PARSER != 'earley' or LEXER == 'standard', "Currently only Earley supports priority sum in rules")
+        def test_prioritization_sum(self):
             "Tests effect of priority on result"
 
             grammar = """
@@ -1846,7 +1862,7 @@
                 bb_.1: "bb"
             """
 
-            l = Lark(grammar, priority="invert")
+            l = _Lark(grammar, priority="invert")
             res = l.parse('abba')
             self.assertEqual(''.join(child.data for child in res.children), 'ab_b_a_')
@@ -1859,7 +1875,7 @@
                 bb_: "bb"
             """
 
-            l = Lark(grammar, priority="invert")
+            l = _Lark(grammar, priority="invert")
             res = l.parse('abba')
             self.assertEqual(''.join(child.data for child in res.children), 'indirection')
@@ -1872,7 +1888,7 @@
                 bb_.3: "bb"
             """
 
-            l = Lark(grammar, priority="invert")
+            l = _Lark(grammar, priority="invert")
             res = l.parse('abba')
             self.assertEqual(''.join(child.data for child in res.children), 'ab_b_a_')
@@ -1885,7 +1901,7 @@
                 bb_.3: "bb"
             """
 
-            l = Lark(grammar, priority="invert")
+            l = _Lark(grammar, priority="invert")
             res = l.parse('abba')
             self.assertEqual(''.join(child.data for child in res.children), 'indirection')
@@ -2064,7 +2080,7 @@
             # Anonymous tokens shouldn't count
             p = _Lark("""start: ["a"] ["b"] ["c"] """, maybe_placeholders=True)
             self.assertEqual(p.parse("").children, [])
-        
+
             # Unless keep_all_tokens=True
             p = _Lark("""start: ["a"] ["b"] ["c"] """, maybe_placeholders=True, keep_all_tokens=True)
             self.assertEqual(p.parse("").children, [None, None, None])