
Added support for rule priorities in LALR

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.11.0
Erez Sh, 4 years ago
commit e84a64fc52
4 changed files with 25 additions and 23 deletions
  1. lark/lark.py                   +1  -6
  2. lark/parsers/lalr_analysis.py  +14 -4
  3. lark/parsers/lalr_parser.py    +0  -1
  4. tests/test_parser.py           +10 -12

lark/lark.py  (+1 -6)

@@ -272,12 +272,7 @@ class Lark(Serialize):
                     'Only %s supports disambiguation right now') % ', '.join(disambig_parsers)

         if self.options.priority == 'auto':
-            if self.options.parser in ('earley', 'cyk', ):
-                self.options.priority = 'normal'
-            elif self.options.parser in ('lalr', ):
-                self.options.priority = None
-        elif self.options.priority in ('invert', 'normal'):
-            assert self.options.parser in ('earley', 'cyk'), "priorities are not supported for LALR at this time"
+            self.options.priority = 'normal'

         assert self.options.priority in ('auto', None, 'normal', 'invert'), 'invalid priority option specified: {}. options are auto, none, normal, invert.'.format(self.options.priority)
         assert self.options.ambiguity not in ('resolve__antiscore_sum', ), 'resolve__antiscore_sum has been replaced with the option priority="invert"'
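
With the Earley/CYK-only branch and its assert gone, rule priorities can be used with the LALR parser directly. A minimal usage sketch (the grammar mirrors the updated test_prioritization below, and the expected result follows that test):

from lark import Lark

# Sketch: rule priorities with parser='lalr' (previously rejected by the
# removed assert).  Both rules match "a"; the higher-priority rule b.2 wins
# the resulting reduce/reduce conflict.
grammar = """
start: a | b
a.1: "a"
b.2: "a"
"""

parser = Lark(grammar, parser='lalr')   # priority='auto' now resolves to 'normal'
tree = parser.parse("a")
print(tree.children[0].data)            # 'b', per the updated test_prioritization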


lark/parsers/lalr_analysis.py  (+14 -4)

@@ -253,7 +253,14 @@ class LALR_Analyzer(GrammarAnalyzer):
                 actions[la] = (Shift, next_state.closure)
             for la, rules in state.lookaheads.items():
                 if len(rules) > 1:
-                    reduce_reduce.append((la, rules))
+                    # Try to resolve conflict based on priority
+                    p = [(r.options.priority or 0, r) for r in rules]
+                    p.sort(key=lambda r: r[0], reverse=True)
+                    best, second_best = p[:2]
+                    if best[0] > second_best[0]:
+                        rules = [best[1]]
+                    else:
+                        reduce_reduce.append((state, la, rules))
                 if la in actions:
                     if self.debug:
                         logger.warning('Shift/Reduce conflict for terminal %s: (resolving as shift)', la.name)

@@ -263,9 +270,12 @@ class LALR_Analyzer(GrammarAnalyzer):
             m[state] = { k.name: v for k, v in actions.items() }

         if reduce_reduce:
-            msgs = [ 'Reduce/Reduce collision in %s between the following rules: %s'
-                % (la, ''.join([ '\n\t\t- ' + str(r) for r in rules ]))
-                for la, rules in reduce_reduce]
+            msgs = []
+            for state, la, rules in reduce_reduce:
+                msg = 'Reduce/Reduce collision in %s between the following rules: %s' % (la, ''.join([ '\n\t- ' + str(r) for r in rules ]))
+                if self.debug:
+                    msg += '\n    collision occured in state: {%s\n    }' % ''.join(['\n\t' + str(x) for x in state.closure])
+                msgs.append(msg)
             raise GrammarError('\n\n'.join(msgs))

         states = { k.closure: v for k, v in m.items() }
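
For clarity, here is a self-contained sketch of the tie-break the analyzer now applies when a state has more than one candidate reduction for a lookahead: sort the candidates by rule priority and keep the single best rule only if it strictly outranks the runner-up; a tie is still reported as a reduce/reduce collision. Rule and RuleOptions below are illustrative stand-ins, not lark's classes.

from collections import namedtuple

# Illustrative stand-ins; only .options.priority is read, as in the analyzer.
RuleOptions = namedtuple('RuleOptions', 'priority')
Rule = namedtuple('Rule', 'origin options')

def resolve_by_priority(rules):
    """Return [winning_rule] if one rule strictly outranks the rest, else None."""
    assert len(rules) > 1                     # only reached for conflicting reductions
    p = sorted(((r.options.priority or 0, r) for r in rules),
               key=lambda t: t[0], reverse=True)
    best, second_best = p[:2]
    if best[0] > second_best[0]:
        return [best[1]]                      # conflict resolved by priority
    return None                               # tie: still a reduce/reduce collision

a = Rule('a', RuleOptions(priority=1))
b = Rule('b', RuleOptions(priority=2))
print(resolve_by_priority([a, b]))            # b wins (priority 2 > 1)
print(resolve_by_priority([a, Rule('c', RuleOptions(priority=1))]))   # None (tie)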


lark/parsers/lalr_parser.py  (+0 -1)

@@ -13,7 +13,6 @@ from .lalr_puppet import ParserPuppet

 class LALR_Parser(object):
     def __init__(self, parser_conf, debug=False):
-        assert all(r.options.priority is None for r in parser_conf.rules), "LALR doesn't yet support prioritization"
         analysis = LALR_Analyzer(parser_conf, debug=debug)
         analysis.compute_lalr()
         callbacks = parser_conf.callbacks
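
Since the up-front assertion is gone, grammars with prioritized rules now reach the analyzer, and only a genuine priority tie is rejected. A small sketch of that failure mode (the grammar is illustrative; the error text comes from the analyzer change above):

from lark import Lark
from lark.exceptions import GrammarError

# Sketch: two rules with equal priority reduce the same input, so the
# conflict cannot be resolved by priority and the analyzer raises GrammarError.
ambiguous = """
start: a | b
a.1: "a"
b.1: "a"
"""

try:
    Lark(ambiguous, parser='lalr')
except GrammarError as e:
    print(e)   # "Reduce/Reduce collision in ... between the following rules: ..."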


tests/test_parser.py  (+10 -12)

@@ -1781,7 +1781,7 @@ def _make_parser_test(LEXER, PARSER):
                 %import bad_test.NUMBER
                 """
             self.assertRaises(IOError, _Lark, grammar)

         @unittest.skipIf(LEXER=='dynamic', "%declare/postlex doesn't work with dynamic")
         def test_postlex_declare(self): # Note: this test does a lot. maybe split it up?
             class TestPostLexer:

@@ -1805,8 +1805,8 @@ def _make_parser_test(LEXER, PARSER):
             tree = parser.parse(test_file)
             self.assertEqual(tree.children, [Token('B', 'A')])

-        @unittest.skipIf(PARSER != 'earley', "Currently only Earley supports priority in rules")
-        def test_earley_prioritization(self):
+        @unittest.skipIf(PARSER == 'cyk', "Doesn't work for CYK")
+        def test_prioritization(self):
             "Tests effect of priority on result"

             grammar = """

@@ -1815,7 +1815,6 @@ def _make_parser_test(LEXER, PARSER):
             b.2: "a"
             """

-            # l = Lark(grammar, parser='earley', lexer='standard')
             l = _Lark(grammar)
             res = l.parse("a")
             self.assertEqual(res.children[0].data, 'b')

@@ -1827,14 +1826,13 @@ def _make_parser_test(LEXER, PARSER):
             """

             l = _Lark(grammar)
-            # l = Lark(grammar, parser='earley', lexer='standard')
             res = l.parse("a")
             self.assertEqual(res.children[0].data, 'a')



-        @unittest.skipIf(PARSER != 'earley', "Currently only Earley supports priority in rules")
-        def test_earley_prioritization_sum(self):
+        @unittest.skipIf(PARSER != 'earley' or LEXER == 'standard', "Currently only Earley supports priority sum in rules")
+        def test_prioritization_sum(self):
             "Tests effect of priority on result"

             grammar = """

@@ -1846,7 +1844,7 @@ def _make_parser_test(LEXER, PARSER):
             bb_.1: "bb"
             """

-            l = Lark(grammar, priority="invert")
+            l = _Lark(grammar, priority="invert")
             res = l.parse('abba')
             self.assertEqual(''.join(child.data for child in res.children), 'ab_b_a_')

@@ -1859,7 +1857,7 @@ def _make_parser_test(LEXER, PARSER):
             bb_: "bb"
             """

-            l = Lark(grammar, priority="invert")
+            l = _Lark(grammar, priority="invert")
             res = l.parse('abba')
             self.assertEqual(''.join(child.data for child in res.children), 'indirection')

@@ -1872,7 +1870,7 @@ def _make_parser_test(LEXER, PARSER):
             bb_.3: "bb"
             """

-            l = Lark(grammar, priority="invert")
+            l = _Lark(grammar, priority="invert")
             res = l.parse('abba')
             self.assertEqual(''.join(child.data for child in res.children), 'ab_b_a_')

@@ -1885,7 +1883,7 @@ def _make_parser_test(LEXER, PARSER):
             bb_.3: "bb"
             """

-            l = Lark(grammar, priority="invert")
+            l = _Lark(grammar, priority="invert")
             res = l.parse('abba')
             self.assertEqual(''.join(child.data for child in res.children), 'indirection')

@@ -2064,7 +2062,7 @@ def _make_parser_test(LEXER, PARSER):
             # Anonymous tokens shouldn't count
             p = _Lark("""start: ["a"] ["b"] ["c"] """, maybe_placeholders=True)
             self.assertEqual(p.parse("").children, [])

             # Unless keep_all_tokens=True
             p = _Lark("""start: ["a"] ["b"] ["c"] """, maybe_placeholders=True, keep_all_tokens=True)
             self.assertEqual(p.parse("").children, [None, None, None])

