@@ -176,6 +176,9 @@ class LarkOptions(Serialize):
     # These option are only used outside of `load_grammar`.
     _LOAD_ALLOWED_OPTIONS = {'postlex', 'transformer', 'use_bytes', 'debug', 'g_regex_flags', 'regex', 'propagate_positions', 'tree_class'}
 
+_VALID_PRIORITY_OPTIONS = ('auto', 'normal', 'invert', None)
+_VALID_AMBIGUITY_OPTIONS = ('auto', 'resolve', 'explicit', 'forest')
+
 
 class Lark(Serialize):
     """Main interface for the library.
@@ -272,16 +275,13 @@ class Lark(Serialize):
                     'Only %s supports disambiguation right now') % ', '.join(disambig_parsers)
 
         if self.options.priority == 'auto':
-            if self.options.parser in ('earley', 'cyk', ):
-                self.options.priority = 'normal'
-            elif self.options.parser in ('lalr', ):
-                self.options.priority = None
-        elif self.options.priority in ('invert', 'normal'):
-            assert self.options.parser in ('earley', 'cyk'), "priorities are not supported for LALR at this time"
-
-        assert self.options.priority in ('auto', None, 'normal', 'invert'), 'invalid priority option specified: {}. options are auto, none, normal, invert.'.format(self.options.priority)
+            self.options.priority = 'normal'
+
+        if self.options.priority not in _VALID_PRIORITY_OPTIONS:
+            raise ValueError("invalid priority option: %r. Must be one of %r" % (self.options.priority, _VALID_PRIORITY_OPTIONS))
         assert self.options.ambiguity not in ('resolve__antiscore_sum', ), 'resolve__antiscore_sum has been replaced with the option priority="invert"'
-        assert self.options.ambiguity in ('resolve', 'explicit', 'forest', 'auto', )
+        if self.options.ambiguity not in _VALID_AMBIGUITY_OPTIONS:
+            raise ValueError("invalid ambiguity option: %r. Must be one of %r" % (self.options.ambiguity, _VALID_AMBIGUITY_OPTIONS))
 
         # Parse the grammar file and compose the grammars (TODO)
         self.grammar = load_grammar(grammar, self.source, re_module, self.options.keep_all_tokens)
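With explicit `ValueError`s replacing the asserts, bad option values are rejected even under `python -O`, and the accepted values are defined once at module level. A minimal sketch of the new behavior (the grammar string is just a placeholder):

```python
from lark import Lark

grammar = 'start: "a"'  # trivial grammar, only to exercise option validation

# Rule priorities are no longer rejected for the LALR parser.
parser = Lark(grammar, parser='lalr', priority='normal')

# Unknown values now raise ValueError rather than AssertionError,
# so they are caught even when assertions are stripped (-O).
try:
    Lark(grammar, priority='banana')
except ValueError as e:
    print(e)  # invalid priority option: 'banana'. Must be one of ...
```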
@@ -253,7 +253,14 @@ class LALR_Analyzer(GrammarAnalyzer):
                 actions[la] = (Shift, next_state.closure)
             for la, rules in state.lookaheads.items():
                 if len(rules) > 1:
-                    reduce_reduce.append((la, rules))
+                    # Try to resolve conflict based on priority
+                    p = [(r.options.priority or 0, r) for r in rules]
+                    p.sort(key=lambda r: r[0], reverse=True)
+                    best, second_best = p[:2]
+                    if best[0] > second_best[0]:
+                        rules = [best[1]]
+                    else:
+                        reduce_reduce.append((state, la, rules))
                 if la in actions:
                     if self.debug:
                         logger.warning('Shift/Reduce conflict for terminal %s: (resolving as shift)', la.name)
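The key detail above: a reduce/reduce conflict only counts as resolved when the top priority is *strictly* greater than the runner-up; ties still land in `reduce_reduce` and get reported. A standalone sketch of the same decision rule (the `Rule` type here is an illustrative stand-in, not lark's class):

```python
from collections import namedtuple

# Illustrative stand-in for lark's rules; lark reads the value from
# r.options.priority, where None means "no priority set" and counts as 0.
Rule = namedtuple('Rule', 'name priority')

def resolve_by_priority(rules):
    """Return [winner] if one rule outranks all the others, else None."""
    p = sorted(((r.priority or 0, r) for r in rules), key=lambda t: t[0], reverse=True)
    (best_prio, best), (second_prio, _) = p[:2]
    if best_prio > second_prio:
        return [best]
    return None  # tie: a genuine conflict, to be reported

print(resolve_by_priority([Rule('a', 2), Rule('b', 1)]))        # [Rule(name='a', priority=2)]
print(resolve_by_priority([Rule('a', None), Rule('b', None)]))  # None
```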
@@ -263,9 +270,12 @@ class LALR_Analyzer(GrammarAnalyzer):
             m[state] = { k.name: v for k, v in actions.items() }
 
         if reduce_reduce:
-            msgs = [ 'Reduce/Reduce collision in %s between the following rules: %s'
-                    % (la, ''.join([ '\n\t\t- ' + str(r) for r in rules ]))
-                    for la, rules in reduce_reduce]
+            msgs = []
+            for state, la, rules in reduce_reduce:
+                msg = 'Reduce/Reduce collision in %s between the following rules: %s' % (la, ''.join([ '\n\t- ' + str(r) for r in rules ]))
+                if self.debug:
+                    msg += '\n collision occurred in state: {%s\n }' % ''.join(['\n\t' + str(x) for x in state.closure])
+                msgs.append(msg)
             raise GrammarError('\n\n'.join(msgs))
 
         states = { k.closure: v for k, v in m.items() }
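For context, a hedged example of a grammar that should hit this error path, and how a rule priority now resolves it (grammar and printed output are illustrative):

```python
from lark import Lark
from lark.exceptions import GrammarError

# Both rules reduce on the same lookahead: a reduce/reduce conflict for LALR.
conflicting = '''
start: a | b
a: "x"
b: "x"
'''

try:
    Lark(conflicting, parser='lalr')
except GrammarError as e:
    print(e)  # Reduce/Reduce collision in ... between the following rules: ...

# A strictly higher priority on one rule lets the analyzer pick a winner,
# so construction now succeeds.
Lark('''
start: a | b
a.2: "x"
b: "x"
''', parser='lalr')
```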
@@ -13,7 +13,6 @@ from .lalr_puppet import ParserPuppet
 
 class LALR_Parser(object):
     def __init__(self, parser_conf, debug=False):
-        assert all(r.options.priority is None for r in parser_conf.rules), "LALR doesn't yet support prioritization"
         analysis = LALR_Analyzer(parser_conf, debug=debug)
         analysis.compute_lalr()
         callbacks = parser_conf.callbacks
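Deleting this assert is what actually opens LALR up to prioritized rules: the analyzer above now consumes the priorities instead of rejecting them up front. Mirroring one of the new tests below, something like this should now construct and parse (previously it tripped the assert):

```python
from lark import Lark

# Rule priorities (the ".2"/".1" suffixes) under the LALR parser.
grammar = '''
start: a | b
a.2: "A"+
b.1: "A"+ "B"?
'''

parser = Lark(grammar, parser='lalr')
print(parser.parse("AAAA").children[0].data)  # expected: 'a' (higher priority)
```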
@@ -1781,7 +1781,7 @@ def _make_parser_test(LEXER, PARSER):
             %import bad_test.NUMBER
             """
         self.assertRaises(IOError, _Lark, grammar)
 
     @unittest.skipIf(LEXER=='dynamic', "%declare/postlex doesn't work with dynamic")
     def test_postlex_declare(self): # Note: this test does a lot. maybe split it up?
         class TestPostLexer:
@@ -1805,8 +1805,8 @@ def _make_parser_test(LEXER, PARSER):
         tree = parser.parse(test_file)
         self.assertEqual(tree.children, [Token('B', 'A')])
 
-    @unittest.skipIf(PARSER != 'earley', "Currently only Earley supports priority in rules")
-    def test_earley_prioritization(self):
+    @unittest.skipIf(PARSER == 'cyk', "Doesn't work for CYK")
+    def test_prioritization(self):
         "Tests effect of priority on result"
         grammar = """
@@ -1815,7 +1815,6 @@ def _make_parser_test(LEXER, PARSER):
         b.2: "a"
         """
-        # l = Lark(grammar, parser='earley', lexer='standard')
         l = _Lark(grammar)
         res = l.parse("a")
         self.assertEqual(res.children[0].data, 'b')
@@ -1827,14 +1826,31 @@ def _make_parser_test(LEXER, PARSER):
         """
         l = _Lark(grammar)
-        # l = Lark(grammar, parser='earley', lexer='standard')
         res = l.parse("a")
         self.assertEqual(res.children[0].data, 'a')
 
+        grammar = """
+        start: a | b
+        a.2: "A"+
+        b.1: "A"+ "B"?
+        """
+        l = _Lark(grammar)
+        res = l.parse("AAAA")
+        self.assertEqual(res.children[0].data, 'a')
+
+        l = _Lark(grammar)
+        res = l.parse("AAAB")
+        self.assertEqual(res.children[0].data, 'b')
+
+        l = _Lark(grammar, priority="invert")
+        res = l.parse("AAAA")
+        self.assertEqual(res.children[0].data, 'b')
 
-    @unittest.skipIf(PARSER != 'earley', "Currently only Earley supports priority in rules")
-    def test_earley_prioritization_sum(self):
+    @unittest.skipIf(PARSER != 'earley' or LEXER == 'standard', "Currently only Earley supports priority sum in rules")
+    def test_prioritization_sum(self):
         "Tests effect of priority on result"
         grammar = """
@@ -1846,7 +1862,7 @@ def _make_parser_test(LEXER, PARSER):
         bb_.1: "bb"
         """
 
-        l = Lark(grammar, priority="invert")
+        l = _Lark(grammar, priority="invert")
         res = l.parse('abba')
         self.assertEqual(''.join(child.data for child in res.children), 'ab_b_a_')
@@ -1859,7 +1875,7 @@ def _make_parser_test(LEXER, PARSER):
         bb_: "bb"
         """
 
-        l = Lark(grammar, priority="invert")
+        l = _Lark(grammar, priority="invert")
         res = l.parse('abba')
         self.assertEqual(''.join(child.data for child in res.children), 'indirection')
@@ -1872,7 +1888,7 @@ def _make_parser_test(LEXER, PARSER):
         bb_.3: "bb"
         """
 
-        l = Lark(grammar, priority="invert")
+        l = _Lark(grammar, priority="invert")
         res = l.parse('abba')
         self.assertEqual(''.join(child.data for child in res.children), 'ab_b_a_')
@@ -1885,7 +1901,7 @@ def _make_parser_test(LEXER, PARSER):
         bb_.3: "bb"
         """
 
-        l = Lark(grammar, priority="invert")
+        l = _Lark(grammar, priority="invert")
         res = l.parse('abba')
         self.assertEqual(''.join(child.data for child in res.children), 'indirection')
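These four tests pin down the `priority="invert"` semantics: Earley scores a candidate derivation by the *sum* of the priorities of the rules it uses, and "invert" flips the sign of each priority so the lowest total wins. A conceptual sketch of that ranking (purely illustrative, not lark's internals):

```python
# How summed rule priorities rank competing derivations,
# and what priority="invert" changes.
def derivation_score(rule_priorities, invert=False):
    sign = -1 if invert else 1
    return sum(sign * (p or 0) for p in rule_priorities)

# Two derivations of the same text: one uses a single priority-3 rule,
# the other chains two priority-1 rules (cf. bb_.3 vs. the ab_/b_ chain above).
candidates = {'direct': [3], 'chained': [1, 1]}

print(max(candidates, key=lambda k: derivation_score(candidates[k])))
# -> 'direct' (3 beats 2)
print(max(candidates, key=lambda k: derivation_score(candidates[k], invert=True)))
# -> 'chained' (-2 beats -3)
```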
@@ -2064,7 +2080,7 @@ def _make_parser_test(LEXER, PARSER):
         # Anonymous tokens shouldn't count
         p = _Lark("""start: ["a"] ["b"] ["c"] """, maybe_placeholders=True)
         self.assertEqual(p.parse("").children, [])
 
         # Unless keep_all_tokens=True
         p = _Lark("""start: ["a"] ["b"] ["c"] """, maybe_placeholders=True, keep_all_tokens=True)
         self.assertEqual(p.parse("").children, [None, None, None])