Browse Source

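Added rule prioritization to the Earley parser. Give a rule a priority by appending a dot and a number to its name (e.g. `rule.1: ...`). In case of ambiguity, the rule with the highest priority will be selected.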

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.5.1
Erez Shinan 7 years ago
parent
commit
4eec924403
8 changed files with 55 additions and 14 deletions
  1. +1
    -1
      lark/common.py
  2. +15
    -5
      lark/load_grammar.py
  3. +1
    -1
      lark/parse_tree_builder.py
  4. +2
    -2
      lark/parser_frontends.py
  5. +5
    -0
      lark/parsers/earley.py
  6. +6
    -5
      lark/parsers/grammar_analysis.py
  7. +1
    -0
      lark/parsers/lalr_parser.py
  8. +24
    -0
      tests/test_parser.py

+ 1
- 1
lark/common.py View File

@@ -41,7 +41,7 @@ class LexerConf:

class ParserConf:
def __init__(self, rules, callback, start):
assert all(len(r)==3 for r in rules)
assert all(len(r) == 4 for r in rules)
self.rules = rules
self.callback = callback
self.start = start


+ 15
- 5
lark/load_grammar.py View File

@@ -75,6 +75,7 @@ TOKENS = {
'_TO': '->',
'_IGNORE': r'%ignore',
'_IMPORT': r'%import',
'NUMBER': '\d+',
}

RULES = {
@@ -82,7 +83,8 @@ RULES = {
'_list': ['_item', '_list _item'],
'_item': ['rule', 'token', 'statement', '_NL'],

'rule': ['RULE _COLON expansions _NL'],
'rule': ['RULE _COLON expansions _NL',
'RULE _DOT NUMBER _COLON expansions _NL'],
'expansions': ['alias',
'expansions _OR alias',
'expansions _NL _OR alias'],
@@ -470,21 +472,29 @@ class Grammar:


class RuleOptions:
def __init__(self, keep_all_tokens=False, expand1=False, create_token=None, filter_out=False):
def __init__(self, keep_all_tokens=False, expand1=False, create_token=None, filter_out=False, priority=None):
self.keep_all_tokens = keep_all_tokens
self.expand1 = expand1
self.create_token = create_token # used for scanless postprocessing
self.priority = priority

self.filter_out = filter_out # remove this rule from the tree
# used for "token"-rules in scanless
@classmethod
def from_rule(cls, name, expansions):
def from_rule(cls, name, *x):
if len(x) > 1:
priority, expansions = x
priority = int(priority)
else:
expansions ,= x
priority = None

keep_all_tokens = name.startswith('!')
name = name.lstrip('!')
expand1 = name.startswith('?')
name = name.lstrip('?')

return name, expansions, cls(keep_all_tokens, expand1)
return name, expansions, cls(keep_all_tokens, expand1, priority=priority)



@@ -605,7 +615,7 @@ class GrammarLoader:
raise GrammarError("Token '%s' defined more than once" % name)
token_names.add(name)

rules = [RuleOptions.from_rule(name, x) for name, x in rule_defs]
rules = [RuleOptions.from_rule(*x) for x in rule_defs]

rule_names = set()
for name, _x, _o in rules:


+ 1
- 1
lark/parse_tree_builder.py View File

@@ -123,6 +123,6 @@ class ParseTreeBuilder:
raise GrammarError("Rule expansion '%s' already exists in rule %s" % (' '.join(expansion), origin))
setattr(callback, callback_name, alias_handler)

new_rules.append(( _origin, expansion, callback_name ))
new_rules.append(( _origin, expansion, callback_name, options ))

return new_rules, callback

+ 2
- 2
lark/parser_frontends.py View File

@@ -129,7 +129,7 @@ class Earley_NoLex:
def __init__(self, lexer_conf, parser_conf, options=None):
self.token_by_name = {t.name:t for t in lexer_conf.tokens}

rules = [(n, list(self._prepare_expansion(x)), a) for n,x,a in parser_conf.rules]
rules = [(n, list(self._prepare_expansion(x)), a, o) for n,x,a,o in parser_conf.rules]

resolve_ambiguity = (options.ambiguity=='resolve') if options else True
self.parser = earley.Parser(rules,
@@ -156,7 +156,7 @@ class Earley(WithLexer):
def __init__(self, lexer_conf, parser_conf, options=None):
WithLexer.__init__(self, lexer_conf)

rules = [(n, self._prepare_expansion(x), a) for n,x,a in parser_conf.rules]
rules = [(n, self._prepare_expansion(x), a, o) for n,x,a,o in parser_conf.rules]

resolve_ambiguity = (options.ambiguity=='resolve') if options else True
self.parser = earley.Parser(rules,


+ 5
- 0
lark/parsers/earley.py View File

@@ -224,6 +224,11 @@ class ApplyCallbacks(Transformer_NoRecurse):
return Tree(rule.origin, children)

def _compare_rules(rule1, rule2):
if rule1.options and rule2.options:
if rule1.options.priority is not None and rule2.options.priority is not None:
assert rule1.options.priority != rule2.options.priority, "Priority is the same between both rules: %s == %s" % (rule1, rule2)
return -compare(rule1.options.priority, rule2.options.priority)

if rule1.origin != rule2.origin:
return 0
c = compare( len(rule1.expansion), len(rule2.expansion))


+ 6
- 5
lark/parsers/grammar_analysis.py View File

@@ -7,10 +7,11 @@ class Rule(object):
origin : a symbol
expansion : a list of symbols
"""
def __init__(self, origin, expansion, alias=None):
def __init__(self, origin, expansion, alias=None, options=None):
self.origin = origin
self.expansion = expansion
self.alias = alias
self.options = options

def __repr__(self):
return '<%s : %s>' % (self.origin, ' '.join(map(str,self.expansion)))
@@ -111,12 +112,12 @@ class GrammarAnalyzer(object):
self.debug = debug
rule_tuples = list(rule_tuples)
rule_tuples.append(('$root', [start_symbol, '$end']))
rule_tuples = [(t[0], t[1], None) if len(t)==2 else t for t in rule_tuples]
rule_tuples = [(t[0], t[1], None, None) if len(t)==2 else t for t in rule_tuples]

self.rules = set()
self.rules_by_origin = {o: [] for o, _x, _a in rule_tuples}
for origin, exp, alias in rule_tuples:
r = Rule( origin, exp, alias )
self.rules_by_origin = {o: [] for o, _x, _a, _opt in rule_tuples}
for origin, exp, alias, options in rule_tuples:
r = Rule( origin, exp, alias, options )
self.rules.add(r)
self.rules_by_origin[origin].append(r)



+ 1
- 0
lark/parsers/lalr_parser.py View File

@@ -9,6 +9,7 @@ from .lalr_analysis import LALR_Analyzer, ACTION_SHIFT

class Parser(object):
def __init__(self, parser_conf):
assert all(o is None or o.priority is None for n,x,a,o in parser_conf.rules), "LALR doesn't yet support prioritization"
self.analysis = LALR_Analyzer(parser_conf.rules, parser_conf.start)
self.analysis.compute_lookahead()
self.callbacks = {rule: getattr(parser_conf.callback, rule.alias or rule.origin, None)


+ 24
- 0
tests/test_parser.py View File

@@ -621,6 +621,30 @@ def _make_parser_test(LEXER, PARSER):
self.assertEqual(len(tree.children), 2)


@unittest.skipIf(PARSER != 'earley', "Currently only Earley supports priority in rules")
def test_earley_prioritization(self):
    """Tests effect of priority on result.

    Rules ``a`` and ``b`` both match the input "a", so ``start`` is
    ambiguous. Each grammar gives the two rules different priorities via
    the ``name.N`` suffix, and the test checks that the child of the
    parsed tree is the rule carrying the larger number.
    """
    # Grammar text is kept flush-left so the string contents stay exactly
    # as written; only the priorities differ between the two grammars.
    b_preferred = """
start: a | b
a.1: "a"
b.2: "a"
"""
    a_preferred = """
start: a | b
a.2: "a"
b.1: "a"
"""

    cases = (
        (b_preferred, 'b'),
        (a_preferred, 'a'),
    )
    for grammar, expected_rule in cases:
        parser = Lark(grammar, parser='earley', lexer='standard')
        tree = parser.parse("a")
        self.assertEqual(tree.children[0].data, expected_rule)



_NAME = "Test" + PARSER.capitalize() + (LEXER or 'Scanless').capitalize()


Loading…
Cancel
Save