Browse Source

Refactor + tests + additional splitting up.

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.12.0
MegaIng 4 years ago
parent
commit
845b6fa477
3 changed files with 105 additions and 26 deletions
  1. +75
    -25
      lark/load_grammar.py
  2. +1
    -1
      lark/utils.py
  3. +29
    -0
      tests/test_parser.py

+ 75
- 25
lark/load_grammar.py View File

@@ -179,42 +179,87 @@ RULES = {
class EBNF_to_BNF(Transformer_InPlace):
def __init__(self):
self.new_rules = []
self.rules_by_expr = {}
self.rules_cache = {}
self.prefix = 'anon'
self.i = 0
self.rule_options = None

def _add_recurse_rule(self, type_, expr):
if expr in self.rules_by_expr:
return self.rules_by_expr[expr]

new_name = '__%s_%s_%d' % (self.prefix, type_, self.i)
def _name_rule(self, inner):
new_name = '__%s_%s_%d' % (self.prefix, inner, self.i)
self.i += 1
t = NonTerminal(new_name)
tree = ST('expansions', [ST('expansion', [expr]), ST('expansion', [t, expr])])
self.new_rules.append((new_name, tree, self.rule_options))
self.rules_by_expr[expr] = t
return new_name

def _add_rule(self, key, name, expansions):
    """Register a generated rule and remember its non-terminal under ``key``.

    Appends ``(name, expansions, self.rule_options)`` to ``self.new_rules``,
    stores ``NonTerminal(name)`` in ``self.rules_cache[key]`` so later
    requests with the same key can reuse the rule, and returns that
    non-terminal.
    """
    nonterminal = NonTerminal(name)
    rule_entry = (name, expansions, self.rule_options)
    self.new_rules.append(rule_entry)
    self.rules_cache[key] = nonterminal
    return nonterminal

def _add_recurse_rule(self, type_, expr):
    """Return a non-terminal for a rule that repeats ``expr`` one or more times.

    The rule is cached by ``expr``: asking again for the same expression
    returns the previously created non-terminal instead of adding a new rule.
    ``type_`` is only used to build the generated rule's name.
    """
    if expr in self.rules_cache:
        return self.rules_cache[expr]

    rule_name = self._name_rule(type_)
    self_ref = NonTerminal(rule_name)
    # Left-recursive definition:  rule: expr | rule expr
    single = ST('expansion', [expr])
    repeated = ST('expansion', [self_ref, expr])
    alternatives = ST('expansions', [single, repeated])
    return self._add_rule(expr, rule_name, alternatives)

def _add_repeat_rule(self, a, b, target, atom):
if (a, b, target, atom) in self.rules_by_expr:
return self.rules_by_expr[(a, b, target, atom)]
new_name = '__%s_a%d_b%d_%d' % (self.prefix, a, b, self.i)
self.i += 1
t = NonTerminal(new_name)
tree = ST('expansions', [ST('expansion', [target] * a + [atom] * b)])
self.new_rules.append((new_name, tree, self.rule_options))
self.rules_by_expr[(a, b, target, atom)] = t
return t
"""
When target matches n times atom
This builds a rule that matches atom (a*n + b) times
"""
key = (a, b, target, atom)
try:
return self.rules_cache[key]
except KeyError:
new_name = self._name_rule('a%d_b%d' % (a, b))
tree = ST('expansions', [ST('expansion', [target] * a + [atom] * b)])
return self._add_rule(key, new_name, tree)

def _add_repeat_opt_rule(self, a, b, target, target_opt, atom):
    """Create (or reuse) a rule that matches ``atom`` 0 to (a*n + b) - 1 times.

    Preconditions: ``target`` matches ``atom`` exactly n times, and
    ``target_opt`` matches ``atom`` 0 to n-1 times.

    Two groups of alternatives are generated:

    * ``target * i`` followed by ``target_opt``, for i in 0 .. a-1.
      These match 0 to a*n - 1 atoms.
    * ``target * a`` followed by ``atom * i``, for i in 0 .. b-1.
      These match a*n to a*n + b - 1 atoms.

    Example for a=3, b=2::

        new_rule: target_opt
                | target target_opt
                | target target target_opt
                | target target target
                | target target target atom

    The rule is cached under ``(a, b, target, atom, "opt")``; the
    non-terminal of the new or cached rule is returned.
    """
    key = (a, b, target, atom, "opt")
    try:
        return self.rules_cache[key]
    except KeyError:
        new_name = self._name_rule('a%d_b%d_opt' % (a, b))
        tree = ST('expansions', [
            ST('expansion', [target] * i + [target_opt])
            for i in range(a)
        ] + [
            # Start at i = 0: the bare ``target * a`` alternative is the only
            # one matching exactly a*n atoms. Starting at 1 left that count
            # unmatched whenever b > 0.
            ST('expansion', [target] * a + [atom] * i)
            for i in range(b)
        ])
        return self._add_rule(key, new_name, tree)

def _generate_repeats(self, rule, mn, mx):
factors = small_factors(mn)
target = rule
for a, b in factors:
target = self._add_repeat_rule(a, b, target, rule)
mn_factors = small_factors(mn)
mn_target = rule
for a, b in mn_factors:
mn_target = self._add_repeat_rule(a, b, mn_target, rule)
if mx == mn:
return mn_target
diff = mx - mn + 1 # We add one because _add_repeat_opt_rule needs it.
diff_factors = small_factors(diff)
diff_target = rule
diff_opt_target = ST('expansion', []) # match rule 0 times (e.g. 1-1 times)
for a, b in diff_factors[:-1]:
new_diff_target = self._add_repeat_rule(a, b, diff_target, rule)
diff_opt_target = self._add_repeat_opt_rule(a, b, diff_target, diff_opt_target, rule)
diff_target = new_diff_target
a, b = diff_factors[-1]
diff_opt_target = self._add_repeat_opt_rule(a, b, diff_target, diff_opt_target, rule)

# return ST('expansions', [ST('expansion', [rule] * n) for n in range(mn, mx + 1)])
return ST('expansions', [ST('expansion', [target] + [rule] * n) for n in range(0, mx - mn + 1)])
# return ST('expansions', [ST('expansion', [mn_target] + [rule] * n) for n in range(0, mx - mn + 1)])
return ST('expansions', [ST('expansion', [mn_target] + [diff_opt_target])])

def expr(self, rule, op, *args):
if op.value == '?':
@@ -240,7 +285,12 @@ class EBNF_to_BNF(Transformer_InPlace):
mn, mx = map(int, args)
if mx < mn or mn < 0:
raise GrammarError("Bad Range for %s (%d..%d isn't allowed)" % (rule, mn, mx))
return self._generate_repeats(rule, mn, mx)
# For small number of repeats, we don't need to build new rules.
# Value 20 is arbitrarily chosen
if mx > 20:
return self._generate_repeats(rule, mn, mx)
else:
return ST('expansions', [ST('expansion', [rule] * n) for n in range(mn, mx + 1)])
assert False, op

def maybe(self, rule):


+ 1
- 1
lark/utils.py View File

@@ -373,7 +373,7 @@ def small_factors(n):

Currently, we also keep a + b <= 10, but that might change
"""
assert n > 0
assert n >= 0
if n < 10:
return [(n, 0)]
# TODO: Think of better algorithms (Prime factors should minimize the number of steps)


+ 29
- 0
tests/test_parser.py View File

@@ -2226,6 +2226,35 @@ def _make_parser_test(LEXER, PARSER):
self.assertRaises((ParseError, UnexpectedInput), l.parse, u'ABB')
self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAAABB')

@unittest.skipIf(PARSER == 'cyk', "For large number of repeats, empty rules might be generated")
def test_ranged_repeat_large(self):
# Checks the `~` repeat operator with counts above the small-repeat cutoff.
# Large is currently arbitrarily chosen to be larger than 20
# Case 1: exact repeat count (30). Only exactly 30 "A"s may parse.
g = u"""!start: "A"~30
"""
l = _Lark(g)
self.assertGreater(len(l.rules), 1, "Expected that more than one rule will be generated")
# With `!start` the expected tree contains every "A" token.
self.assertEqual(l.parse(u'A'*30), Tree('start', ["A"]*30))
# One too few and one too many must both be rejected.
self.assertRaises(ParseError, l.parse, u'A'*29)
self.assertRaises((ParseError, UnexpectedInput), l.parse, u'A'*31)


# Case 2: ranged repeat (0..100). Checks both ends of the range and the
# first count past the upper bound.
g = u"""!start: "A"~0..100
"""
l = _Lark(g)
self.assertEqual(l.parse(u''), Tree('start', []))
self.assertEqual(l.parse(u'A'), Tree('start', ['A']))
self.assertEqual(l.parse(u'A'*100), Tree('start', ['A']*100))
self.assertRaises((UnexpectedToken, UnexpectedInput), l.parse, u'A' * 101)

# Case 3: large exact repeat count.
# 8191 is a Mersenne prime
# NOTE(review): presumably chosen because a prime cannot be split into
# factors without a remainder - confirm against small_factors.
g = u"""start: "A"~8191
"""
l = _Lark(g)
# Without the `!` prefix the expected tree has no children.
self.assertEqual(l.parse(u'A'*8191), Tree('start', []))
self.assertRaises((UnexpectedToken, UnexpectedInput), l.parse, u'A' * 8190)
self.assertRaises((UnexpectedToken, UnexpectedInput), l.parse, u'A' * 8192)


@unittest.skipIf(PARSER=='earley', "Priority not handled correctly right now") # TODO XXX
def test_priority_vs_embedded(self):
g = """


Loading…
Cancel
Save