Browse Source

FEATURE: Added support for ranged-repeat for rules and terminals (Issues #75, #19)

Syntax: symbol~number
      | symbol~min..max

Example:

  HEXCOLOR: "#" (HEXDIGIT~3 | HEXDIGIT~6)
  short_sentence: word~4..20

Added range for tokens
tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.5.4
Erez Shinan 6 years ago
parent
commit
7d11dfa5cd
2 changed files with 79 additions and 3 deletions
  1. +25
    -3
      lark/load_grammar.py
  2. +54
    -0
      tests/test_parser.py

+ 25
- 3
lark/load_grammar.py View File

@@ -70,6 +70,7 @@ TOKENS = {
'_COLON': ':',
'_OR': r'\|',
'_DOT': r'\.',
'TILDE': '~',
'RULE': '!?[_?]?[a-z][_a-z0-9]*',
'TOKEN': '_?[A-Z][_A-Z0-9]*',
'STRING': r'"(\\"|\\\\|[^"\n])*?"i?',
@@ -100,7 +101,10 @@ RULES = {
'_expansion': ['', '_expansion expr'],

'?expr': ['atom',
'atom OP'],
'atom OP',
'atom TILDE NUMBER',
'atom TILDE NUMBER _DOT _DOT NUMBER',
],

'?atom': ['_LPAR expansions _RPAR',
'maybe',
@@ -146,7 +150,7 @@ class EBNF_to_BNF(InlineTransformer):
self.rules_by_expr[expr] = t
return t

def expr(self, rule, op):
def expr(self, rule, op, *args):
if op.value == '?':
return T('expansions', [rule, T('expansion', [])])
elif op.value == '+':
@@ -162,6 +166,14 @@ class EBNF_to_BNF(InlineTransformer):
# _c : _c c | c;
new_name = self._add_recurse_rule('star', rule)
return T('expansions', [new_name, T('expansion', [])])
elif op.value == '~':
if len(args) == 1:
mn = mx = int(args[0])
else:
mn, mx = map(int, args)
if mx < mn:
raise GrammarError("Bad Range for %s (%d..%d isn't allowed)" % (rule, mn, mx))
return T('expansions', [T('expansion', [rule] * n) for n in range(mn, mx+1)])
assert False, op


@@ -377,7 +389,17 @@ class TokenTreeToPattern(Transformer):
return PatternRE('(?:%s)' % ('|'.join(i.to_regexp() for i in exps)), exps[0].flags)

def expr(self, args):
    """Build the regexp pattern for a token expression followed by an operator.

    ``args`` holds the inner pattern and the operator, optionally followed
    by repeat counts when the operator is ``~``:

    * ``[inner, op]``            -- ``op`` is appended to the regexp as-is.
    * ``[inner, '~', n]``        -- exact repeat, emitted as ``{n}``.
    * ``[inner, '~', mn, mx]``   -- ranged repeat, emitted as ``{mn,mx}``.

    Raises GrammarError when the range maximum is smaller than its minimum.
    """
    # Only the first two items are unpacked here: a ranged repeat carries
    # one or two extra count arguments, so unpacking all of ``args`` into
    # two names would fail for the ``~`` operator.
    inner, op = args[:2]
    if op == '~':
        if len(args) == 3:
            op = "{%d}" % int(args[2])
        else:
            mn, mx = map(int, args[2:])
            if mx < mn:
                raise GrammarError("Bad Range for %s (%d..%d isn't allowed)" % (inner, mn, mx))
            op = "{%d,%d}" % (mn, mx)
    else:
        # Every other operator takes no extra arguments.
        assert len(args) == 2
    return PatternRE('(?:%s)%s' % (inner.to_regexp(), op), inner.flags)




+ 54
- 0
tests/test_parser.py View File

@@ -1074,6 +1074,60 @@ def _make_parser_test(LEXER, PARSER):
_Lark(r'start: "\\\t"').parse('\\\t')


def test_ranged_repeat_rules(self):
    """Exercise `symbol~n` and `symbol~min..max` repetition inside rules."""
    # Exact repeat: only three "A"s are accepted.
    parser = _Lark(u'!start: "A"~3\n')
    self.assertEqual(parser.parse(u'AAA'), Tree('start', ["A"] * 3))
    self.assertRaises(ParseError, parser.parse, u'AA')
    self.assertRaises((ParseError, UnexpectedInput), parser.parse, u'AAAA')

    # Ranged repeat starting at zero: the empty input must be accepted too.
    grammar = u'!start: "A"~0..2\n'
    if PARSER != 'cyk': # XXX CYK currently doesn't support empty grammars
        parser = _Lark(grammar)
        for count in range(3):
            self.assertEqual(parser.parse(u'A' * count), Tree('start', ['A'] * count))
        self.assertRaises((UnexpectedToken, UnexpectedInput), parser.parse, u'AAA')

    # A range whose maximum is below its minimum is rejected at grammar load.
    self.assertRaises(GrammarError, _Lark, u'!start: "A"~3..2\n')

    # A ranged repeat followed by an exact repeat in the same rule.
    parser = _Lark(u'!start: "A"~2..3 "B"~2\n')
    self.assertEqual(parser.parse(u'AABB'), Tree('start', ['A', 'A', 'B', 'B']))
    self.assertEqual(parser.parse(u'AAABB'), Tree('start', ['A', 'A', 'A', 'B', 'B']))
    self.assertRaises(ParseError, parser.parse, u'AAAB')
    for rejected in (u'AAABBB', u'ABB', u'AAAABB'):
        self.assertRaises((ParseError, UnexpectedInput), parser.parse, rejected)


def test_ranged_repeat_terms(self):
    """Exercise `~n` and `~min..max` repetition inside terminal definitions."""
    either_error = (ParseError, UnexpectedInput)

    # A terminal made of exactly three "A"s.
    parser = _Lark(u'!start: AAA\nAAA: "A"~3\n')
    self.assertEqual(parser.parse(u'AAA'), Tree('start', ["AAA"]))
    for rejected in (u'AA', u'AAAA'):
        self.assertRaises(either_error, parser.parse, rejected)

    # Ranged and exact repeats combined inside one terminal, plus a second
    # ranged terminal following it.
    parser = _Lark(u'!start: AABB CC\nAABB: "A"~0..2 "B"~2\nCC: "C"~1..2\n')
    accepted = (
        (u'AABBCC', ['AABB', 'CC']),
        (u'BBC', ['BB', 'C']),
        (u'ABBCC', ['ABB', 'CC']),
    )
    for text, tokens in accepted:
        self.assertEqual(parser.parse(text), Tree('start', tokens))
    for rejected in (u'AAAB', u'AAABBB', u'ABB', u'AAAABB'):
        self.assertRaises(either_error, parser.parse, rejected)





Loading…
Cancel
Save