Syntax: symbol~number | symbol~min..max
Example:
HEXCOLOR: "#" (HEXDIGIT~3 | HEXDIGIT~6)
short_sentence: word~4..20
Added range for tokens
tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.5.4
@@ -70,6 +70,7 @@ TOKENS = { | |||||
'_COLON': ':', | '_COLON': ':', | ||||
'_OR': r'\|', | '_OR': r'\|', | ||||
'_DOT': r'\.', | '_DOT': r'\.', | ||||
'TILDE': '~', | |||||
'RULE': '!?[_?]?[a-z][_a-z0-9]*', | 'RULE': '!?[_?]?[a-z][_a-z0-9]*', | ||||
'TOKEN': '_?[A-Z][_A-Z0-9]*', | 'TOKEN': '_?[A-Z][_A-Z0-9]*', | ||||
'STRING': r'"(\\"|\\\\|[^"\n])*?"i?', | 'STRING': r'"(\\"|\\\\|[^"\n])*?"i?', | ||||
@@ -100,7 +101,10 @@ RULES = { | |||||
'_expansion': ['', '_expansion expr'], | '_expansion': ['', '_expansion expr'], | ||||
'?expr': ['atom', | '?expr': ['atom', | ||||
'atom OP'], | |||||
'atom OP', | |||||
'atom TILDE NUMBER', | |||||
'atom TILDE NUMBER _DOT _DOT NUMBER', | |||||
], | |||||
'?atom': ['_LPAR expansions _RPAR', | '?atom': ['_LPAR expansions _RPAR', | ||||
'maybe', | 'maybe', | ||||
@@ -146,7 +150,7 @@ class EBNF_to_BNF(InlineTransformer): | |||||
self.rules_by_expr[expr] = t | self.rules_by_expr[expr] = t | ||||
return t | return t | ||||
def expr(self, rule, op): | |||||
def expr(self, rule, op, *args): | |||||
if op.value == '?': | if op.value == '?': | ||||
return T('expansions', [rule, T('expansion', [])]) | return T('expansions', [rule, T('expansion', [])]) | ||||
elif op.value == '+': | elif op.value == '+': | ||||
@@ -162,6 +166,14 @@ class EBNF_to_BNF(InlineTransformer): | |||||
# _c : _c c | c; | # _c : _c c | c; | ||||
new_name = self._add_recurse_rule('star', rule) | new_name = self._add_recurse_rule('star', rule) | ||||
return T('expansions', [new_name, T('expansion', [])]) | return T('expansions', [new_name, T('expansion', [])]) | ||||
elif op.value == '~': | |||||
if len(args) == 1: | |||||
mn = mx = int(args[0]) | |||||
else: | |||||
mn, mx = map(int, args) | |||||
if mx < mn: | |||||
raise GrammarError("Bad Range for %s (%d..%d isn't allowed)" % (rule, mn, mx)) | |||||
return T('expansions', [T('expansion', [rule] * n) for n in range(mn, mx+1)]) | |||||
assert False, op | assert False, op | ||||
@@ -377,7 +389,17 @@ class TokenTreeToPattern(Transformer): | |||||
return PatternRE('(?:%s)' % ('|'.join(i.to_regexp() for i in exps)), exps[0].flags) | return PatternRE('(?:%s)' % ('|'.join(i.to_regexp() for i in exps)), exps[0].flags) | ||||
def expr(self, args):
    """Turn a token-level expression with an operator into a regexp pattern.

    ``args`` starts with the inner pattern and the operator.  When the
    operator is ``~`` it is followed by either one number (exact count) or
    two numbers (minimum and maximum count); any other operator must be the
    last element and is appended to the inner regexp verbatim.

    Raises GrammarError when a ``min..max`` range has ``max < min``.
    """
    inner, op = args[:2]
    is_ranged = (op == '~')

    if not is_ranged:
        # Non-range operators carry no extra arguments.
        assert len(args) == 2
    elif len(args) == 3:
        # inner~n  ->  exactly n repetitions
        op = "{%d}" % int(args[2])
    else:
        # inner~lo..hi  ->  between lo and hi repetitions
        lo, hi = map(int, args[2:])
        if hi < lo:
            raise GrammarError("Bad Range for %s (%d..%d isn't allowed)" % (inner, lo, hi))
        op = "{%d,%d}" % (lo, hi)

    regexp = '(?:%s)%s' % (inner.to_regexp(), op)
    return PatternRE(regexp, inner.flags)
@@ -1074,6 +1074,60 @@ def _make_parser_test(LEXER, PARSER): | |||||
_Lark(r'start: "\\\t"').parse('\\\t') | _Lark(r'start: "\\\t"').parse('\\\t') | ||||
def test_ranged_repeat_rules(self):
    """Check the ``~n`` and ``~min..max`` repeat operators inside rules."""
    rejected = (ParseError, UnexpectedInput)

    # Exact count: only three "A"s are accepted.
    parser = _Lark(u'!start: "A"~3\n')
    self.assertEqual(parser.parse(u'AAA'), Tree('start', ["A", "A", "A"]))
    self.assertRaises(ParseError, parser.parse, u'AA')
    self.assertRaises(rejected, parser.parse, u'AAAA')

    # Range starting at zero: the empty input is accepted as well.
    zero_to_two = u'!start: "A"~0..2\n'
    if PARSER != 'cyk':  # XXX CYK currently doesn't support empty grammars
        parser = _Lark(zero_to_two)
        for count in range(3):
            self.assertEqual(parser.parse(u'A' * count), Tree('start', ['A'] * count))
        self.assertRaises((UnexpectedToken, UnexpectedInput), parser.parse, u'AAA')

    # A range whose maximum is below its minimum is rejected at grammar load.
    self.assertRaises(GrammarError, _Lark, u'!start: "A"~3..2\n')

    # A range followed by an exact count in the same rule.
    parser = _Lark(u'!start: "A"~2..3 "B"~2\n')
    self.assertEqual(parser.parse(u'AABB'), Tree('start', ['A', 'A', 'B', 'B']))
    self.assertEqual(parser.parse(u'AAABB'), Tree('start', ['A', 'A', 'A', 'B', 'B']))
    self.assertRaises(ParseError, parser.parse, u'AAAB')
    for bad_input in (u'AAABBB', u'ABB', u'AAAABB'):
        self.assertRaises(rejected, parser.parse, bad_input)
def test_ranged_repeat_terms(self):
    """Check the ``~n`` and ``~min..max`` repeat operators inside terminals."""
    rejected = (ParseError, UnexpectedInput)

    # Terminal made of exactly three "A"s.
    parser = _Lark(u'!start: AAA\n'
                   u'AAA: "A"~3\n')
    self.assertEqual(parser.parse(u'AAA'), Tree('start', ["AAA"]))
    for bad_input in (u'AA', u'AAAA'):
        self.assertRaises(rejected, parser.parse, bad_input)

    # Two terminals, each combining ranged repeats.
    parser = _Lark(u'!start: AABB CC\n'
                   u'AABB: "A"~0..2 "B"~2\n'
                   u'CC: "C"~1..2\n')
    accepted = [
        (u'AABBCC', ['AABB', 'CC']),
        (u'BBC', ['BB', 'C']),
        (u'ABBCC', ['ABB', 'CC']),
    ]
    for text, children in accepted:
        self.assertEqual(parser.parse(text), Tree('start', children))
    for bad_input in (u'AAAB', u'AAABBB', u'ABB', u'AAAABB'):
        self.assertRaises(rejected, parser.parse, bad_input)