@@ -85,7 +85,7 @@ Each item is one of:
 * `TERMINAL`
 * `"string literal"` or `/regexp literal/`
 * `(item item ..)` - Group items
-* `[item item ..]` - Maybe. Same as: `(item item ..)?`
+* `[item item ..]` - Maybe. Same as `(item item ..)?`
 * `item?` - Zero or one instances of item ("maybe")
 * `item*` - Zero or more instances of item
 * `item+` - One or more instances of item
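The doc tweak above reflects the real change in this PR: `[..]` is no longer pure sugar for `(..)?`. With `maybe_placeholders=True`, an unmatched `[..]` leaves a `None` child in the tree, while `?` still drops the child entirely. A minimal sketch of the difference (grammar and output are illustrative, assuming the public `lark` API):

```python
from lark import Lark

# [A] leaves a None placeholder when unmatched; A? would simply omit the child.
parser = Lark('''start: [A] "b"
                 A: "a"''', maybe_placeholders=True)

print(parser.parse("b").children)   # [None]          -- placeholder for the missing A
print(parser.parse("ab").children)  # [Token(A, 'a')]
```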
@@ -171,7 +171,8 @@ class _Lex:
                 break
             else:
-                raise UnexpectedCharacters(stream, line_ctr.char_pos, line_ctr.line, line_ctr.column, state=self.state)
+                allowed = [v for m, tfi in lexer.mres for v in tfi.values()]
+                raise UnexpectedCharacters(stream, line_ctr.char_pos, line_ctr.line, line_ctr.column, allowed=allowed, state=self.state)

 class UnlessCallback:
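This hunk collects the names of every terminal the lexer could have matched at the failure point and hands them to `UnexpectedCharacters` as `allowed`, so error handlers can say what was expected rather than just where. A sketch of how a caller might use it (hypothetical grammar; assumes the exception exposes the keyword arguments as attributes):

```python
from lark import Lark
from lark.exceptions import UnexpectedCharacters

parser = Lark('''start: NUMBER+
                 NUMBER: /[0-9]+/
                 %ignore " "''')

try:
    parser.parse("12 x")
except UnexpectedCharacters as e:
    # e.allowed now carries the terminal names the lexer was prepared to match
    print(e.line, e.column, sorted(e.allowed))
```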
@@ -176,13 +176,7 @@ class EBNF_to_BNF(Transformer_InPlace):
     def expr(self, rule, op, *args):
         if op.value == '?':
-            if isinstance(rule, Terminal) and rule.filter_out and not (
-                    self.rule_options and self.rule_options.keep_all_tokens):
-                empty = ST('expansion', [])
-            elif isinstance(rule, NonTerminal) and rule.name.startswith('_'):
-                empty = ST('expansion', [])
-            else:
-                empty = _EMPTY
+            empty = ST('expansion', [])
             return ST('expansions', [rule, empty])
         elif op.value == '+':
             # a : b c+ d
@@ -207,6 +201,23 @@ class EBNF_to_BNF(Transformer_InPlace):
             return ST('expansions', [ST('expansion', [rule] * n) for n in range(mn, mx+1)])
         assert False, op

+    def maybe(self, rule):
+        keep_all_tokens = self.rule_options and self.rule_options.keep_all_tokens
+
+        def will_not_get_removed(sym):
+            if isinstance(sym, NonTerminal):
+                return not sym.name.startswith('_')
+            if isinstance(sym, Terminal):
+                return keep_all_tokens or not sym.filter_out
+            assert False
+
+        if any(rule.scan_values(will_not_get_removed)):
+            empty = _EMPTY
+        else:
+            empty = ST('expansion', [])
+
+        return ST('expansions', [rule, empty])
+
 class SimplifyRule_Visitor(Visitor):
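The new `maybe` handler is where the placeholder decision lives: `scan_values` walks the symbols under the `[..]`, and `will_not_get_removed` asks whether any of them would survive tree filtering (rules not starting with `_`, and terminals either kept via `keep_all_tokens` or not filtered out). Only then does the empty branch become `_EMPTY`, the marker that later turns into a `None` child; otherwise a plain empty expansion is used and nothing is emitted. A standalone sketch of that predicate, with mock classes standing in for Lark's grammar symbols:

```python
from collections import namedtuple

# Illustrative stand-ins for lark's grammar symbols, not the real classes
NonTerminal = namedtuple('NonTerminal', ['name'])
Terminal = namedtuple('Terminal', ['name', 'filter_out'])

def needs_placeholder(symbols, keep_all_tokens=False):
    """True if an unmatched [..] around these symbols should leave a None child."""
    def will_not_get_removed(sym):
        if isinstance(sym, NonTerminal):
            return not sym.name.startswith('_')       # _underscored rules are inlined away
        return keep_all_tokens or not sym.filter_out  # filtered tokens vanish from the tree

    return any(will_not_get_removed(s) for s in symbols)

print(needs_placeholder([Terminal('B', filter_out=True)]))   # False: ["b"] leaves nothing behind
print(needs_placeholder([NonTerminal('_c')]))                # False: [_c] is invisible
print(needs_placeholder([Terminal('A', filter_out=False)]))  # True:  [A] gets a None slot
```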
@@ -268,9 +279,6 @@ class RuleTreeToText(Transformer):
 @inline_args
 class CanonizeTree(Transformer_InPlace):
-    def maybe(self, expr):
-        return ST('expr', [expr, Token('OP', '?', -1)])
-
     def tokenmods(self, *args):
         if len(args) == 1:
             return list(args)
@@ -428,6 +436,9 @@ class TerminalTreeToPattern(Transformer):
         assert len(args) == 2
         return PatternRE('(?:%s)%s' % (inner.to_regexp(), op), inner.flags)

+    def maybe(self, expr):
+        return self.expr(expr + ['?'])
+
     def alias(self, t):
         raise GrammarError("Aliasing not allowed in terminals (You used -> in the wrong place)")
@@ -1371,16 +1371,23 @@ def _make_parser_test(LEXER, PARSER):
         @unittest.skipIf(PARSER=='cyk', "Empty rules")
         def test_maybe_placeholders(self):
             # Anonymous tokens shouldn't count
-            p = _Lark("""start: "a"? "b"? "c"? """, maybe_placeholders=True)
+            p = _Lark("""start: ["a"] ["b"] ["c"] """, maybe_placeholders=True)
             self.assertEqual(p.parse("").children, [])

-            # Anonymous tokens shouldn't count, other constructs should
-            p = _Lark("""start: A? "b"? _c?
+            # All invisible constructs shouldn't count
+            p = _Lark("""start: [A] ["b"] [_c] ["e" "f" _c]
                         A: "a"
                         _c: "c" """, maybe_placeholders=True)
             self.assertEqual(p.parse("").children, [None])
+            self.assertEqual(p.parse("c").children, [None])
+            self.assertEqual(p.parse("aefc").children, ['a'])

-            p = _Lark("""!start: "a"? "b"? "c"? """, maybe_placeholders=True)
+            # ? shouldn't apply
+            p = _Lark("""!start: ["a"] "b"? ["c"] """, maybe_placeholders=True)
+            self.assertEqual(p.parse("").children, [None, None])
+            self.assertEqual(p.parse("b").children, [None, 'b', None])
+
+            p = _Lark("""!start: ["a"] ["b"] ["c"] """, maybe_placeholders=True)
             self.assertEqual(p.parse("").children, [None, None, None])
             self.assertEqual(p.parse("a").children, ['a', None, None])
             self.assertEqual(p.parse("b").children, [None, 'b', None])
@@ -1390,7 +1397,7 @@ def _make_parser_test(LEXER, PARSER):
             self.assertEqual(p.parse("bc").children, [None, 'b', 'c'])
             self.assertEqual(p.parse("abc").children, ['a', 'b', 'c'])

-            p = _Lark("""!start: ("a"? "b" "c"?)+ """, maybe_placeholders=True)
+            p = _Lark("""!start: (["a"] "b" ["c"])+ """, maybe_placeholders=True)
             self.assertEqual(p.parse("b").children, [None, 'b', None])
             self.assertEqual(p.parse("bb").children, [None, 'b', None, None, 'b', None])
             self.assertEqual(p.parse("abbc").children, ['a', 'b', None, None, 'b', 'c'])
@@ -1401,7 +1408,7 @@ def _make_parser_test(LEXER, PARSER):
                 'a', 'b', 'c',
                 None, 'b', None])

-            p = _Lark("""!start: "a"? "c"? "b"+ "a"? "d"? """, maybe_placeholders=True)
+            p = _Lark("""!start: ["a"] ["c"] "b"+ ["a"] ["d"] """, maybe_placeholders=True)
             self.assertEqual(p.parse("bb").children, [None, None, 'b', 'b', None, None])
             self.assertEqual(p.parse("bd").children, [None, None, 'b', None, 'd'])
             self.assertEqual(p.parse("abba").children, ['a', None, 'b', 'b', 'a', None])