@@ -85,7 +85,7 @@ Each item is one of:
 * `TERMINAL`
 * `"string literal"` or `/regexp literal/`
 * `(item item ..)` - Group items
-* `[item item ..]` - Maybe. Same as: `(item item ..)?`
+* `[item item ..]` - Maybe. Same as `(item item ..)?`
 * `item?` - Zero or one instances of item ("maybe")
 * `item*` - Zero or more instances of item
 * `item+` - One or more instances of item
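Aside, not part of the patch: `[item]` and `item?` accept the same input; with this change they differ only under `maybe_placeholders=True`, where `[..]` alone produces `None` placeholders. A minimal sketch against lark's public API (grammar names illustrative):

    from lark import Lark

    # A named terminal inside [..] leaves a None placeholder when absent;
    # the anonymous, filtered-out "b" never occupies a slot.
    p = Lark('''start: [A] "b"
                A: "a"''', maybe_placeholders=True)
    assert p.parse("b").children == [None]
    assert p.parse("ab").children == ['a']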
@@ -171,7 +171,8 @@ class _Lex:
                     break
             else:
-                raise UnexpectedCharacters(stream, line_ctr.char_pos, line_ctr.line, line_ctr.column, state=self.state)
+                allowed = [v for m, tfi in lexer.mres for v in tfi.values()]
+                raise UnexpectedCharacters(stream, line_ctr.char_pos, line_ctr.line, line_ctr.column, allowed=allowed, state=self.state)

 class UnlessCallback:
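Aside, not part of the patch: `lexer.mres` pairs each compiled regex with a dict mapping group index to token type, so `allowed` ends up listing every terminal the lexer could have matched at that point. A hedged sketch of the resulting behavior, assuming the exception stores its `allowed` argument as an attribute (as in current lark):

    from lark import Lark
    from lark.exceptions import UnexpectedCharacters

    p = Lark('start: "a"+', parser='lalr')
    try:
        p.parse("ax")   # 'x' matches no terminal
    except UnexpectedCharacters as e:
        print(e.allowed)  # the terminal names that were expected here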
@@ -176,13 +176,7 @@ class EBNF_to_BNF(Transformer_InPlace):
     def expr(self, rule, op, *args):
         if op.value == '?':
-            if isinstance(rule, Terminal) and rule.filter_out and not (
-                    self.rule_options and self.rule_options.keep_all_tokens):
-                empty = ST('expansion', [])
-            elif isinstance(rule, NonTerminal) and rule.name.startswith('_'):
-                empty = ST('expansion', [])
-            else:
-                empty = _EMPTY
+            empty = ST('expansion', [])
             return ST('expansions', [rule, empty])
         elif op.value == '+':
             # a : b c+ d
@@ -207,6 +201,23 @@ class EBNF_to_BNF(Transformer_InPlace):
             return ST('expansions', [ST('expansion', [rule] * n) for n in range(mn, mx+1)])
         assert False, op

+    def maybe(self, rule):
+        keep_all_tokens = self.rule_options and self.rule_options.keep_all_tokens
+
+        def will_not_get_removed(sym):
+            if isinstance(sym, NonTerminal):
+                return not sym.name.startswith('_')
+            if isinstance(sym, Terminal):
+                return keep_all_tokens or not sym.filter_out
+            assert False
+
+        if any(rule.scan_values(will_not_get_removed)):
+            empty = _EMPTY
+        else:
+            empty = ST('expansion', [])
+
+        return ST('expansions', [rule, empty])
+

 class SimplifyRule_Visitor(Visitor):
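Aside, not part of the patch: the two hunks above split the old behavior in two. `expr` with `?` now always expands to a plain empty alternative, while the new `maybe` handler (for `[..]`) picks the placeholder-producing `_EMPTY` only when some symbol inside would survive tree filtering. A sketch of the observable difference, assuming lark's public API:

    from lark import Lark

    # "?" never yields placeholders; "[..]" does, but only for symbols
    # that would actually appear in the tree (named A and B, not "c").
    p = Lark('''start: A? [B] ["c"]
                A: "a"
                B: "b"''', maybe_placeholders=True)
    assert p.parse("").children == [None]        # only the [B] slot
    assert p.parse("ab").children == ['a', 'b']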
@@ -268,9 +279,6 @@ class RuleTreeToText(Transformer):

 @inline_args
 class CanonizeTree(Transformer_InPlace):
-    def maybe(self, expr):
-        return ST('expr', [expr, Token('OP', '?', -1)])
-
     def tokenmods(self, *args):
         if len(args) == 1:
             return list(args)
@@ -428,6 +436,9 @@ class TerminalTreeToPattern(Transformer):
         assert len(args) == 2
         return PatternRE('(?:%s)%s' % (inner.to_regexp(), op), inner.flags)

+    def maybe(self, expr):
+        return self.expr(expr + ['?'])
+
     def alias(self, t):
         raise GrammarError("Aliasing not allowed in terminals (You used -> in the wrong place)")
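Aside, not part of the patch: terminals produce no parse tree, so here `maybe` simply defers to `expr` with a `'?'` op, and `[..]` compiles down to a `(?:...)?` regex, much as lark's bundled common.lark uses it:

    from lark import Lark

    # Inside a terminal definition, [..] is pure regex sugar; placeholder
    # semantics never apply at the lexer level.
    p = Lark('''start: SIGNED_INT
                SIGNED_INT: ["+" | "-"] INT
                %import common.INT''')
    assert p.parse("-12").children == ['-12']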
@@ -1371,16 +1371,23 @@ def _make_parser_test(LEXER, PARSER):
     @unittest.skipIf(PARSER=='cyk', "Empty rules")
     def test_maybe_placeholders(self):
         # Anonymous tokens shouldn't count
-        p = _Lark("""start: "a"? "b"? "c"? """, maybe_placeholders=True)
+        p = _Lark("""start: ["a"] ["b"] ["c"] """, maybe_placeholders=True)
         self.assertEqual(p.parse("").children, [])

-        # Anonymous tokens shouldn't count, other constructs should
-        p = _Lark("""start: A? "b"? _c?
+        # All invisible constructs shouldn't count
+        p = _Lark("""start: [A] ["b"] [_c] ["e" "f" _c]
                     A: "a"
                     _c: "c" """, maybe_placeholders=True)
         self.assertEqual(p.parse("").children, [None])
+        self.assertEqual(p.parse("c").children, [None])
+        self.assertEqual(p.parse("aefc").children, ['a'])

-        p = _Lark("""!start: "a"? "b"? "c"? """, maybe_placeholders=True)
+        # ? shouldn't apply
+        p = _Lark("""!start: ["a"] "b"? ["c"] """, maybe_placeholders=True)
+        self.assertEqual(p.parse("").children, [None, None])
+        self.assertEqual(p.parse("b").children, [None, 'b', None])
+
+        p = _Lark("""!start: ["a"] ["b"] ["c"] """, maybe_placeholders=True)
         self.assertEqual(p.parse("").children, [None, None, None])
         self.assertEqual(p.parse("a").children, ['a', None, None])
         self.assertEqual(p.parse("b").children, [None, 'b', None])
@@ -1390,7 +1397,7 @@ def _make_parser_test(LEXER, PARSER):
         self.assertEqual(p.parse("bc").children, [None, 'b', 'c'])
         self.assertEqual(p.parse("abc").children, ['a', 'b', 'c'])

-        p = _Lark("""!start: ("a"? "b" "c"?)+ """, maybe_placeholders=True)
+        p = _Lark("""!start: (["a"] "b" ["c"])+ """, maybe_placeholders=True)
         self.assertEqual(p.parse("b").children, [None, 'b', None])
         self.assertEqual(p.parse("bb").children, [None, 'b', None, None, 'b', None])
         self.assertEqual(p.parse("abbc").children, ['a', 'b', None, None, 'b', 'c'])
@@ -1401,7 +1408,7 @@ def _make_parser_test(LEXER, PARSER):
                                           'a', 'b', 'c',
                                           None, 'b', None])

-        p = _Lark("""!start: "a"? "c"? "b"+ "a"? "d"? """, maybe_placeholders=True)
+        p = _Lark("""!start: ["a"] ["c"] "b"+ ["a"] ["d"] """, maybe_placeholders=True)
         self.assertEqual(p.parse("bb").children, [None, None, 'b', 'b', None, None])
         self.assertEqual(p.parse("bd").children, [None, None, 'b', None, 'd'])
         self.assertEqual(p.parse("abba").children, ['a', None, 'b', 'b', 'a', None])