@@ -85,7 +85,7 @@ Each item is one of:
 * `TERMINAL`
 * `"string literal"` or `/regexp literal/`
 * `(item item ..)` - Group items
-* `[item item ..]` - Maybe. Same as: `(item item ..)?`
+* `[item item ..]` - Maybe. Same as `(item item ..)?`
 * `item?` - Zero or one instances of item ("maybe")
 * `item*` - Zero or more instances of item
 * `item+` - One or more instances of item
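For readers skimming the grammar reference, a minimal sketch of these operators in a Lark grammar (the grammar and names are illustrative only, not part of this diff):

```python
from lark import Lark

# Illustrative grammar exercising the operators listed above.
parser = Lark("""
    start: greeting name ["!"]      // [..] - maybe
    greeting: ("hello" | "hi")      // (..) - group
    name: WORD+                     //  +   - one or more
    WORD: /[a-z]+/
    %ignore " "
""")

print(parser.parse("hello world").pretty())
```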
@@ -171,7 +171,8 @@ class _Lex:
                     break
             else:
-                raise UnexpectedCharacters(stream, line_ctr.char_pos, line_ctr.line, line_ctr.column, state=self.state)
+                allowed = [v for m, tfi in lexer.mres for v in tfi.values()]
+                raise UnexpectedCharacters(stream, line_ctr.char_pos, line_ctr.line, line_ctr.column, allowed=allowed, state=self.state)

 class UnlessCallback:
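For reviewers, a sketch of what the new `allowed` information looks like from the caller's side, assuming `UnexpectedCharacters` stores the keyword argument as an `.allowed` attribute (the grammar below is illustrative):

```python
from lark import Lark
from lark.exceptions import UnexpectedCharacters

parser = Lark('start: "a"+ ', parser='lalr')
try:
    parser.parse("ab")
except UnexpectedCharacters as e:
    # The exception now reports which terminals the lexer could have
    # matched at the point of failure, not just the position.
    print(e.line, e.column, e.allowed)
```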
@@ -176,13 +176,7 @@ class EBNF_to_BNF(Transformer_InPlace):
     def expr(self, rule, op, *args):
         if op.value == '?':
-            if isinstance(rule, Terminal) and rule.filter_out and not (
-                    self.rule_options and self.rule_options.keep_all_tokens):
-                empty = ST('expansion', [])
-            elif isinstance(rule, NonTerminal) and rule.name.startswith('_'):
-                empty = ST('expansion', [])
-            else:
-                empty = _EMPTY
+            empty = ST('expansion', [])
             return ST('expansions', [rule, empty])
         elif op.value == '+':
             # a : b c+ d
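With this simplification, `item?` always expands to an empty alternative and never to the `_EMPTY` placeholder rule; placeholder insertion moves entirely into the new `maybe` handler below. A quick sketch of the resulting behavioral split, mirroring the tests further down:

```python
from lark import Lark

p = Lark("""
start: A? [B]
A: "a"
B: "b"
""", maybe_placeholders=True)

# A? simply vanishes when absent; [B] leaves a None placeholder.
assert p.parse("").children == [None]
assert p.parse("ab").children == ['a', 'b']
```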
@@ -207,6 +201,23 @@ class EBNF_to_BNF(Transformer_InPlace):
             return ST('expansions', [ST('expansion', [rule] * n) for n in range(mn, mx+1)])
         assert False, op

+    def maybe(self, rule):
+        keep_all_tokens = self.rule_options and self.rule_options.keep_all_tokens
+
+        def will_not_get_removed(sym):
+            if isinstance(sym, NonTerminal):
+                return not sym.name.startswith('_')
+            if isinstance(sym, Terminal):
+                return keep_all_tokens or not sym.filter_out
+            assert False
+
+        if any(rule.scan_values(will_not_get_removed)):
+            empty = _EMPTY
+        else:
+            empty = ST('expansion', [])
+
+        return ST('expansions', [rule, empty])
+

 class SimplifyRule_Visitor(Visitor):
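Here `scan_values` walks the symbols inside the bracketed expression, and the `_EMPTY` placeholder is emitted only if at least one of them would survive tree construction. In other words, constructs that are invisible in the output tree (filtered anonymous tokens, `_`-prefixed rules) don't reserve a slot. A sketch mirroring the updated tests:

```python
from lark import Lark

p = Lark("""
start: ["b"] [A] [_c]
A: "a"
_c: "c"
""", maybe_placeholders=True)

# "b" is filtered out and _c is inlined away, so only [A] gets a slot.
assert p.parse("").children == [None]
```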
@@ -268,9 +279,6 @@ class RuleTreeToText(Transformer):
 @inline_args
 class CanonizeTree(Transformer_InPlace):
-    def maybe(self, expr):
-        return ST('expr', [expr, Token('OP', '?', -1)])
-
     def tokenmods(self, *args):
         if len(args) == 1:
             return list(args)
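Dropping this handler is what keeps `[item]` distinct from `item?` past canonization: the `maybe` node now flows through to `EBNF_to_BNF.maybe` (above) and `TerminalTreeToPattern.maybe` (below), instead of being rewritten into an `expr` with a `?` token.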
@@ -428,6 +436,9 @@ class TerminalTreeToPattern(Transformer):
         assert len(args) == 2
         return PatternRE('(?:%s)%s' % (inner.to_regexp(), op), inner.flags)

+    def maybe(self, expr):
+        return self.expr(expr + ['?'])
+
     def alias(self, t):
         raise GrammarError("Aliasing not allowed in terminals (You used -> in the wrong place)")
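Inside terminal definitions there is no tree and hence no placeholder to insert, so `maybe` just delegates to `expr` with a `?` operator, which the existing `expr` method compiles to a non-capturing optional group. An illustrative sketch (the grammar is hypothetical):

```python
from lark import Lark

# [..] inside a terminal is plain regex optionality; HEX compiles to
# roughly (?:-)?0x(?:[0-9a-f])+ under the hood.
p = Lark("""
start: HEX
HEX: ["-"] "0x" /[0-9a-f]/+
""")

assert p.parse("-0xff").children == ['-0xff']
```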
@@ -1371,16 +1371,23 @@ def _make_parser_test(LEXER, PARSER):
     @unittest.skipIf(PARSER=='cyk', "Empty rules")
     def test_maybe_placeholders(self):
         # Anonymous tokens shouldn't count
-        p = _Lark("""start: "a"? "b"? "c"? """, maybe_placeholders=True)
+        p = _Lark("""start: ["a"] ["b"] ["c"] """, maybe_placeholders=True)
         self.assertEqual(p.parse("").children, [])

-        # Anonymous tokens shouldn't count, other constructs should
-        p = _Lark("""start: A? "b"? _c?
+        # All invisible constructs shouldn't count
+        p = _Lark("""start: [A] ["b"] [_c] ["e" "f" _c]
                     A: "a"
                     _c: "c" """, maybe_placeholders=True)
         self.assertEqual(p.parse("").children, [None])
+        self.assertEqual(p.parse("c").children, [None])
+        self.assertEqual(p.parse("aefc").children, ['a'])

-        p = _Lark("""!start: "a"? "b"? "c"? """, maybe_placeholders=True)
+        # ? shouldn't apply
+        p = _Lark("""!start: ["a"] "b"? ["c"] """, maybe_placeholders=True)
+        self.assertEqual(p.parse("").children, [None, None])
+        self.assertEqual(p.parse("b").children, [None, 'b', None])
+
+        p = _Lark("""!start: ["a"] ["b"] ["c"] """, maybe_placeholders=True)
         self.assertEqual(p.parse("").children, [None, None, None])
         self.assertEqual(p.parse("a").children, ['a', None, None])
         self.assertEqual(p.parse("b").children, [None, 'b', None])
@@ -1390,7 +1397,7 @@ def _make_parser_test(LEXER, PARSER):
         self.assertEqual(p.parse("bc").children, [None, 'b', 'c'])
         self.assertEqual(p.parse("abc").children, ['a', 'b', 'c'])

-        p = _Lark("""!start: ("a"? "b" "c"?)+ """, maybe_placeholders=True)
+        p = _Lark("""!start: (["a"] "b" ["c"])+ """, maybe_placeholders=True)
         self.assertEqual(p.parse("b").children, [None, 'b', None])
         self.assertEqual(p.parse("bb").children, [None, 'b', None, None, 'b', None])
         self.assertEqual(p.parse("abbc").children, ['a', 'b', None, None, 'b', 'c'])
@@ -1401,7 +1408,7 @@ def _make_parser_test(LEXER, PARSER):
                 'a', 'b', 'c',
                 None, 'b', None])

-        p = _Lark("""!start: "a"? "c"? "b"+ "a"? "d"? """, maybe_placeholders=True)
+        p = _Lark("""!start: ["a"] ["c"] "b"+ ["a"] ["d"] """, maybe_placeholders=True)
         self.assertEqual(p.parse("bb").children, [None, None, 'b', 'b', None, None])
         self.assertEqual(p.parse("bd").children, [None, None, 'b', None, 'd'])
         self.assertEqual(p.parse("abba").children, ['a', None, 'b', 'b', 'a', None])
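As a closing usage note: the point of these placeholders is that optional children keep a stable index, so downstream code can unpack `children` positionally. A small sketch (not part of the test suite):

```python
from lark import Lark

p = Lark("""
start: [SIGN] NUMBER
SIGN: "-"
NUMBER: /[0-9]+/
""", maybe_placeholders=True)

sign, number = p.parse("-42").children
assert sign == '-' and number == '42'

sign, number = p.parse("42").children
assert sign is None and number == '42'
```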