Browse Source

Change maybe_placeholders: Apply to [a] and [a b c], but not a? or (a b)? or [_a _b _c]

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.6.6
Erez Shinan 6 years ago
parent
commit
e16bb35576
4 changed files with 37 additions and 18 deletions
  1. +1
    -1
      docs/grammar.md
  2. +2
    -1
      lark/lexer.py
  3. +21
    -10
      lark/load_grammar.py
  4. +13
    -6
      tests/test_parser.py

+ 1
- 1
docs/grammar.md View File

@@ -85,7 +85,7 @@ Each item is one of:
* `TERMINAL`
* `"string literal"` or `/regexp literal/`
* `(item item ..)` - Group items
* `[item item ..]` - Maybe. Same as: `(item item ..)?`
* `[item item ..]` - Maybe. Same as `(item item ..)?`
* `item?` - Zero or one instances of item ("maybe")
* `item*` - Zero or more instances of item
* `item+` - One or more instances of item


+ 2
- 1
lark/lexer.py View File

@@ -171,7 +171,8 @@ class _Lex:

break
else:
raise UnexpectedCharacters(stream, line_ctr.char_pos, line_ctr.line, line_ctr.column, state=self.state)
allowed = [v for m, tfi in lexer.mres for v in tfi.values()]
raise UnexpectedCharacters(stream, line_ctr.char_pos, line_ctr.line, line_ctr.column, allowed=allowed, state=self.state)


class UnlessCallback:


+ 21
- 10
lark/load_grammar.py View File

@@ -176,13 +176,7 @@ class EBNF_to_BNF(Transformer_InPlace):

def expr(self, rule, op, *args):
if op.value == '?':
if isinstance(rule, Terminal) and rule.filter_out and not (
self.rule_options and self.rule_options.keep_all_tokens):
empty = ST('expansion', [])
elif isinstance(rule, NonTerminal) and rule.name.startswith('_'):
empty = ST('expansion', [])
else:
empty = _EMPTY
empty = ST('expansion', [])
return ST('expansions', [rule, empty])
elif op.value == '+':
# a : b c+ d
@@ -207,6 +201,23 @@ class EBNF_to_BNF(Transformer_InPlace):
return ST('expansions', [ST('expansion', [rule] * n) for n in range(mn, mx+1)])
assert False, op

def maybe(self, rule):
    """Expand a maybe-expression ``[rule]`` into ``rule | empty``.

    The empty alternative is a placeholder (``_EMPTY``) only when the
    bracketed rule contains at least one symbol that will survive tree
    construction; if every symbol would be filtered out anyway, a plain
    empty expansion is used instead (no placeholder produced).
    """
    keep_all = bool(self.rule_options and self.rule_options.keep_all_tokens)

    def survives(sym):
        # Underscore-prefixed rules are inlined away; filtered-out
        # terminals disappear unless keep_all_tokens is in effect.
        if isinstance(sym, NonTerminal):
            return not sym.name.startswith('_')
        if isinstance(sym, Terminal):
            return keep_all or not sym.filter_out
        assert False

    has_visible = any(rule.scan_values(survives))
    empty = _EMPTY if has_visible else ST('expansion', [])
    return ST('expansions', [rule, empty])


class SimplifyRule_Visitor(Visitor):

@@ -268,9 +279,6 @@ class RuleTreeToText(Transformer):

@inline_args
class CanonizeTree(Transformer_InPlace):
def maybe(self, expr):
    """Canonize a maybe-node ``[expr]`` into the equivalent ``expr?`` tree."""
    qmark = Token('OP', '?', -1)
    return ST('expr', [expr, qmark])

def tokenmods(self, *args):
if len(args) == 1:
return list(args)
@@ -428,6 +436,9 @@ class TerminalTreeToPattern(Transformer):
assert len(args) == 2
return PatternRE('(?:%s)%s' % (inner.to_regexp(), op), inner.flags)

def maybe(self, expr):
    """Treat ``[..]`` inside a terminal as sugar for ``(..)?``."""
    # Delegate to the generic expr handler with an appended '?' operator.
    args = expr + ['?']
    return self.expr(args)

def alias(self, t):
    """Reject ``->`` aliases inside terminal definitions."""
    msg = "Aliasing not allowed in terminals (You used -> in the wrong place)"
    raise GrammarError(msg)



+ 13
- 6
tests/test_parser.py View File

@@ -1371,16 +1371,23 @@ def _make_parser_test(LEXER, PARSER):
@unittest.skipIf(PARSER=='cyk', "Empty rules")
def test_maybe_placeholders(self):
# Anonymous tokens shouldn't count
p = _Lark("""start: "a"? "b"? "c"? """, maybe_placeholders=True)
p = _Lark("""start: ["a"] ["b"] ["c"] """, maybe_placeholders=True)
self.assertEqual(p.parse("").children, [])

# Anonymous tokens shouldn't count, other constructs should
p = _Lark("""start: A? "b"? _c?
# All invisible constructs shouldn't count
p = _Lark("""start: [A] ["b"] [_c] ["e" "f" _c]
A: "a"
_c: "c" """, maybe_placeholders=True)
self.assertEqual(p.parse("").children, [None])
self.assertEqual(p.parse("c").children, [None])
self.assertEqual(p.parse("aefc").children, ['a'])

p = _Lark("""!start: "a"? "b"? "c"? """, maybe_placeholders=True)
# ? shouldn't apply
p = _Lark("""!start: ["a"] "b"? ["c"] """, maybe_placeholders=True)
self.assertEqual(p.parse("").children, [None, None])
self.assertEqual(p.parse("b").children, [None, 'b', None])

p = _Lark("""!start: ["a"] ["b"] ["c"] """, maybe_placeholders=True)
self.assertEqual(p.parse("").children, [None, None, None])
self.assertEqual(p.parse("a").children, ['a', None, None])
self.assertEqual(p.parse("b").children, [None, 'b', None])
@@ -1390,7 +1397,7 @@ def _make_parser_test(LEXER, PARSER):
self.assertEqual(p.parse("bc").children, [None, 'b', 'c'])
self.assertEqual(p.parse("abc").children, ['a', 'b', 'c'])

p = _Lark("""!start: ("a"? "b" "c"?)+ """, maybe_placeholders=True)
p = _Lark("""!start: (["a"] "b" ["c"])+ """, maybe_placeholders=True)
self.assertEqual(p.parse("b").children, [None, 'b', None])
self.assertEqual(p.parse("bb").children, [None, 'b', None, None, 'b', None])
self.assertEqual(p.parse("abbc").children, ['a', 'b', None, None, 'b', 'c'])
@@ -1401,7 +1408,7 @@ def _make_parser_test(LEXER, PARSER):
'a', 'b', 'c',
None, 'b', None])

p = _Lark("""!start: "a"? "c"? "b"+ "a"? "d"? """, maybe_placeholders=True)
p = _Lark("""!start: ["a"] ["c"] "b"+ ["a"] ["d"] """, maybe_placeholders=True)
self.assertEqual(p.parse("bb").children, [None, None, 'b', 'b', None, None])
self.assertEqual(p.parse("bd").children, [None, None, 'b', None, 'd'])
self.assertEqual(p.parse("abba").children, ['a', None, 'b', 'b', 'a', None])


Loading…
Cancel
Save