Browse Source

New Feature: Added maybe_placeholders option (Issue #285)

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.66
Erez Shinan 6 years ago
parent
commit
222df5bab4
5 changed files with 62 additions and 10 deletions
  1. +1
    -0
      lark/grammar.py
  2. +3
    -1
      lark/lark.py
  3. +16
    -6
      lark/load_grammar.py
  4. +20
    -3
      lark/parse_tree_builder.py
  5. +22
    -0
      tests/test_parser.py

+ 1
- 0
lark/grammar.py View File

@@ -64,6 +64,7 @@ class RuleOptions:
self.keep_all_tokens = keep_all_tokens self.keep_all_tokens = keep_all_tokens
self.expand1 = expand1 self.expand1 = expand1
self.priority = priority self.priority = priority
self.empty_indices = ()


def __repr__(self): def __repr__(self):
return 'RuleOptions(%r, %r, %r)' % ( return 'RuleOptions(%r, %r, %r)' % (


+ 3
- 1
lark/lark.py View File

@@ -45,6 +45,7 @@ class LarkOptions(object):
profile - Measure run-time usage in Lark. Read results from the profiler property (Default: False) profile - Measure run-time usage in Lark. Read results from the profiler property (Default: False)
propagate_positions - Propagates [line, column, end_line, end_column] attributes into all tree branches. propagate_positions - Propagates [line, column, end_line, end_column] attributes into all tree branches.
lexer_callbacks - Dictionary of callbacks for the lexer. May alter tokens during lexing. Use with caution. lexer_callbacks - Dictionary of callbacks for the lexer. May alter tokens during lexing. Use with caution.
maybe_placeholders - Experimental feature. Instead of omitting optional rules (i.e. rule?), replace them with None
""" """
if __doc__: if __doc__:
__doc__ += OPTIONS_DOC __doc__ += OPTIONS_DOC
@@ -66,6 +67,7 @@ class LarkOptions(object):
self.propagate_positions = o.pop('propagate_positions', False) self.propagate_positions = o.pop('propagate_positions', False)
self.earley__predict_all = o.pop('earley__predict_all', False) self.earley__predict_all = o.pop('earley__predict_all', False)
self.lexer_callbacks = o.pop('lexer_callbacks', {}) self.lexer_callbacks = o.pop('lexer_callbacks', {})
self.maybe_placeholders = o.pop('maybe_placeholders', False)


assert self.parser in ('earley', 'lalr', 'cyk', None) assert self.parser in ('earley', 'lalr', 'cyk', None)


@@ -179,7 +181,7 @@ class Lark:
def _build_parser(self): def _build_parser(self):
self.parser_class = get_frontend(self.options.parser, self.options.lexer) self.parser_class = get_frontend(self.options.parser, self.options.lexer)


self._parse_tree_builder = ParseTreeBuilder(self.rules, self.options.tree_class, self.options.propagate_positions, self.options.keep_all_tokens, self.options.parser!='lalr')
self._parse_tree_builder = ParseTreeBuilder(self.rules, self.options.tree_class, self.options.propagate_positions, self.options.keep_all_tokens, self.options.parser!='lalr', self.options.maybe_placeholders)
callback = self._parse_tree_builder.create_callback(self.options.transformer) callback = self._parse_tree_builder.create_callback(self.options.transformer)
if self.profiler: if self.profiler:
for f in dir(callback): for f in dir(callback):


+ 16
- 6
lark/load_grammar.py View File

@@ -3,7 +3,7 @@
import os.path import os.path
import sys import sys
from ast import literal_eval from ast import literal_eval
from copy import deepcopy
from copy import copy, deepcopy


from .utils import bfs from .utils import bfs
from .lexer import Token, TerminalDef, PatternStr, PatternRE from .lexer import Token, TerminalDef, PatternStr, PatternRE
@@ -26,6 +26,8 @@ EXT = '.lark'


_RE_FLAGS = 'imslux' _RE_FLAGS = 'imslux'


_EMPTY = Symbol('__empty__')

_TERMINAL_NAMES = { _TERMINAL_NAMES = {
'.' : 'DOT', '.' : 'DOT',
',' : 'COMMA', ',' : 'COMMA',
@@ -151,7 +153,6 @@ RULES = {
'literal': ['REGEXP', 'STRING'], 'literal': ['REGEXP', 'STRING'],
} }



@inline_args @inline_args
class EBNF_to_BNF(Transformer_InPlace): class EBNF_to_BNF(Transformer_InPlace):
def __init__(self): def __init__(self):
@@ -175,7 +176,7 @@ class EBNF_to_BNF(Transformer_InPlace):


def expr(self, rule, op, *args): def expr(self, rule, op, *args):
if op.value == '?': if op.value == '?':
return ST('expansions', [rule, ST('expansion', [])])
return ST('expansions', [rule, _EMPTY])
elif op.value == '+': elif op.value == '+':
# a : b c+ d # a : b c+ d
# --> # -->
@@ -481,7 +482,8 @@ class Grammar:
for name, rule_tree, options in rule_defs: for name, rule_tree, options in rule_defs:
ebnf_to_bnf.rule_options = RuleOptions(keep_all_tokens=True) if options and options.keep_all_tokens else None ebnf_to_bnf.rule_options = RuleOptions(keep_all_tokens=True) if options and options.keep_all_tokens else None
tree = transformer.transform(rule_tree) tree = transformer.transform(rule_tree)
rules.append((name, ebnf_to_bnf.transform(tree), options))
res = ebnf_to_bnf.transform(tree)
rules.append((name, res, options))
rules += ebnf_to_bnf.new_rules rules += ebnf_to_bnf.new_rules


assert len(rules) == len({name for name, _t, _o in rules}), "Whoops, name collision" assert len(rules) == len({name for name, _t, _o in rules}), "Whoops, name collision"
@@ -499,9 +501,17 @@ class Grammar:
if alias and name.startswith('_'): if alias and name.startswith('_'):
raise GrammarError("Rule %s is marked for expansion (it starts with an underscore) and isn't allowed to have aliases (alias=%s)" % (name, alias)) raise GrammarError("Rule %s is marked for expansion (it starts with an underscore) and isn't allowed to have aliases (alias=%s)" % (name, alias))


assert all(isinstance(x, Symbol) for x in expansion), expansion
empty_indices = [i for i, x in enumerate(expansion) if x==_EMPTY]
if empty_indices:
assert options
exp_options = copy(options)
exp_options.empty_indices = len(expansion), empty_indices
expansion = [x for x in expansion if x!=_EMPTY]
else:
exp_options = options


rule = Rule(NonTerminal(name), expansion, alias, options)
assert all(isinstance(x, Symbol) for x in expansion), expansion
rule = Rule(NonTerminal(name), expansion, alias, exp_options)
compiled_rules.append(rule) compiled_rules.append(rule)


return terminals, compiled_rules, self.ignore return terminals, compiled_rules, self.ignore


+ 20
- 3
lark/parse_tree_builder.py View File

@@ -1,7 +1,5 @@
from .exceptions import GrammarError from .exceptions import GrammarError
from .utils import suppress
from .lexer import Token from .lexer import Token
from .grammar import Rule
from .tree import Tree from .tree import Tree
from .visitors import InlineTransformer # XXX Deprecated from .visitors import InlineTransformer # XXX Deprecated


@@ -19,6 +17,23 @@ class ExpandSingleChild:
else: else:
return self.node_builder(children) return self.node_builder(children)


class AddMaybePlaceholder:
    """Tree-builder wrapper that restores ``None`` placeholders for optional
    sub-rules (``x?``) that matched empty, so a rule's children always line
    up with its full expansion.
    """
    def __init__(self, empty_indices, node_builder):
        # empty_indices is either falsy, or a pair:
        #   (length of the original expansion, positions that may match empty)
        self.empty_indices = empty_indices
        self.node_builder = node_builder

    def __call__(self, children):
        tree = self.node_builder(children)
        if not self.empty_indices:
            return tree
        exp_len, holes = self.empty_indices
        # Shift accounts for repetition, e.g. ("a" "b"?)+, where the produced
        # children may span several copies of the expansion; for a plain
        # (non-repeating) rule the shift works out to 0.
        shift = len(tree.children) - (exp_len - len(holes))
        for pos in holes:
            tree.children.insert(pos + shift, None)
        return tree



class PropagatePositions: class PropagatePositions:
def __init__(self, node_builder): def __init__(self, node_builder):
@@ -116,11 +131,12 @@ def ptb_inline_args(func):




class ParseTreeBuilder: class ParseTreeBuilder:
def __init__(self, rules, tree_class, propagate_positions=False, keep_all_tokens=False, ambiguous=False):
def __init__(self, rules, tree_class, propagate_positions=False, keep_all_tokens=False, ambiguous=False, maybe_placeholders=False):
self.tree_class = tree_class self.tree_class = tree_class
self.propagate_positions = propagate_positions self.propagate_positions = propagate_positions
self.always_keep_all_tokens = keep_all_tokens self.always_keep_all_tokens = keep_all_tokens
self.ambiguous = ambiguous self.ambiguous = ambiguous
self.maybe_placeholders = maybe_placeholders


self.rule_builders = list(self._init_builders(rules)) self.rule_builders = list(self._init_builders(rules))


@@ -135,6 +151,7 @@ class ParseTreeBuilder:
wrapper_chain = filter(None, [ wrapper_chain = filter(None, [
(expand_single_child and not rule.alias) and ExpandSingleChild, (expand_single_child and not rule.alias) and ExpandSingleChild,
maybe_create_child_filter(rule.expansion, keep_all_tokens, self.ambiguous), maybe_create_child_filter(rule.expansion, keep_all_tokens, self.ambiguous),
self.maybe_placeholders and partial(AddMaybePlaceholder, options.empty_indices),
self.propagate_positions and PropagatePositions, self.propagate_positions and PropagatePositions,
]) ])




+ 22
- 0
tests/test_parser.py View File

@@ -1248,6 +1248,28 @@ def _make_parser_test(LEXER, PARSER):
res = p.parse('B') res = p.parse('B')
self.assertEqual(len(res.children), 3) self.assertEqual(len(res.children), 3)


# NOTE(review): indentation of this hunk was flattened by the diff renderer;
# in the actual file these lines are indented as a method body.
def test_maybe_placeholders(self):
# With maybe_placeholders=True, each optional item ("a"?, "b"?, "c"?) keeps a
# fixed slot in the children list, filled with None when it matched empty.
p = Lark("""!start: "a"? "b"? "c"? """, maybe_placeholders=True)
self.assertEqual(p.parse("").children, [None, None, None])
self.assertEqual(p.parse("a").children, ['a', None, None])
self.assertEqual(p.parse("b").children, [None, 'b', None])
self.assertEqual(p.parse("c").children, [None, None, 'c'])
self.assertEqual(p.parse("ab").children, ['a', 'b', None])
self.assertEqual(p.parse("ac").children, ['a', None, 'c'])
self.assertEqual(p.parse("bc").children, [None, 'b', 'c'])
self.assertEqual(p.parse("abc").children, ['a', 'b', 'c'])

# Under repetition, the 3-slot [a, b, c] layout repeats once per iteration
# of the group, so every repetition contributes exactly three children.
p = Lark("""!start: ("a"? "b" "c"?)+ """, maybe_placeholders=True)
self.assertEqual(p.parse("b").children, [None, 'b', None])
self.assertEqual(p.parse("bb").children, [None, 'b', None, None, 'b', None])
self.assertEqual(p.parse("abbc").children, ['a', 'b', None, None, 'b', 'c'])
self.assertEqual(p.parse("babbcabcb").children,
[None, 'b', None,
'a', 'b', None,
None, 'b', 'c',
'a', 'b', 'c',
None, 'b', None])





_NAME = "Test" + PARSER.capitalize() + LEXER.capitalize() _NAME = "Test" + PARSER.capitalize() + LEXER.capitalize()


Loading…
Cancel
Save