Browse Source

Remove unused rules (Issue #384)

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.7.2
Erez Shinan 6 years ago
parent
commit
e79689dce7
4 changed files with 29 additions and 4 deletions
  1. +1
    -1
      lark/lark.py
  2. +14
    -2
      lark/load_grammar.py
  3. +1
    -1
      lark/reconstruct.py
  4. +13
    -0
      tests/test_parser.py

+ 1
- 1
lark/lark.py View File

@@ -200,7 +200,7 @@ class Lark(Serialize):
self.grammar = load_grammar(grammar, self.source)

# Compile the EBNF grammar into BNF
self.terminals, self.rules, self.ignore_tokens = self.grammar.compile()
self.terminals, self.rules, self.ignore_tokens = self.grammar.compile(self.options.start)

# If the user asked to invert the priorities, negate them all here.
# This replaces the old 'resolve__antiscore_sum' option.


+ 14
- 2
lark/load_grammar.py View File

@@ -205,7 +205,7 @@ class EBNF_to_BNF(Transformer_InPlace):
keep_all_tokens = self.rule_options and self.rule_options.keep_all_tokens

def will_not_get_removed(sym):
if isinstance(sym, NonTerminal):
if isinstance(sym, NonTerminal):
return not sym.name.startswith('_')
if isinstance(sym, Terminal):
return keep_all_tokens or not sym.filter_out
@@ -465,7 +465,7 @@ class Grammar:
self.rule_defs = rule_defs
self.ignore = ignore

def compile(self):
def compile(self, start):
# The trees are transformed in-place (to support huge grammars), so we work
# on deep copies of the definitions; this allows calling compile more than once.
term_defs = deepcopy(list(self.term_defs))
@@ -546,6 +546,18 @@ class Grammar:
# Remove duplicates
compiled_rules = list(set(compiled_rules))


# Filter out unused rules
while True:
c = len(compiled_rules)
used_rules = {s for r in compiled_rules
for s in r.expansion
if isinstance(s, NonTerminal)
and s != r.origin}
compiled_rules = [r for r in compiled_rules if r.origin.name==start or r.origin in used_rules]
if len(compiled_rules) == c:
break

# Filter out unused terminals
used_terms = {t.name for r in compiled_rules
for t in r.expansion


+ 1
- 1
lark/reconstruct.py View File

@@ -69,7 +69,7 @@ class MakeMatchTree:
class Reconstructor:
def __init__(self, parser):
# XXX TODO calling compile twice returns different results!
tokens, rules, _grammar_extra = parser.grammar.compile()
tokens, rules, _grammar_extra = parser.grammar.compile(parser.options.start)

self.write_tokens = WriteTokensTransformer({t.name:t for t in tokens})
self.rules = list(self._build_recons_rules(rules))


+ 13
- 0
tests/test_parser.py View File

@@ -1493,6 +1493,19 @@ def _make_parser_test(LEXER, PARSER):

parser.parse(r'"That" "And a \"b"')


def test_meddling_unused(self):
"Unless 'unused' is removed, LALR analysis will fail on reduce-reduce collision"

grammar = """
start: EKS* x
x: EKS
unused: x*
EKS: "x"
"""
parser = _Lark(grammar)


@unittest.skipIf(PARSER!='lalr', "Serialize currently only works for LALR parsers (though it should be easy to extend)")
def test_serialize(self):
grammar = """


Loading…
Cancel
Save