Remove unused rules (Issue #384)

6 years ago · e79689dce7
--- a/lark/lark.py
+++ b/lark/lark.py
@@ -200,7 +200,7 @@ class Lark(Serialize):
        self.grammar = load_grammar(grammar, self.source)

        # Compile the EBNF grammar into BNF
        self.terminals, self.rules, self.ignore_tokens = self.grammar.compile()
        self.terminals, self.rules, self.ignore_tokens = self.grammar.compile(self.options.start)

        # If the user asked to invert the priorities, negate them all here.
        # This replaces the old 'resolve__antiscore_sum' option.
--- a/lark/load_grammar.py
+++ b/lark/load_grammar.py
@@ -205,7 +205,7 @@ class EBNF_to_BNF(Transformer_InPlace):
        keep_all_tokens = self.rule_options and self.rule_options.keep_all_tokens

        def will_not_get_removed(sym):
            if isinstance(sym, NonTerminal): 
            if isinstance(sym, NonTerminal):
                return not sym.name.startswith('_')
            if isinstance(sym, Terminal):
                return keep_all_tokens or not sym.filter_out
@@ -465,7 +465,7 @@ class Grammar:
        self.rule_defs = rule_defs
        self.ignore = ignore

    def compile(self):
    def compile(self, start):
        # We change the trees in-place (to support huge grammars)
        # So deepcopy allows calling compile more than once.
        term_defs = deepcopy(list(self.term_defs))
@@ -546,6 +546,18 @@ class Grammar:
            # Remove duplicates
            compiled_rules = list(set(compiled_rules))


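        # Filter out unused rules. A rule is kept if it is the start rule or is
        # referenced by some other rule (a rule referencing itself does not count).
        # Dropping a rule may leave the rules it referenced unused as well, so
        # repeat until a pass removes nothing.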
        while True:
            c = len(compiled_rules)
            used_rules = {s for r in compiled_rules
                                for s in r.expansion
                                if isinstance(s, NonTerminal)
                                and s != r.origin}
            compiled_rules = [r for r in compiled_rules if r.origin.name==start or r.origin in used_rules]
            if len(compiled_rules) == c:
                break

        # Filter out unused terminals
        used_terms = {t.name for r in compiled_rules
                             for t in r.expansion
--- a/lark/reconstruct.py
+++ b/lark/reconstruct.py
@@ -69,7 +69,7 @@ class MakeMatchTree:
 class Reconstructor:
    def __init__(self, parser):
        # XXX TODO calling compile twice returns different results!
        tokens, rules, _grammar_extra = parser.grammar.compile()
        tokens, rules, _grammar_extra = parser.grammar.compile(parser.options.start)

        self.write_tokens = WriteTokensTransformer({t.name:t for t in tokens})
        self.rules = list(self._build_recons_rules(rules))
--- a/tests/test_parser.py
+++ b/tests/test_parser.py
@@ -1493,6 +1493,19 @@ def _make_parser_test(LEXER, PARSER):

            parser.parse(r'"That" "And a \"b"')


        def test_meddling_unused(self):
            """Unless 'unused' is removed, LALR analysis will fail on reduce-reduce collision.

            The 'unused' rule is not referenced by 'start' or 'x'. The test
            makes no assertion of its own: it passes as long as building the
            parser from this grammar does not raise.
            """
            grammar_with_unused_rule = """
                start: EKS* x
                x: EKS
                unused: x*
                EKS: "x"
            """
            # Constructing the parser is the whole check; nothing is parsed.
            _Lark(grammar_with_unused_rule)


        @unittest.skipIf(PARSER!='lalr', "Serialize currently only works for LALR parsers (though it should be easy to extend)")
        def test_serialize(self):
            grammar = """