@@ -200,7 +200,7 @@ class Lark(Serialize):
         self.grammar = load_grammar(grammar, self.source)
 
         # Compile the EBNF grammar into BNF
-        self.terminals, self.rules, self.ignore_tokens = self.grammar.compile()
+        self.terminals, self.rules, self.ignore_tokens = self.grammar.compile(self.options.start)
 
         # If the user asked to invert the priorities, negate them all here.
         # This replaces the old 'resolve__antiscore_sum' option.
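This first hunk threads the parser's `start` option into `Grammar.compile()`, so compilation knows the entry symbol (the unused-rule filter added further down depends on it). A minimal usage sketch of the public API; the grammar is invented for illustration, and `start` defaults to `'start'` when not given:

```python
from lark import Lark

# `start` names the entry rule; Lark forwards it (as self.options.start)
# into grammar.compile().
parser = Lark(r'''
    sentence: WORD+
    WORD: /[a-z]+/
    %ignore " "
''', start='sentence')

print(parser.parse("hello world").pretty())
```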
@@ -205,7 +205,7 @@ class EBNF_to_BNF(Transformer_InPlace):
         keep_all_tokens = self.rule_options and self.rule_options.keep_all_tokens
 
         def will_not_get_removed(sym):
-            if isinstance(sym, NonTerminal):
+            if isinstance(sym, NonTerminal):
                 return not sym.name.startswith('_')
             if isinstance(sym, Terminal):
                 return keep_all_tokens or not sym.filter_out
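For context on the `filter_out` test above: anonymous terminals defined by plain string literals are normally filtered out of the parse tree, and `keep_all_tokens=True` overrides that. A small sketch of the behavior through the public API (grammar invented for the example):

```python
from lark import Lark

g = 'start: "a" "," "a"'
print(Lark(g).parse("a,a"))                        # literals filtered out: Tree('start', [])
print(Lark(g, keep_all_tokens=True).parse("a,a"))  # all three tokens kept as children
```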
@@ -465,7 +465,7 @@ class Grammar:
         self.rule_defs = rule_defs
         self.ignore = ignore
 
-    def compile(self):
+    def compile(self, start):
         # We change the trees in-place (to support huge grammars)
         # So deepcopy allows calling compile more than once.
         term_defs = deepcopy(list(self.term_defs))
@@ -546,6 +546,18 @@ class Grammar:
         # Remove duplicates
         compiled_rules = list(set(compiled_rules))
 
+        # Filter out unused rules
+        while True:
+            c = len(compiled_rules)
+            used_rules = {s for r in compiled_rules
+                          for s in r.expansion
+                          if isinstance(s, NonTerminal)
+                          and s != r.origin}
+            compiled_rules = [r for r in compiled_rules if r.origin.name==start or r.origin in used_rules]
+            if len(compiled_rules) == c:
+                break
+
         # Filter out unused terminals
         used_terms = {t.name for r in compiled_rules
                       for t in r.expansion
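The loop added in this hunk drops every rule that is neither the `start` rule nor referenced by a surviving rule, and it must iterate to a fixed point: deleting one unused rule can orphan another that only it referenced. A standalone sketch of the same idea over plain tuples rather than lark's internal `Rule`/`NonTerminal` objects (all names here are illustrative):

```python
def filter_unused(rules, start):
    """rules: list of (origin, expansion) pairs; expansion is a list of symbol names."""
    while True:
        before = len(rules)
        # Symbols referenced by any rule, ignoring direct self-references
        used = {s for origin, expansion in rules for s in expansion if s != origin}
        rules = [(o, e) for o, e in rules if o == start or o in used]
        if len(rules) == before:   # fixed point: nothing more to remove
            break
    return rules

rules = [('start', ['x']), ('x', ['EKS']),
         ('unused', ['x']), ('also_unused', ['unused'])]
print(filter_unused(rules, 'start'))
# First pass removes 'also_unused', second pass removes 'unused':
# [('start', ['x']), ('x', ['EKS'])]
```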
@@ -69,7 +69,7 @@ class MakeMatchTree:
 class Reconstructor:
     def __init__(self, parser):
         # XXX TODO calling compile twice returns different results!
-        tokens, rules, _grammar_extra = parser.grammar.compile()
+        tokens, rules, _grammar_extra = parser.grammar.compile(parser.options.start)
 
         self.write_tokens = WriteTokensTransformer({t.name:t for t in tokens})
         self.rules = list(self._build_recons_rules(rules))
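`Reconstructor` builds its rules from the same `Grammar.compile()`, so it now forwards the parser's own `start` as well. A hedged usage sketch of the reconstruction round trip (the grammar is invented, and exact behavior may differ by lark version):

```python
from lark import Lark
from lark.reconstruct import Reconstructor

parser = Lark('start: "a" "b"')
tree = parser.parse("ab")
# WriteTokensTransformer re-inserts the filtered-out literal tokens
print(Reconstructor(parser).reconstruct(tree))  # -> 'ab'
```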
@@ -1493,6 +1493,19 @@ def _make_parser_test(LEXER, PARSER):
         parser.parse(r'"That" "And a \"b"')
 
+        def test_meddling_unused(self):
+            "Unless 'unused' is removed, LALR analysis will fail on reduce-reduce collision"
+            grammar = """
+                start: EKS* x
+                x: EKS
+                unused: x*
+                EKS: "x"
+            """
+            parser = _Lark(grammar)
+
         @unittest.skipIf(PARSER!='lalr', "Serialize currently only works for LALR parsers (though it should be easy to extend)")
         def test_serialize(self):
             grammar = """