@@ -75,7 +75,7 @@ class UnexpectedToken(ParseError, UnexpectedInput):
         self.column = getattr(token, 'column', '?')
         self.considered_rules = considered_rules
         self.state = state
-        self.pos_in_stream = token.pos_in_stream
+        self.pos_in_stream = getattr(token, 'pos_in_stream', None)
 
         message = ("Unexpected token %r at line %s, column %s.\n"
                    "Expected: %s\n"
@@ -157,9 +157,9 @@ class Lark:
         self.grammar = load_grammar(grammar, self.source)
 
         # Compile the EBNF grammar into BNF
-        tokens, self.rules, self.ignore_tokens = self.grammar.compile()
+        self.terminals, self.rules, self.ignore_tokens = self.grammar.compile()
 
-        self.lexer_conf = LexerConf(tokens, self.ignore_tokens, self.options.postlex, self.options.lexer_callbacks)
+        self.lexer_conf = LexerConf(self.terminals, self.ignore_tokens, self.options.postlex, self.options.lexer_callbacks)
 
         if self.options.parser:
             self.parser = self._build_parser()
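
Storing the compiled terminal definitions on the instance (`self.terminals`) rather than in a local variable makes them inspectable after construction. A quick sketch of what that exposes, assuming this PR's attribute name:

    from lark import Lark

    parser = Lark('start: "a"+')
    # The compiled terminal definitions now outlive __init__:
    print([t.name for t in parser.terminals])
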
@@ -448,8 +448,10 @@ class Grammar:
         self.ignore = ignore
 
     def compile(self):
-        token_defs = list(self.token_defs)
-        rule_defs = self.rule_defs
+        # We change the trees in-place (to support huge grammars)
+        # So deepcopy allows calling compile more than once.
+        token_defs = deepcopy(list(self.token_defs))
+        rule_defs = deepcopy(self.rule_defs)
 
         # =================
         #  Compile Tokens
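
`Lark.__init__` already calls `compile()` once, and the `Reconstructor` (next hunk) calls it again on the same `Grammar`; because compilation mutates the token and rule trees in place, a second call used to start from already-transformed trees. The `deepcopy` makes every call start from pristine definitions, while the `XXX TODO` added in the next hunk records that repeated calls still don't return identical results. A small sketch of the new guarantee, assuming this PR's three-value `compile()` signature and poking at the internal `parser.grammar` attribute:

    from lark import Lark

    parser = Lark('start: "a"+')           # first compile() runs in __init__
    t1, r1, i1 = parser.grammar.compile()  # second call
    t2, r2, i2 = parser.grammar.compile()  # third call, same pristine input
    assert len(t1) == len(t2) and len(r1) == len(r2)
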
@@ -67,38 +67,42 @@ class MakeMatchTree:
 class Reconstructor:
     def __init__(self, parser):
-        # Recreate the rules to assume a standard lexer
-        _tokens, rules, _grammar_extra = parser.grammar.compile()
+        # XXX TODO calling compile twice returns different results!
+        tokens, rules, _grammar_extra = parser.grammar.compile()
 
-        expand1s = {r.origin for r in parser.rules if r.options and r.options.expand1}
+        self.write_tokens = WriteTokensTransformer({t.name:t for t in tokens})
+        self.rules = list(self._build_recons_rules(rules))
 
-        d = defaultdict(list)
+    def _build_recons_rules(self, rules):
+        expand1s = {r.origin for r in rules if r.options and r.options.expand1}
+
+        aliases = defaultdict(list)
         for r in rules:
-            # Rules can match their alias
             if r.alias:
-                alias = NonTerminal(r.alias)
-                d[alias].append(r.expansion)
-                d[r.origin].append([alias])
-            else:
-                d[r.origin].append(r.expansion)
+                aliases[r.origin].append( r.alias )
 
-            # Expanded rules can match their own terminal
-            for sym in r.expansion:
-                if sym in expand1s:
-                    d[sym].append([Terminal(sym.name)])
+        rule_names = {r.origin for r in rules}
+        nonterminals = {sym for sym in rule_names
+                        if sym.name.startswith('_') or sym in expand1s or sym in aliases }
 
-        reduced_rules = defaultdict(list)
-        for name, expansions in d.items():
-            for expansion in expansions:
-                reduced = [sym if sym.name.startswith('_') or sym in expand1s else Terminal(sym.name)
-                           for sym in expansion if not is_discarded_terminal(sym)]
+        for r in rules:
+            recons_exp = [sym if sym in nonterminals else Terminal(sym.name)
+                          for sym in r.expansion if not is_discarded_terminal(sym)]
 
-                reduced_rules[name, tuple(reduced)].append(expansion)
+            # Skip self-recursive constructs
+            if recons_exp == [r.origin]:
+                continue
 
-        self.rules = [Rule(name, list(reduced), MakeMatchTree(name.name, expansions[0]), None)
-                      for (name, reduced), expansions in reduced_rules.items()]
+            sym = NonTerminal(r.alias) if r.alias else r.origin
 
-        self.write_tokens = WriteTokensTransformer({t.name:t for t in _tokens})
+            yield Rule(sym, recons_exp, MakeMatchTree(sym.name, r.expansion))
+
+        for origin, rule_aliases in aliases.items():
+            for alias in rule_aliases:
+                yield Rule(origin, [Terminal(alias)], MakeMatchTree(origin.name, [NonTerminal(alias)]))
+            yield Rule(origin, [Terminal(origin.name)], MakeMatchTree(origin.name, [origin]))
 
     def _match(self, term, token):
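
Behavior-wise, the generator rewrite keeps the old dict-based construction's contract: inlined rules (leading underscore, `expand1`, or aliased) stay nonterminals, every other rule is matched as a terminal bearing its own name, aliases get bridging rules, and discarded terminals are dropped from the match expansion and written back later by `WriteTokensTransformer`. A round-trip sketch in the spirit of the tests below, with an illustrative grammar that is not part of the PR:

    from lark import Lark
    from lark.reconstruct import Reconstructor

    grammar = r"""
    start: (rule NL)*
    rule: WORD ":" NUMBER
    WORD: ("a".."z" | "A".."Z")+
    NUMBER: ("0".."9")+
    NL: /\n/
    %import common.WS_INLINE
    %ignore WS_INLINE
    """

    parser = Lark(grammar, parser='lalr')
    text = "Elephants: 12\n"
    tree = parser.parse(text)
    new = Reconstructor(parser).reconstruct(tree)
    # %ignore'd whitespace is not preserved, so compare with spaces
    # stripped, exactly as the test helper below does.
    assert text.replace(' ', '') == new.replace(' ', '')
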
@@ -5,6 +5,7 @@ import logging
 from .test_trees import TestTrees
 from .test_tools import TestStandalone
+from .test_reconstructor import TestReconstructor
 
 try:
     from .test_nearley.test_nearley import TestNearley
@@ -10,14 +10,16 @@ common = """
 %ignore WS_INLINE
 """
 
+def _remove_ws(s):
+    return s.replace(' ', '').replace('\n','')
 
 class TestReconstructor(TestCase):
 
-    def reconstruct(self, grammar, code):
+    def assert_reconstruct(self, grammar, code):
         parser = Lark(grammar, parser='lalr')
         tree = parser.parse(code)
         new = Reconstructor(parser).reconstruct(tree)
-        self.assertEqual(code.replace(' ', ''), new.replace(' ', ''))
+        self.assertEqual(_remove_ws(code), _remove_ws(new))
 
     def test_starred_rule(self):
@@ -33,7 +35,7 @@ class TestReconstructor(TestCase):
         Elephants: 12
         """
 
-        self.reconstruct(g, code)
+        self.assert_reconstruct(g, code)
 
     def test_starred_group(self):
@@ -47,7 +49,7 @@ class TestReconstructor(TestCase):
         Elephants: 12
         """
 
-        self.reconstruct(g, code)
+        self.assert_reconstruct(g, code)
 
     def test_alias(self):
@@ -65,7 +67,7 @@ class TestReconstructor(TestCase):
         hello
         """
 
-        self.reconstruct(g, code)
+        self.assert_reconstruct(g, code)
 
     def test_json_example(self):
         test_json = '''