@@ -75,7 +75,7 @@ class UnexpectedToken(ParseError, UnexpectedInput):
         self.column = getattr(token, 'column', '?')
         self.considered_rules = considered_rules
         self.state = state
-        self.pos_in_stream = token.pos_in_stream
+        self.pos_in_stream = getattr(token, 'pos_in_stream', None)
 
         message = ("Unexpected token %r at line %s, column %s.\n"
                    "Expected: %s\n"
@@ -157,9 +157,9 @@ class Lark:
         self.grammar = load_grammar(grammar, self.source)
 
         # Compile the EBNF grammar into BNF
-        tokens, self.rules, self.ignore_tokens = self.grammar.compile()
+        self.terminals, self.rules, self.ignore_tokens = self.grammar.compile()
 
-        self.lexer_conf = LexerConf(tokens, self.ignore_tokens, self.options.postlex, self.options.lexer_callbacks)
+        self.lexer_conf = LexerConf(self.terminals, self.ignore_tokens, self.options.postlex, self.options.lexer_callbacks)
 
         if self.options.parser:
             self.parser = self._build_parser()
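
Storing the compiled terminal definitions on the instance (`self.terminals`) rather than in a local variable makes them inspectable after construction. A quick sketch of what that exposes, assuming this PR's attribute name:

    from lark import Lark

    parser = Lark('start: "a"+')
    # The compiled terminal definitions now outlive __init__:
    print([t.name for t in parser.terminals])
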
@@ -448,8 +448,10 @@ class Grammar:
         self.ignore = ignore
 
     def compile(self):
-        token_defs = list(self.token_defs)
-        rule_defs = self.rule_defs
+        # We change the trees in-place (to support huge grammars)
+        # So deepcopy allows calling compile more than once.
+        token_defs = deepcopy(list(self.token_defs))
+        rule_defs = deepcopy(self.rule_defs)
 
         # =================
         #  Compile Tokens
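
`Lark.__init__` already calls `compile()` once, and the `Reconstructor` (next hunk) calls it again on the same `Grammar`; because compilation mutates the token and rule trees in place, a second call used to start from already-transformed trees. The `deepcopy` makes every call start from pristine definitions, while the `XXX TODO` added in the next hunk records that repeated calls still don't return identical results. A small sketch of the new guarantee, assuming this PR's three-value `compile()` signature and poking at the internal `parser.grammar` attribute:

    from lark import Lark

    parser = Lark('start: "a"+')           # first compile() runs in __init__
    t1, r1, i1 = parser.grammar.compile()  # second call
    t2, r2, i2 = parser.grammar.compile()  # third call, same pristine input
    assert len(t1) == len(t2) and len(r1) == len(r2)
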
@@ -67,38 +67,42 @@ class MakeMatchTree:
 class Reconstructor:
     def __init__(self, parser):
-        # Recreate the rules to assume a standard lexer
-        _tokens, rules, _grammar_extra = parser.grammar.compile()
+        # XXX TODO calling compile twice returns different results!
+        tokens, rules, _grammar_extra = parser.grammar.compile()
 
-        expand1s = {r.origin for r in parser.rules if r.options and r.options.expand1}
+        self.write_tokens = WriteTokensTransformer({t.name:t for t in tokens})
+        self.rules = list(self._build_recons_rules(rules))
 
-        d = defaultdict(list)
+    def _build_recons_rules(self, rules):
+        expand1s = {r.origin for r in rules if r.options and r.options.expand1}
+
+        aliases = defaultdict(list)
         for r in rules:
-            # Rules can match their alias
             if r.alias:
-                alias = NonTerminal(r.alias)
-                d[alias].append(r.expansion)
-                d[r.origin].append([alias])
-            else:
-                d[r.origin].append(r.expansion)
+                aliases[r.origin].append( r.alias )
 
-            # Expanded rules can match their own terminal
-            for sym in r.expansion:
-                if sym in expand1s:
-                    d[sym].append([Terminal(sym.name)])
+        rule_names = {r.origin for r in rules}
+        nonterminals = {sym for sym in rule_names
+                        if sym.name.startswith('_') or sym in expand1s or sym in aliases }
 
-        reduced_rules = defaultdict(list)
-        for name, expansions in d.items():
-            for expansion in expansions:
-                reduced = [sym if sym.name.startswith('_') or sym in expand1s else Terminal(sym.name)
-                           for sym in expansion if not is_discarded_terminal(sym)]
+        for r in rules:
+            recons_exp = [sym if sym in nonterminals else Terminal(sym.name)
+                          for sym in r.expansion if not is_discarded_terminal(sym)]
 
-                reduced_rules[name, tuple(reduced)].append(expansion)
+            # Skip self-recursive constructs
+            if recons_exp == [r.origin]:
+                continue
 
-        self.rules = [Rule(name, list(reduced), MakeMatchTree(name.name, expansions[0]), None)
-                      for (name, reduced), expansions in reduced_rules.items()]
+            sym = NonTerminal(r.alias) if r.alias else r.origin
 
-        self.write_tokens = WriteTokensTransformer({t.name:t for t in _tokens})
+            yield Rule(sym, recons_exp, MakeMatchTree(sym.name, r.expansion))
+
+        for origin, rule_aliases in aliases.items():
+            for alias in rule_aliases:
+                yield Rule(origin, [Terminal(alias)], MakeMatchTree(origin.name, [NonTerminal(alias)]))
+            yield Rule(origin, [Terminal(origin.name)], MakeMatchTree(origin.name, [origin]))
 
     def _match(self, term, token):
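
Behavior-wise, the generator rewrite keeps the old dict-based construction's contract: inlined rules (leading underscore, `expand1`, or aliased) stay nonterminals, every other rule is matched as a terminal bearing its own name, aliases get bridging rules, and discarded terminals are dropped from the match expansion and written back later by `WriteTokensTransformer`. A round-trip sketch in the spirit of the tests below, with an illustrative grammar that is not part of the PR:

    from lark import Lark
    from lark.reconstruct import Reconstructor

    grammar = r"""
    start: (rule NL)*
    rule: WORD ":" NUMBER
    WORD: ("a".."z" | "A".."Z")+
    NUMBER: ("0".."9")+
    NL: /\n/
    %import common.WS_INLINE
    %ignore WS_INLINE
    """

    parser = Lark(grammar, parser='lalr')
    text = "Elephants: 12\n"
    tree = parser.parse(text)
    new = Reconstructor(parser).reconstruct(tree)
    # %ignore'd whitespace is not preserved, so compare with spaces
    # stripped, exactly as the test helper below does.
    assert text.replace(' ', '') == new.replace(' ', '')
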
@@ -5,6 +5,7 @@ import logging
 from .test_trees import TestTrees
 from .test_tools import TestStandalone
+from .test_reconstructor import TestReconstructor
 
 try:
     from .test_nearley.test_nearley import TestNearley
@@ -10,14 +10,16 @@ common = """
 %ignore WS_INLINE
 """
 
+def _remove_ws(s):
+    return s.replace(' ', '').replace('\n','')
 
 class TestReconstructor(TestCase):
 
-    def reconstruct(self, grammar, code):
+    def assert_reconstruct(self, grammar, code):
         parser = Lark(grammar, parser='lalr')
         tree = parser.parse(code)
         new = Reconstructor(parser).reconstruct(tree)
-        self.assertEqual(code.replace(' ', ''), new.replace(' ', ''))
+        self.assertEqual(_remove_ws(code), _remove_ws(new))
 
     def test_starred_rule(self):
@@ -33,7 +35,7 @@ class TestReconstructor(TestCase):
         Elephants: 12
         """
 
-        self.reconstruct(g, code)
+        self.assert_reconstruct(g, code)
 
     def test_starred_group(self):
@@ -47,7 +49,7 @@ class TestReconstructor(TestCase):
         Elephants: 12
         """
 
-        self.reconstruct(g, code)
+        self.assert_reconstruct(g, code)
 
     def test_alias(self):
@@ -65,7 +67,7 @@ class TestReconstructor(TestCase):
         hello
         """
 
-        self.reconstruct(g, code)
+        self.assert_reconstruct(g, code)
 
     def test_json_example(self):
         test_json = '''