From b90b0a215f78c65f6b56d2b7be004e636ff44252 Mon Sep 17 00:00:00 2001 From: Erez Shinan Date: Sat, 10 Feb 2018 12:20:20 +0200 Subject: [PATCH] Fixed reconstruct (Issue #72) --- lark/parsers/earley.py | 2 +- lark/reconstruct.py | 60 +++++++++++++++++------------------------- 2 files changed, 25 insertions(+), 37 deletions(-) diff --git a/lark/parsers/earley.py b/lark/parsers/earley.py index 62d3e15..862ad6b 100644 --- a/lark/parsers/earley.py +++ b/lark/parsers/earley.py @@ -153,7 +153,7 @@ class Parser: self.postprocess = {} self.predictions = {} for rule in parser_conf.rules: - self.postprocess[rule] = getattr(parser_conf.callback, rule.alias) + self.postprocess[rule] = rule.alias if callable(rule.alias) else getattr(parser_conf.callback, rule.alias) self.predictions[rule.origin] = [x.rule for x in self.analysis.expand_rule(rule.origin)] self.term_matcher = term_matcher diff --git a/lark/reconstruct.py b/lark/reconstruct.py index 590a8e7..be1a13a 100644 --- a/lark/reconstruct.py +++ b/lark/reconstruct.py @@ -1,10 +1,10 @@ -import re from collections import defaultdict from .tree import Tree -from .common import is_terminal, ParserConf, PatternStr, Terminal +from .common import is_terminal, ParserConf, PatternStr from .lexer import Token from .parsers import earley +from .grammar import Rule @@ -24,20 +24,6 @@ class Reconstructor: _tokens, rules, _grammar_extra = parser.grammar.compile(lexer='standard', start='whatever') tokens = {t.name:t for t in _tokens} - token_res = {t.name:re.compile(t.pattern.to_regexp()) for t in _tokens} - - class MatchTerminal(Terminal): - def match(self, other): - if isinstance(other, Tree): - return False - return token_res[self.data].match(other) is not None - - class MatchTree(Terminal): - def match(self, other): - try: - return self.data == other.data - except AttributeError: - return False class WriteTokens: def __init__(self, name, expansion): @@ -45,7 +31,7 @@ class Reconstructor: self.expansion = expansion def f(self, args): - args2 = iter(args) + iter_args = iter(args) to_write = [] for sym in self.expansion: if is_discarded_terminal(sym): @@ -53,7 +39,7 @@ class Reconstructor: assert isinstance(t.pattern, PatternStr) to_write.append(t.pattern.value) else: - x = next(args2) + x = next(iter_args) if isinstance(x, list): to_write += x else: @@ -63,36 +49,38 @@ class Reconstructor: assert x.data == sym, x to_write.append(x) - assert is_iter_empty(args2) - + assert is_iter_empty(iter_args) return to_write + expand1s = {r.origin for r in parser.rules if r.options and r.options.expand1} + d = defaultdict(list) - for name, (expansions, _o) in rules.items(): - for expansion, alias in expansions: - if alias: - d[alias].append(expansion) - d[name].append([alias]) - else: - d[name].append(expansion) - - rules = [] - expand1s = {name for name, (_x, options) in parser.rules.items() - if options and options.expand1} + for r in rules: + if r.alias: + d[r.alias].append(r.expansion) + d[r.origin].append([r.alias]) + else: + d[r.origin].append(r.expansion) + self.rules = [] for name, expansions in d.items(): for expansion in expansions: - reduced = [sym if sym.startswith('_') or sym in expand1s else - MatchTerminal(sym) if is_terminal(sym) else MatchTree(sym) + reduced = [sym if sym.startswith('_') or sym in expand1s else sym.upper() for sym in expansion if not is_discarded_terminal(sym)] - rules.append((name, reduced, WriteTokens(name, expansion).f, None)) - self.rules = rules + self.rules.append(Rule(name, reduced, WriteTokens(name, expansion).f, None)) + + def _match(self, term, token): + if isinstance(token, Tree): + return token.data.upper() == term + elif isinstance(token, Token): + return term == token.type + assert False def _reconstruct(self, tree): # TODO: ambiguity? - parser = earley.Parser(self.rules, tree.data, {}) + parser = earley.Parser(ParserConf(self.rules, None, tree.data), self._match) res = parser.parse(tree.children) for item in res: if isinstance(item, Tree):