From 86f1bb1db69196f13a288bab54c1ae5966b49c80 Mon Sep 17 00:00:00 2001
From: Erez Sh
Date: Wed, 20 Nov 2019 22:31:20 +0200
Subject: [PATCH] Improved the reconstructor, but it still feels like a lost cause

---
 lark/reconstruct.py | 53 ++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 45 insertions(+), 8 deletions(-)

diff --git a/lark/reconstruct.py b/lark/reconstruct.py
index fb47b93..b7a6659 100644
--- a/lark/reconstruct.py
+++ b/lark/reconstruct.py
@@ -19,9 +19,13 @@ def is_iter_empty(i):
     except StopIteration:
         return True
 
+
 class WriteTokensTransformer(Transformer_InPlace):
-    def __init__(self, tokens):
+    "Inserts discarded tokens into their correct place, according to the rules of grammar"
+
+    def __init__(self, tokens, term_subs):
         self.tokens = tokens
+        self.term_subs = term_subs
 
     def __default__(self, data, children, meta):
         # if not isinstance(t, MatchTree):
@@ -33,10 +37,15 @@ class WriteTokensTransformer(Transformer_InPlace):
         to_write = []
         for sym in meta.orig_expansion:
             if is_discarded_terminal(sym):
-                t = self.tokens[sym.name]
-                if not isinstance(t.pattern, PatternStr):
-                    raise NotImplementedError("Reconstructing regexps not supported yet: %s" % t)
-                to_write.append(t.pattern.value)
+                try:
+                    v = self.term_subs[sym.name](sym)
+                except KeyError:
+                    t = self.tokens[sym.name]
+                    if not isinstance(t.pattern, PatternStr):
+                        raise NotImplementedError("Reconstructing regexps not supported yet: %s" % t)
+
+                    v = t.pattern.value
+                to_write.append(v)
             else:
                 x = next(iter_args)
                 if isinstance(x, list):
@@ -66,14 +75,34 @@ class MakeMatchTree:
         t.meta.orig_expansion = self.expansion
         return t
 
+def best_from_group(seq, group_key, cmp_key):
+    d = {}
+    for item in seq:
+        key = group_key(item)
+        if key in d:
+            v1 = cmp_key(item)
+            v2 = cmp_key(d[key])
+            if v2 > v1:
+                d[key] = item
+        else:
+            d[key] = item
+    return list(d.values())
+
 class Reconstructor:
-    def __init__(self, parser):
+    def __init__(self, parser, term_subs={}):
         # XXX TODO calling compile twice returns different results!
         assert parser.options.maybe_placeholders == False
         tokens, rules, _grammar_extra = parser.grammar.compile(parser.options.start)
 
-        self.write_tokens = WriteTokensTransformer({t.name:t for t in tokens})
+        self.write_tokens = WriteTokensTransformer({t.name:t for t in tokens}, term_subs)
         self.rules = list(self._build_recons_rules(rules))
+        self.rules.reverse()
+        # print(len(self.rules))
+        self.rules = best_from_group(self.rules, lambda r: r, lambda r: -len(r.expansion))
+        # print(len(self.rules))
+
+        # self.rules = list(set(list(self._build_recons_rules(rules))))
+        self.rules.sort(key=lambda r: len(r.expansion))
         callbacks = {rule: rule.alias for rule in self.rules} # TODO pass callbacks through dict, instead of alias?
         self.parser = earley.Parser(ParserConf(self.rules, callbacks, parser.options.start),
                                     self._match, resolve_ambiguity=True)
@@ -127,4 +156,12 @@ class Reconstructor:
                 yield item
 
     def reconstruct(self, tree):
-        return ''.join(self._reconstruct(tree))
+        x = self._reconstruct(tree)
+        y = []
+        prev_item = ''
+        for item in x:
+            if prev_item and item and prev_item[-1].isalnum() and item[0].isalnum():
+                y.append(' ')
+            y.append(item)
+            prev_item = item
+        return ''.join(y)