@@ -19,9 +19,13 @@ def is_iter_empty(i):
     except StopIteration:
         return True
 
+
 class WriteTokensTransformer(Transformer_InPlace):
-    def __init__(self, tokens):
+    "Inserts discarded tokens into their correct place, according to the rules of grammar"
+
+    def __init__(self, tokens, term_subs):
         self.tokens = tokens
+        self.term_subs = term_subs
 
     def __default__(self, data, children, meta):
         #  if not isinstance(t, MatchTree):
@@ -33,10 +37,15 @@ class WriteTokensTransformer(Transformer_InPlace):
         to_write = []
         for sym in meta.orig_expansion:
             if is_discarded_terminal(sym):
-                t = self.tokens[sym.name]
-                if not isinstance(t.pattern, PatternStr):
-                    raise NotImplementedError("Reconstructing regexps not supported yet: %s" % t)
-                to_write.append(t.pattern.value)
+                try:
+                    v = self.term_subs[sym.name](sym)
+                except KeyError:
+                    t = self.tokens[sym.name]
+                    if not isinstance(t.pattern, PatternStr):
+                        raise NotImplementedError("Reconstructing regexps not supported yet: %s" % t)
+
+                    v = t.pattern.value
+                to_write.append(v)
             else:
                 x = next(iter_args)
                 if isinstance(x, list):
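
The try/except added above defines the contract for term_subs: it maps a discarded terminal's name to a callable that receives the symbol and returns the text to emit, and any terminal not in the mapping falls back to the terminal's literal pattern. A minimal sketch of that lookup in isolation (the terminal name and helper are illustrative, not part of the patch):

    # Hypothetical substitution: always render a discarded _NL as a newline.
    term_subs = {'_NL': lambda sym: '\n'}

    def render_discarded(sym, term_subs, tokens):
        try:
            return term_subs[sym.name](sym)            # user-supplied substitution wins
        except KeyError:
            return tokens[sym.name].pattern.value      # fall back to the literal pattern
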
@@ -66,14 +75,34 @@ class MakeMatchTree:
         t.meta.orig_expansion = self.expansion
         return t
 
+def best_from_group(seq, group_key, cmp_key):
+    d = {}
+    for item in seq:
+        key = group_key(item)
+        if key in d:
+            v1 = cmp_key(item)
+            v2 = cmp_key(d[key])
+            if v2 > v1:
+                d[key] = item
+        else:
+            d[key] = item
+    return list(d.values())
+
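
best_from_group deduplicates a sequence by group_key, keeping within each group the item whose cmp_key is smallest (the stored entry is replaced whenever its cmp_key compares greater). A small self-contained example:

    # Keep the longest word per initial letter: cmp_key negates the length,
    # so the minimum cmp_key corresponds to the maximum length.
    words = ['ant', 'apple', 'bee', 'bear']
    best = best_from_group(words, lambda w: w[0], lambda w: -len(w))
    print(best)  # ['apple', 'bear']
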
 class Reconstructor:
-    def __init__(self, parser):
+    def __init__(self, parser, term_subs={}):
         # XXX TODO calling compile twice returns different results!
         assert parser.options.maybe_placeholders == False
         tokens, rules, _grammar_extra = parser.grammar.compile(parser.options.start)
 
-        self.write_tokens = WriteTokensTransformer({t.name:t for t in tokens})
+        self.write_tokens = WriteTokensTransformer({t.name:t for t in tokens}, term_subs)
         self.rules = list(self._build_recons_rules(rules))
+        self.rules.reverse()
+
+        # print(len(self.rules))
+        self.rules = best_from_group(self.rules, lambda r: r, lambda r: -len(r.expansion))
+        # print(len(self.rules))
+        # self.rules = list(set(list(self._build_recons_rules(rules))))
+        self.rules.sort(key=lambda r: len(r.expansion))
         callbacks = {rule: rule.alias for rule in self.rules}  # TODO pass callbacks through dict, instead of alias?
         self.parser = earley.Parser(ParserConf(self.rules, callbacks, parser.options.start),
                                     self._match, resolve_ambiguity=True)
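
Taken together, the constructor changes thread term_subs down to WriteTokensTransformer and prune duplicate reconstruction rules via best_from_group before handing them to the Earley parser. A hedged usage sketch of the new parameter (the grammar, input text, and _NL terminal are placeholders, not from this patch):

    from lark import Lark
    from lark.reconstruct import Reconstructor

    parser = Lark(grammar, maybe_placeholders=False)  # the assert above requires this
    tree = parser.parse(text)

    # Render every discarded _NL terminal as a real newline.
    new_text = Reconstructor(parser, term_subs={'_NL': lambda sym: '\n'}).reconstruct(tree)
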
@@ -127,4 +156,12 @@ class Reconstructor:
                 yield item
 
     def reconstruct(self, tree):
-        return ''.join(self._reconstruct(tree))
+        x = self._reconstruct(tree)
+        y = []
+        prev_item = ''
+        for item in x:
+            if prev_item and item and prev_item[-1].isalnum() and item[0].isalnum():
+                y.append(' ')
+            y.append(item)
+            prev_item = item
+        return ''.join(y)
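
The rewritten reconstruct no longer blindly concatenates the yielded fragments: it inserts a single space wherever two adjacent fragments would otherwise fuse into one alphanumeric run, while leaving punctuation boundaries untouched. The joining rule in isolation (a standalone sketch, not part of the patch):

    def join_fragments(fragments):
        # Space only between fragments whose boundary characters are both
        # alphanumeric, e.g. 'if' + 'x' -> 'if x', but '(' + 'y' -> '(y'.
        out, prev = [], ''
        for item in fragments:
            if prev and item and prev[-1].isalnum() and item[0].isalnum():
                out.append(' ')
            out.append(item)
            prev = item
        return ''.join(out)

    print(join_fragments(['if', 'x', '(', 'y', ')']))  # if x(y)
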