| @@ -19,9 +19,13 @@ def is_iter_empty(i): | |||||
| except StopIteration: | except StopIteration: | ||||
| return True | return True | ||||
| class WriteTokensTransformer(Transformer_InPlace): | class WriteTokensTransformer(Transformer_InPlace): | ||||
| def __init__(self, tokens): | |||||
| "Inserts discarded tokens into their correct place, according to the rules of grammar" | |||||
| def __init__(self, tokens, term_subs): | |||||
| self.tokens = tokens | self.tokens = tokens | ||||
| self.term_subs = term_subs | |||||
| def __default__(self, data, children, meta): | def __default__(self, data, children, meta): | ||||
| # if not isinstance(t, MatchTree): | # if not isinstance(t, MatchTree): | ||||
| @@ -33,10 +37,15 @@ class WriteTokensTransformer(Transformer_InPlace): | |||||
| to_write = [] | to_write = [] | ||||
| for sym in meta.orig_expansion: | for sym in meta.orig_expansion: | ||||
| if is_discarded_terminal(sym): | if is_discarded_terminal(sym): | ||||
| t = self.tokens[sym.name] | |||||
| if not isinstance(t.pattern, PatternStr): | |||||
| raise NotImplementedError("Reconstructing regexps not supported yet: %s" % t) | |||||
| to_write.append(t.pattern.value) | |||||
| try: | |||||
| v = self.term_subs[sym.name](sym) | |||||
| except KeyError: | |||||
| t = self.tokens[sym.name] | |||||
| if not isinstance(t.pattern, PatternStr): | |||||
| raise NotImplementedError("Reconstructing regexps not supported yet: %s" % t) | |||||
| v = t.pattern.value | |||||
| to_write.append(v) | |||||
| else: | else: | ||||
| x = next(iter_args) | x = next(iter_args) | ||||
| if isinstance(x, list): | if isinstance(x, list): | ||||
| @@ -66,14 +75,34 @@ class MakeMatchTree: | |||||
| t.meta.orig_expansion = self.expansion | t.meta.orig_expansion = self.expansion | ||||
| return t | return t | ||||
def best_from_group(seq, group_key, cmp_key):
    """Pick one representative item per group from ``seq``.

    Items are grouped by ``group_key(item)``. Within a group the item with
    the smallest ``cmp_key(item)`` is kept; on a tie the item seen first
    wins. ``cmp_key`` is only evaluated when two items share a group key.

    Returns a list holding one item per distinct group key (in order of each
    key's first appearance on Python 3.7+, where dicts keep insertion order).
    """
    best = {}
    for item in seq:
        key = group_key(item)
        if key not in best:
            best[key] = item
        else:
            candidate_score = cmp_key(item)
            # Strict comparison: an equal score never displaces the incumbent.
            if cmp_key(best[key]) > candidate_score:
                best[key] = item
    return list(best.values())
| class Reconstructor: | class Reconstructor: | ||||
def __init__(self, parser, term_subs=None):
    """Set up the reconstruction parser from ``parser``'s compiled grammar.

    ``term_subs`` is handed to ``WriteTokensTransformer`` unchanged; when it
    is omitted an empty dict is used.
    """
    # Use a fresh dict per call rather than one mutable default shared by
    # every Reconstructor instance.
    if term_subs is None:
        term_subs = {}

    # XXX TODO calling compile twice returns different results!
    assert parser.options.maybe_placeholders == False
    tokens, rules, _grammar_extra = parser.grammar.compile(parser.options.start)

    self.write_tokens = WriteTokensTransformer({t.name: t for t in tokens}, term_subs)

    self.rules = list(self._build_recons_rules(rules))
    self.rules.reverse()
    # Collapse equal rules to a single entry; among equals the one with the
    # longest expansion is kept (lowest -len), first seen winning ties.
    self.rules = best_from_group(self.rules, lambda r: r, lambda r: -len(r.expansion))
    self.rules.sort(key=lambda r: len(r.expansion))

    callbacks = {rule: rule.alias for rule in self.rules}   # TODO pass callbacks through dict, instead of alias?
    self.parser = earley.Parser(ParserConf(self.rules, callbacks, parser.options.start),
                                self._match, resolve_ambiguity=True)
| @@ -127,4 +156,12 @@ class Reconstructor: | |||||
| yield item | yield item | ||||
def reconstruct(self, tree):
    """Rebuild the text for ``tree`` as a single string.

    Concatenates the fragments yielded by ``self._reconstruct(tree)``. A
    single space is inserted between two neighbouring non-empty fragments
    when the left one ends, and the right one starts, with a word character
    (alphanumeric or underscore), so that adjacent words are not fused.
    """
    def is_word_char(ch):
        # Underscore counts as a word character: "a_" followed by "b" must
        # not collapse into the different word "a_b".
        return ch.isalnum() or ch == '_'

    parts = []
    last = ''   # most recent non-empty fragment
    for item in self._reconstruct(tree):
        if item:
            if last and is_word_char(last[-1]) and is_word_char(item[0]):
                parts.append(' ')
            # Only non-empty fragments update `last`; an empty fragment adds
            # no text and must not hide its predecessor from the check.
            last = item
        parts.append(item)
    return ''.join(parts)