|
- from collections import defaultdict
-
- from .tree import Tree
- from .visitors import Transformer_InPlace
- from .common import ParserConf, PatternStr
- from .lexer import Token
- from .parsers import earley, resolve_ambig
- from .grammar import Rule, Terminal, NonTerminal
-
-
-
def is_discarded_terminal(t):
    """Tell whether symbol *t* is a terminal that the lexer filters out.

    Such terminals (e.g. fixed punctuation) don't appear in the parse tree,
    so their text must be re-inserted when reconstructing source.
    """
    if not t.is_term:
        return t.is_term
    return t.filter_out
-
def is_iter_empty(i):
    """Return True if iterator *i* is exhausted.

    Note: consumes one item from *i* when it is not empty.
    """
    sentinel = object()
    return next(i, sentinel) is sentinel
-
class WriteTokensTransformer(Transformer_InPlace):
    """Flattens match trees back into token/text sequences.

    Walks trees produced via MakeMatchTree and, for each node, emits its
    children in the order of the rule's original expansion, splicing the
    literal text of discarded terminals (e.g. punctuation the lexer
    filtered out) back into place.
    """

    def __init__(self, tokens):
        # tokens: mapping of terminal name -> token definition, used to
        # recover the literal text of filtered-out terminals.
        self.tokens = tokens

    def __default__(self, data, children, meta):
        # Only trees built by MakeMatchTree carry the original expansion we
        # need; leave any other tree untouched.
        if not getattr(meta, 'match_tree', False):
            return Tree(data, children)

        iter_args = iter(children)
        to_write = []
        for sym in meta.orig_expansion:
            if is_discarded_terminal(sym):
                # The lexer dropped this terminal; re-emit its literal text.
                # Only string patterns can be reconstructed unambiguously.
                t = self.tokens[sym.name]
                assert isinstance(t.pattern, PatternStr)
                to_write.append(t.pattern.value)
            else:
                x = next(iter_args)
                if isinstance(x, list):
                    # Already-flattened output from a nested match tree.
                    to_write += x
                else:
                    # Sanity-check the child against the expected symbol.
                    if isinstance(x, Token):
                        assert Terminal(x.type) == sym, x
                    else:
                        assert NonTerminal(x.data) == sym, (sym, x)
                    to_write.append(x)

        # The expansion must account for every child exactly once.
        assert is_iter_empty(iter_args)
        return to_write
-
-
class MatchTree(Tree):
    """Marker subclass for trees built while matching against the reduced grammar."""
-
class MakeMatchTree:
    """Rule callback that builds a MatchTree tagged with the rule's original
    (unreduced) expansion, for later flattening by WriteTokensTransformer."""

    def __init__(self, name, expansion):
        self.name = name
        self.expansion = expansion

    def __call__(self, args):
        tree = MatchTree(self.name, args)
        meta = tree.meta
        meta.match_tree = True
        meta.orig_expansion = self.expansion
        return tree
-
class Reconstructor:
    """Reconstructs source text from a parse tree.

    Inverts the grammar so that subtrees and tokens act as terminals, then
    Earley-parses a tree's children against the inverted grammar to find a
    full derivation, from which the original text is reassembled.
    """

    def __init__(self, parser):
        # parser: a Lark instance; its grammar is recompiled here so the
        # reconstruction rules assume a standard lexer regardless of how
        # the parser itself was configured.
        # Recreate the rules to assume a standard lexer
        _tokens, rules, _grammar_extra = parser.grammar.compile()

        # Origins of expand1 rules: these nodes may have been collapsed into
        # their single child, so they must also be matchable as terminals.
        expand1s = {r.origin for r in parser.rules if r.options and r.options.expand1}

        d = defaultdict(list)
        for r in rules:
            # Rules can match their alias
            if r.alias:
                alias = NonTerminal(r.alias)
                d[alias].append(r.expansion)
                d[r.origin].append([alias])
            else:
                d[r.origin].append(r.expansion)

            # Expanded rules can match their own terminal
            for sym in r.expansion:
                if sym in expand1s:
                    d[sym].append([Terminal(sym.name)])

        # Reduce each expansion: drop discarded terminals and demote visible
        # symbols (except inlined '_'-rules and expand1 origins) to terminals,
        # since tree children are matched as opaque units, not re-derived.
        reduced_rules = defaultdict(list)
        for name, expansions in d.items():
            for expansion in expansions:
                reduced = [sym if sym.name.startswith('_') or sym in expand1s else Terminal(sym.name)
                           for sym in expansion if not is_discarded_terminal(sym)]

                reduced_rules[name, tuple(reduced)].append(expansion)

        # One rule per distinct reduced expansion; the callback remembers the
        # first matching original expansion so discarded text can be restored.
        self.rules = [Rule(name, list(reduced), MakeMatchTree(name.name, expansions[0]), None)
                      for (name, reduced), expansions in reduced_rules.items()]

        self.write_tokens = WriteTokensTransformer({t.name: t for t in _tokens})


    def _match(self, term, token):
        # Terminal-matching callback for the Earley parser: a subtree matches
        # the terminal named after its rule; a token matches its token type.
        if isinstance(token, Tree):
            return Terminal(token.data) == term
        elif isinstance(token, Token):
            return term == Terminal(token.type)
        assert False  # children are always Trees or Tokens

    def _reconstruct(self, tree):
        # TODO: ambiguity?
        parser = earley.Parser(ParserConf(self.rules, None, tree.data), self._match, resolve_ambiguity=resolve_ambig.standard_resolve_ambig)
        unreduced_tree = parser.parse(tree.children)    # find a full derivation
        assert unreduced_tree.data == tree.data
        res = self.write_tokens.transform(unreduced_tree)
        # Yield text fragments depth-first, recursing into unexpanded subtrees.
        for item in res:
            if isinstance(item, Tree):
                for x in self._reconstruct(item):
                    yield x
            else:
                yield item

    def reconstruct(self, tree):
        """Return the reconstructed source text of *tree* as a single string."""
        return ''.join(self._reconstruct(tree))
|