|
|
@@ -1,9 +1,9 @@ |
|
|
|
from collections import defaultdict |
|
|
|
|
|
|
|
from .tree import Tree |
|
|
|
from .tree import Tree, Transformer_NoRecurse |
|
|
|
from .common import is_terminal, ParserConf, PatternStr |
|
|
|
from .lexer import Token |
|
|
|
from .parsers import earley |
|
|
|
from .parsers import earley, resolve_ambig |
|
|
|
from .grammar import Rule |
|
|
|
|
|
|
|
|
|
|
@@ -18,57 +18,82 @@ def is_iter_empty(i): |
|
|
|
except StopIteration: |
|
|
|
return True |
|
|
|
|
|
|
|
class WriteTokensTransformer(Transformer_NoRecurse): |
|
|
|
def __init__(self, tokens): |
|
|
|
self.tokens = tokens |
|
|
|
|
|
|
|
def __default__(self, t): |
|
|
|
if not isinstance(t, MatchTree): |
|
|
|
return t |
|
|
|
|
|
|
|
iter_args = iter(t.children) |
|
|
|
to_write = [] |
|
|
|
for sym in t.orig_expansion: |
|
|
|
if is_discarded_terminal(sym): |
|
|
|
t = self.tokens[sym] |
|
|
|
assert isinstance(t.pattern, PatternStr) |
|
|
|
to_write.append(t.pattern.value) |
|
|
|
else: |
|
|
|
x = next(iter_args) |
|
|
|
if isinstance(x, list): |
|
|
|
to_write += x |
|
|
|
else: |
|
|
|
if isinstance(x, Token): |
|
|
|
assert x.type == sym, x |
|
|
|
else: |
|
|
|
assert x.data == sym, (sym, x) |
|
|
|
to_write.append(x) |
|
|
|
|
|
|
|
assert is_iter_empty(iter_args) |
|
|
|
return to_write |
|
|
|
|
|
|
|
|
|
|
|
class MatchTree(Tree): |
|
|
|
pass |
|
|
|
|
|
|
|
class MakeMatchTree: |
|
|
|
def __init__(self, name, expansion): |
|
|
|
self.name = name |
|
|
|
self.expansion = expansion |
|
|
|
|
|
|
|
def __call__(self, args): |
|
|
|
t = MatchTree(self.name, args) |
|
|
|
t.orig_expansion = self.expansion |
|
|
|
return t |
|
|
|
|
|
|
|
class Reconstructor: |
|
|
|
def __init__(self, parser): |
|
|
|
# Recreate the rules to assume a standard lexer |
|
|
|
_tokens, rules, _grammar_extra = parser.grammar.compile(lexer='standard', start='whatever') |
|
|
|
tokens = {t.name:t for t in _tokens} |
|
|
|
|
|
|
|
|
|
|
|
class WriteTokens: |
|
|
|
def __init__(self, name, expansion): |
|
|
|
self.name = name |
|
|
|
self.expansion = expansion |
|
|
|
|
|
|
|
def f(self, args): |
|
|
|
iter_args = iter(args) |
|
|
|
to_write = [] |
|
|
|
for sym in self.expansion: |
|
|
|
if is_discarded_terminal(sym): |
|
|
|
t = tokens[sym] |
|
|
|
assert isinstance(t.pattern, PatternStr) |
|
|
|
to_write.append(t.pattern.value) |
|
|
|
else: |
|
|
|
x = next(iter_args) |
|
|
|
if isinstance(x, list): |
|
|
|
to_write += x |
|
|
|
else: |
|
|
|
if isinstance(x, Token): |
|
|
|
assert x.type == sym, x |
|
|
|
else: |
|
|
|
assert x.data == sym, x |
|
|
|
to_write.append(x) |
|
|
|
|
|
|
|
assert is_iter_empty(iter_args) |
|
|
|
return to_write |
|
|
|
|
|
|
|
expand1s = {r.origin for r in parser.rules if r.options and r.options.expand1} |
|
|
|
|
|
|
|
d = defaultdict(list) |
|
|
|
for r in rules: |
|
|
|
# Rules can match their alias |
|
|
|
if r.alias: |
|
|
|
d[r.alias].append(r.expansion) |
|
|
|
d[r.origin].append([r.alias]) |
|
|
|
else: |
|
|
|
d[r.origin].append(r.expansion) |
|
|
|
|
|
|
|
self.rules = [] |
|
|
|
# Expanded rules can match their own terminal |
|
|
|
for sym in r.expansion: |
|
|
|
if sym in expand1s: |
|
|
|
d[sym].append([sym.upper()]) |
|
|
|
|
|
|
|
reduced_rules = defaultdict(list) |
|
|
|
for name, expansions in d.items(): |
|
|
|
for expansion in expansions: |
|
|
|
reduced = [sym if sym.startswith('_') or sym in expand1s else sym.upper() |
|
|
|
for sym in expansion if not is_discarded_terminal(sym)] |
|
|
|
|
|
|
|
self.rules.append(Rule(name, reduced, WriteTokens(name, expansion).f, None)) |
|
|
|
reduced_rules[name, tuple(reduced)].append(expansion) |
|
|
|
|
|
|
|
self.rules = [Rule(name, list(reduced), MakeMatchTree(name, expansions[0]), None) |
|
|
|
for (name, reduced), expansions in reduced_rules.items()] |
|
|
|
|
|
|
|
self.write_tokens = WriteTokensTransformer({t.name:t for t in _tokens}) |
|
|
|
|
|
|
|
|
|
|
|
def _match(self, term, token): |
|
|
@@ -80,8 +105,10 @@ class Reconstructor: |
|
|
|
|
|
|
|
def _reconstruct(self, tree): |
|
|
|
# TODO: ambiguity? |
|
|
|
parser = earley.Parser(ParserConf(self.rules, None, tree.data), self._match) |
|
|
|
res = parser.parse(tree.children) |
|
|
|
parser = earley.Parser(ParserConf(self.rules, None, tree.data), self._match, resolve_ambiguity=resolve_ambig.standard_resolve_ambig) |
|
|
|
unreduced_tree = parser.parse(tree.children) # find a full derivation |
|
|
|
assert unreduced_tree.data == tree.data |
|
|
|
res = self.write_tokens.transform(unreduced_tree) |
|
|
|
for item in res: |
|
|
|
if isinstance(item, Tree): |
|
|
|
for x in self._reconstruct(item): |
|
|
|