|
|
@@ -0,0 +1,109 @@ |
|
|
|
import re |
|
|
|
from collections import defaultdict |
|
|
|
|
|
|
|
from .tree import Tree |
|
|
|
from .common import is_terminal |
|
|
|
from .lexer import Token, TokenDef__Str |
|
|
|
from .parsers import earley |
|
|
|
from .lark import Lark |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def is_discarded_terminal(t): |
|
|
|
return is_terminal(t) and t.startswith('_') |
|
|
|
|
|
|
|
def is_iter_empty(i): |
|
|
|
try: |
|
|
|
_ = next(i) |
|
|
|
return False |
|
|
|
except StopIteration: |
|
|
|
return True |
|
|
|
|
|
|
|
class Reconstructor: |
|
|
|
def __init__(self, parser): |
|
|
|
tokens = {t.name:t for t in parser.lexer_conf.tokens} |
|
|
|
token_res = {t.name:re.compile(t.to_regexp()) for t in parser.lexer_conf.tokens} |
|
|
|
|
|
|
|
class MatchData: |
|
|
|
def __init__(self, data): |
|
|
|
self.data = data |
|
|
|
|
|
|
|
class MatchTerminal(MatchData): |
|
|
|
def match(self, other): |
|
|
|
return token_res[self.data].match(other) is not None |
|
|
|
|
|
|
|
class MatchTree(MatchData): |
|
|
|
def match(self, other): |
|
|
|
return self.data == other.data |
|
|
|
|
|
|
|
class WriteTokens: |
|
|
|
def __init__(self, name, expansion): |
|
|
|
self.name = name |
|
|
|
self.expansion = expansion |
|
|
|
|
|
|
|
def f(self, args): |
|
|
|
args2 = iter(args) |
|
|
|
to_write = [] |
|
|
|
for sym in self.expansion: |
|
|
|
if is_discarded_terminal(sym): |
|
|
|
t = tokens[sym] |
|
|
|
assert isinstance(t, TokenDef__Str) |
|
|
|
to_write.append(t.value) |
|
|
|
else: |
|
|
|
x = next(args2) |
|
|
|
if isinstance(x, list): |
|
|
|
to_write += x |
|
|
|
else: |
|
|
|
if isinstance(x, Token): |
|
|
|
assert x.type == sym, x |
|
|
|
else: |
|
|
|
assert x.data == sym, x |
|
|
|
to_write.append(x) |
|
|
|
|
|
|
|
assert is_iter_empty(args2) |
|
|
|
|
|
|
|
return to_write |
|
|
|
|
|
|
|
d = defaultdict(list) |
|
|
|
for name, expansions in parser.rules.items(): |
|
|
|
for expansion, alias in expansions: |
|
|
|
if alias: |
|
|
|
d[alias].append(expansion) |
|
|
|
d[name].append([alias]) |
|
|
|
else: |
|
|
|
d[name].append(expansion) |
|
|
|
|
|
|
|
rules = [] |
|
|
|
expand1s = {name.lstrip('!').lstrip('?') for name in d |
|
|
|
if name.startswith(('?', '!?'))} # XXX Ugly code |
|
|
|
|
|
|
|
for name, expansions in d.items(): |
|
|
|
for expansion in expansions: |
|
|
|
reduced = [sym if sym.startswith('_') or sym in expand1s else |
|
|
|
(sym, MatchTerminal(sym) if is_terminal(sym) else MatchTree(sym)) |
|
|
|
for sym in expansion if not is_discarded_terminal(sym)] |
|
|
|
|
|
|
|
name = name.lstrip('!').lstrip('?') |
|
|
|
|
|
|
|
rules.append({'name': name, |
|
|
|
'symbols': reduced, |
|
|
|
'postprocess': WriteTokens(name, expansion).f |
|
|
|
}) |
|
|
|
self.rules = rules |
|
|
|
|
|
|
|
|
|
|
|
def _reconstruct(self, tree): |
|
|
|
parser = earley.Parser(self.rules, tree.data) |
|
|
|
|
|
|
|
res ,= parser.parse(tree.children) # XXX ambiguity? |
|
|
|
for item in res: |
|
|
|
if isinstance(item, Tree): |
|
|
|
for x in self._reconstruct(item): |
|
|
|
yield x |
|
|
|
else: |
|
|
|
yield item |
|
|
|
|
|
|
|
def reconstruct(self, tree): |
|
|
|
return ''.join(self._reconstruct(tree)) |
|
|
|
|
|
|
|
|