|
- import re
- from collections import defaultdict
-
- from .tree import Tree
- from .common import is_terminal, ParserConf
- from .lexer import Token, TokenDef__Str
- from .parsers import earley
- from .lark import Lark
-
-
-
def is_discarded_terminal(t):
    """Tell whether symbol `t` is a terminal whose name starts with '_'.

    The result is whatever `is_terminal(t)` returned when that is falsy;
    otherwise it is the boolean outcome of the underscore-prefix check.
    """
    terminal = is_terminal(t)
    if terminal:
        return t.startswith('_')
    return terminal
-
def is_iter_empty(i):
    """Return True if iterator `i` has nothing left to yield, else False.

    Note that the check is destructive: when the iterator is not exhausted,
    one item is pulled from it and thrown away.
    """
    # A private sentinel distinguishes "exhausted" from any real item,
    # including falsy ones such as None or 0.
    exhausted = object()
    return next(i, exhausted) is exhausted
-
class Reconstructor:
    """Turn a parse tree back into text.

    The constructor derives a list of (name, symbols, callback) rules from
    the grammar held by `parser`.  `reconstruct` then matches the children
    of every tree node against those rules with an Earley parser; the
    callbacks emit the node's children interleaved with the literal text of
    the discarded ('_'-prefixed) terminals.
    """

    def __init__(self, parser):
        """Build `self.rules` from `parser.rules` and `parser.lexer_conf.tokens`.

        `parser.rules` is read as a mapping of rule name to an iterable of
        (expansion, alias) pairs; each token definition must offer `name`
        and `to_regexp()`.
        """
        token_defs = parser.lexer_conf.tokens
        tokens = dict((tok.name, tok) for tok in token_defs)
        token_res = dict((tok.name, re.compile(tok.to_regexp()))
                         for tok in token_defs)

        class MatchData:
            """Base for the matcher callables; just stores the symbol name."""
            def __init__(self, data):
                self.data = data

        class MatchTerminal(MatchData):
            """Accepts a value that the terminal's compiled regexp matches."""
            def __call__(self, other):
                pattern = token_res[self.data]
                return pattern.match(other) is not None

        class MatchTree(MatchData):
            """Accepts an object whose `data` attribute equals the rule name."""
            def __call__(self, other):
                try:
                    same = (self.data == other.data)
                except AttributeError:
                    # Objects without a `data` attribute never match.
                    return False
                return same

        class WriteTokens:
            """Callback holder for one expansion of one rule."""
            def __init__(self, name, expansion):
                self.name = name
                self.expansion = expansion

            def f(self, args):
                """Return the output items for one matched expansion.

                Walks the full expansion: discarded terminals contribute the
                `value` of their token definition, every other symbol
                consumes the next entry of `args`.  A list entry is spliced
                in; any other entry is checked against the symbol and
                appended as is.
                """
                remaining = iter(args)
                out = []
                for sym in self.expansion:
                    if not is_discarded_terminal(sym):
                        item = next(remaining)
                        if isinstance(item, list):
                            out.extend(item)
                            continue
                        if isinstance(item, Token):
                            assert item.type == sym, item
                        else:
                            assert item.data == sym, item
                        out.append(item)
                        continue

                    # Discarded terminal: re-insert its literal text.
                    tokdef = tokens[sym]
                    assert isinstance(tokdef, TokenDef__Str)
                    out.append(tokdef.value)

                # Every matched argument must have been consumed.
                assert is_iter_empty(remaining)

                return out

        # Group expansions by rule name.  An aliased expansion is filed under
        # its alias, and the owning rule gets a one-symbol expansion that
        # refers to that alias.
        grouped = defaultdict(list)
        for origin, alternatives in parser.rules.items():
            for symbols, alias in alternatives:
                if not alias:
                    grouped[origin].append(symbols)
                    continue
                grouped[alias].append(symbols)
                grouped[origin].append([alias])

        # Names declared with a '?' or '!?' prefix, stored without the prefix.
        inlined = set()  # XXX Ugly code
        for key in grouped:
            if key.startswith(('?', '!?')):
                inlined.add(key.lstrip('!').lstrip('?'))

        def to_match_symbol(sym):
            # '_'-prefixed and '?'-declared names stay plain rule references;
            # everything else becomes a 1-tuple wrapping a matcher callable.
            if sym.startswith('_') or sym in inlined:
                return sym
            if is_terminal(sym):
                matcher = MatchTerminal(sym)
            else:
                matcher = MatchTree(sym)
            return (matcher,)

        rules = []
        for key, alternatives in grouped.items():
            origin = key.lstrip('!').lstrip('?')
            for symbols in alternatives:
                kept = [to_match_symbol(sym) for sym in symbols
                        if not is_discarded_terminal(sym)]
                rules.append((origin, kept, WriteTokens(origin, symbols).f))
        self.rules = rules

    def _reconstruct(self, tree):
        """Yield, in order, the leaf items that make up `tree`.

        The children of `tree` are parsed with `tree.data` as the start
        symbol.  Exactly one parse result is expected; any other count makes
        the unpacking below raise ValueError.
        """
        conf = ParserConf(self.rules, {}, tree.data)
        (match,) = earley.Parser(conf).parse(tree.children)  # XXX ambiguity?
        for part in match:
            if not isinstance(part, Tree):
                yield part
                continue
            # Sub-trees are expanded recursively.
            for piece in self._reconstruct(part):
                yield piece

    def reconstruct(self, tree):
        """Return the text for `tree`: the joined output of `_reconstruct`."""
        pieces = self._reconstruct(tree)
        return ''.join(pieces)
-
|