
Refactoring and bugfixes in reconstruct.py

Erez Shinan committed 6 years ago
commit 1cc8bc9848
1 changed file with 62 additions and 35 deletions
lark/reconstruct.py    +62 −35

@@ -1,9 +1,9 @@
 from collections import defaultdict
 
-from .tree import Tree
+from .tree import Tree, Transformer_NoRecurse
 from .common import is_terminal, ParserConf, PatternStr
 from .lexer import Token
-from .parsers import earley
+from .parsers import earley, resolve_ambig
 from .grammar import Rule
 
 
@@ -18,57 +18,82 @@ def is_iter_empty(i):
     except StopIteration:
         return True
 
+class WriteTokensTransformer(Transformer_NoRecurse):
+    def __init__(self, tokens):
+        self.tokens = tokens
+
+    def __default__(self, t):
+        if not isinstance(t, MatchTree):
+            return t
+
+        iter_args = iter(t.children)
+        to_write = []
+        for sym in t.orig_expansion:
+            if is_discarded_terminal(sym):
+                t = self.tokens[sym]
+                assert isinstance(t.pattern, PatternStr)
+                to_write.append(t.pattern.value)
+            else:
+                x = next(iter_args)
+                if isinstance(x, list):
+                    to_write += x
+                else:
+                    if isinstance(x, Token):
+                        assert x.type == sym, x
+                    else:
+                        assert x.data == sym, (sym, x)
+                    to_write.append(x)
+
+        assert is_iter_empty(iter_args)
+        return to_write
+
+
+class MatchTree(Tree):
+    pass
+
+class MakeMatchTree:
+    def __init__(self, name, expansion):
+        self.name = name
+        self.expansion = expansion
+
+    def __call__(self, args):
+        t = MatchTree(self.name, args)
+        t.orig_expansion = self.expansion
+        return t
+
 class Reconstructor:
     def __init__(self, parser):
         # Recreate the rules to assume a standard lexer
         _tokens, rules, _grammar_extra = parser.grammar.compile(lexer='standard', start='whatever')
-        tokens = {t.name:t for t in _tokens}
-
-
-        class WriteTokens:
-            def __init__(self, name, expansion):
-                self.name = name
-                self.expansion = expansion
-
-            def f(self, args):
-                iter_args = iter(args)
-                to_write = []
-                for sym in self.expansion:
-                    if is_discarded_terminal(sym):
-                        t = tokens[sym]
-                        assert isinstance(t.pattern, PatternStr)
-                        to_write.append(t.pattern.value)
-                    else:
-                        x = next(iter_args)
-                        if isinstance(x, list):
-                            to_write += x
-                        else:
-                            if isinstance(x, Token):
-                                assert x.type == sym, x
-                            else:
-                                assert x.data == sym, x
-                            to_write.append(x)
-
-                assert is_iter_empty(iter_args)
-                return to_write
 
         expand1s = {r.origin for r in parser.rules if r.options and r.options.expand1}
 
         d = defaultdict(list)
         for r in rules:
             # Rules can match their alias
             if r.alias:
                 d[r.alias].append(r.expansion)
                 d[r.origin].append([r.alias])
             else:
                 d[r.origin].append(r.expansion)
 
-        self.rules = []
+            # Expanded rules can match their own terminal
+            for sym in r.expansion:
+                if sym in expand1s:
+                    d[sym].append([sym.upper()])
+
+        reduced_rules = defaultdict(list)
         for name, expansions in d.items():
             for expansion in expansions:
                 reduced = [sym if sym.startswith('_') or sym in expand1s else sym.upper()
                            for sym in expansion if not is_discarded_terminal(sym)]
 
-                self.rules.append(Rule(name, reduced, WriteTokens(name, expansion).f, None))
+                reduced_rules[name, tuple(reduced)].append(expansion)
+
+        self.rules = [Rule(name, list(reduced), MakeMatchTree(name, expansions[0]), None)
+                      for (name, reduced), expansions in reduced_rules.items()]
+
+        self.write_tokens = WriteTokensTransformer({t.name:t for t in _tokens})
+
 
     def _match(self, term, token):
@@ -80,8 +105,10 @@ class Reconstructor:
 
     def _reconstruct(self, tree):
         # TODO: ambiguity?
-        parser = earley.Parser(ParserConf(self.rules, None, tree.data), self._match)
-        res = parser.parse(tree.children)
+        parser = earley.Parser(ParserConf(self.rules, None, tree.data), self._match, resolve_ambiguity=resolve_ambig.standard_resolve_ambig)
+        unreduced_tree = parser.parse(tree.children)    # find a full derivation
+        assert unreduced_tree.data == tree.data
+        res = self.write_tokens.transform(unreduced_tree)
         for item in res:
             if isinstance(item, Tree):
                 for x in self._reconstruct(item):
