Fixed reconstruct

Erez Shinan, 7 years ago
parent commit: d11c67fea0

2 changed files with 30 additions and 25 deletions:
  1. examples/reconstruct_json.py (+3, -3)
  2. lark/reconstruct.py (+27, -22)

examples/reconstruct_json.py (+3, -3)

@@ -23,9 +23,9 @@ test_json = '''
 }
 '''
 
 
-def test_scanless():
-    json_parser = Lark(json_grammar, lexer=None)
+def test_earley():
+    json_parser = Lark(json_grammar)
     tree = json_parser.parse(test_json)
 
     # print ('@@', tree.pretty())
@@ -48,5 +48,5 @@ def test_lalr():
     print (new_json)
     print (json.loads(new_json) == json.loads(test_json))
 
-test_scanless()
+test_earley()
 test_lalr()
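
Note: the renamed test now relies on Lark's defaults (Earley with a standard lexer) instead of the removed scanless mode. A minimal sketch of the round trip this example exercises, assuming the json_grammar and test_json strings defined in the example file and the public Reconstructor.reconstruct API:

    from lark import Lark
    from lark.reconstruct import Reconstructor

    json_parser = Lark(json_grammar)         # Earley + standard lexer by default
    tree = json_parser.parse(test_json)      # text -> Tree
    new_json = Reconstructor(json_parser).reconstruct(tree)  # Tree -> text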

lark/reconstruct.py (+27, -22)

@@ -1,15 +1,16 @@
 from collections import defaultdict
 
-from .tree import Tree, Transformer_NoRecurse
-from .common import is_terminal, ParserConf, PatternStr
+from .tree import Tree
+from .visitors import Transformer_InPlace
+from .common import ParserConf, PatternStr
 from .lexer import Token
 from .parsers import earley, resolve_ambig
-from .grammar import Rule
+from .grammar import Rule, Terminal, NonTerminal
 
 
 
 def is_discarded_terminal(t):
-    return is_terminal(t) and t.startswith('_')
+    return t.is_term and t.filter_out
 
 def is_iter_empty(i):
     try:
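
Note: is_discarded_terminal used to inspect plain strings via the leading-underscore naming convention; it now asks the symbol object itself. A hedged illustration, assuming the Terminal(name, filter_out=False) and NonTerminal(name) constructors from lark.grammar:

    from lark.grammar import Terminal, NonTerminal
    from lark.reconstruct import is_discarded_terminal

    is_discarded_terminal(Terminal('_COMMA', filter_out=True))  # True: filtered-out terminal
    is_discarded_terminal(Terminal('STRING'))                   # False: filter_out defaults to False
    is_discarded_terminal(NonTerminal('value'))                 # False: is_term is False for rules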
@@ -18,19 +19,21 @@ def is_iter_empty(i):
     except StopIteration:
         return True
 
-class WriteTokensTransformer(Transformer_NoRecurse):
+class WriteTokensTransformer(Transformer_InPlace):
     def __init__(self, tokens):
         self.tokens = tokens
 
-    def __default__(self, t):
-        if not isinstance(t, MatchTree):
-            return t
+    def __default__(self, data, children, meta):
+        # if not isinstance(t, MatchTree):
+        #     return t
+        if not getattr(meta, 'match_tree', False):
+            return Tree(data, children)
 
-        iter_args = iter(t.children)
+        iter_args = iter(children)
         to_write = []
-        for sym in t.orig_expansion:
+        for sym in meta.orig_expansion:
             if is_discarded_terminal(sym):
-                t = self.tokens[sym]
+                t = self.tokens[sym.name]
                 assert isinstance(t.pattern, PatternStr)
                 to_write.append(t.pattern.value)
             else:
@@ -39,9 +42,9 @@ class WriteTokensTransformer(Transformer_NoRecurse):
                 to_write += x
             else:
                 if isinstance(x, Token):
-                    assert x.type == sym, x
+                    assert Terminal(x.type) == sym, x
                 else:
-                    assert x.data == sym, (sym, x)
+                    assert NonTerminal(x.data) == sym, (sym, x)
                 to_write.append(x)
 
         assert is_iter_empty(iter_args)
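
Note: Transformer_NoRecurse's __default__(self, t) received the whole tree, while Transformer_InPlace's __default__ receives (data, children, meta), which is why the match-tree markers move onto the meta object. A minimal sketch of the new callback shape:

    from lark import Tree
    from lark.visitors import Transformer_InPlace

    class Passthrough(Transformer_InPlace):
        def __default__(self, data, children, meta):
            # data: the rule name; children: already-transformed subtrees/tokens;
            # meta: per-node metadata (now carrying match_tree / orig_expansion)
            return Tree(data, children)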
@@ -58,13 +61,14 @@ class MakeMatchTree:
 
     def __call__(self, args):
         t = MatchTree(self.name, args)
-        t.orig_expansion = self.expansion
+        t.meta.match_tree = True
+        t.meta.orig_expansion = self.expansion
        return t
 
 class Reconstructor:
     def __init__(self, parser):
         # Recreate the rules to assume a standard lexer
-        _tokens, rules, _grammar_extra = parser.grammar.compile(lexer='standard', start='whatever')
+        _tokens, rules, _grammar_extra = parser.grammar.compile()
 
         expand1s = {r.origin for r in parser.rules if r.options and r.options.expand1}
@@ -72,25 +76,26 @@ class Reconstructor:
         for r in rules:
             # Rules can match their alias
             if r.alias:
-                d[r.alias].append(r.expansion)
-                d[r.origin].append([r.alias])
+                alias = NonTerminal(r.alias)
+                d[alias].append(r.expansion)
+                d[r.origin].append([alias])
             else:
                 d[r.origin].append(r.expansion)
 
             # Expanded rules can match their own terminal
             for sym in r.expansion:
                 if sym in expand1s:
-                    d[sym].append([sym.upper()])
+                    d[sym].append([Terminal(sym.name)])
 
         reduced_rules = defaultdict(list)
         for name, expansions in d.items():
             for expansion in expansions:
-                reduced = [sym if sym.startswith('_') or sym in expand1s else sym.upper()
+                reduced = [sym if sym.name.startswith('_') or sym in expand1s else Terminal(sym.name)
                            for sym in expansion if not is_discarded_terminal(sym)]
 
                 reduced_rules[name, tuple(reduced)].append(expansion)
 
-        self.rules = [Rule(name, list(reduced), MakeMatchTree(name, expansions[0]), None)
+        self.rules = [Rule(name, list(reduced), MakeMatchTree(name.name, expansions[0]), None)
                       for (name, reduced), expansions in reduced_rules.items()]
 
         self.write_tokens = WriteTokensTransformer({t.name:t for t in _tokens})
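
Note on the reduction step: discarded terminals drop out of each expansion, and every other symbol, which will arrive as an already-built subtree during reconstruction, is matched as a single terminal, now expressed with Terminal objects instead of the old name.upper() string trick. A hedged, worked example with a toy expansion (expand1s left empty here for illustration):

    from lark.grammar import Terminal, NonTerminal
    from lark.reconstruct import is_discarded_terminal

    expand1s = set()   # hypothetical: no expand1 rules in this toy case
    expansion = [NonTerminal('pair'), Terminal('_COMMA', filter_out=True), NonTerminal('pair')]
    reduced = [sym if sym.name.startswith('_') or sym in expand1s else Terminal(sym.name)
               for sym in expansion if not is_discarded_terminal(sym)]
    # reduced == [Terminal('pair'), Terminal('pair')]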
@@ -98,9 +103,9 @@ class Reconstructor:
 
     def _match(self, term, token):
         if isinstance(token, Tree):
-            return token.data.upper() == term
+            return Terminal(token.data) == term
         elif isinstance(token, Token):
-            return term == token.type
+            return term == Terminal(token.type)
         assert False
 
     def _reconstruct(self, tree):
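
Note: _match now compares Symbol objects rather than strings; lark's Symbol equality considers both the name and the terminal/non-terminal kind, which is what lets Terminal(token.data) stand in for the old token.data.upper() convention. For instance:

    from lark.grammar import Terminal, NonTerminal

    assert Terminal('STRING') == Terminal('STRING')    # same kind, same name
    assert Terminal('value') != NonTerminal('value')   # terminal vs. rule symbol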


Loading…
Cancel
Save