Browse Source

Improved the reconstructor, but it still feels like a lost cause

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.8.0
Erez Sh 6 years ago
parent
commit
86f1bb1db6
1 changed files with 45 additions and 8 deletions
  1. +45
    -8
      lark/reconstruct.py

+ 45
- 8
lark/reconstruct.py View File

@@ -19,9 +19,13 @@ def is_iter_empty(i):
except StopIteration: except StopIteration:
return True return True



class WriteTokensTransformer(Transformer_InPlace): class WriteTokensTransformer(Transformer_InPlace):
def __init__(self, tokens):
"Inserts discarded tokens into their correct place, according to the rules of grammar"

def __init__(self, tokens, term_subs):
self.tokens = tokens self.tokens = tokens
self.term_subs = term_subs


def __default__(self, data, children, meta): def __default__(self, data, children, meta):
# if not isinstance(t, MatchTree): # if not isinstance(t, MatchTree):
@@ -33,10 +37,15 @@ class WriteTokensTransformer(Transformer_InPlace):
to_write = [] to_write = []
for sym in meta.orig_expansion: for sym in meta.orig_expansion:
if is_discarded_terminal(sym): if is_discarded_terminal(sym):
t = self.tokens[sym.name]
if not isinstance(t.pattern, PatternStr):
raise NotImplementedError("Reconstructing regexps not supported yet: %s" % t)
to_write.append(t.pattern.value)
try:
v = self.term_subs[sym.name](sym)
except KeyError:
t = self.tokens[sym.name]
if not isinstance(t.pattern, PatternStr):
raise NotImplementedError("Reconstructing regexps not supported yet: %s" % t)

v = t.pattern.value
to_write.append(v)
else: else:
x = next(iter_args) x = next(iter_args)
if isinstance(x, list): if isinstance(x, list):
@@ -66,14 +75,34 @@ class MakeMatchTree:
t.meta.orig_expansion = self.expansion t.meta.orig_expansion = self.expansion
return t return t


def best_from_group(seq, group_key, cmp_key):
    """Pick one representative item per group.

    Items of ``seq`` are grouped by ``group_key(item)``. Within each group
    the item with the *smallest* ``cmp_key(item)`` is kept; on a tie the
    item that appeared first in ``seq`` wins.

    Args:
        seq: Iterable of items to reduce.
        group_key: Callable returning a hashable group identifier.
        cmp_key: Callable returning a comparable score (lower is better).

    Returns:
        A list with one item per group, ordered by the first appearance
        of each group in ``seq``.
    """
    winners = {}
    # Score of the current winner per group. Filled lazily, on the first
    # collision, so groups with a single member never call cmp_key at all.
    winner_scores = {}
    for item in seq:
        key = group_key(item)
        if key not in winners:
            winners[key] = item
            continue

        candidate_score = cmp_key(item)
        if key not in winner_scores:
            winner_scores[key] = cmp_key(winners[key])
        # Strict comparison: an equal score does not displace the incumbent.
        if winner_scores[key] > candidate_score:
            winners[key] = item
            winner_scores[key] = candidate_score
    return list(winners.values())

class Reconstructor: class Reconstructor:
def __init__(self, parser):
def __init__(self, parser, term_subs={}):
# XXX TODO calling compile twice returns different results! # XXX TODO calling compile twice returns different results!
assert parser.options.maybe_placeholders == False assert parser.options.maybe_placeholders == False
tokens, rules, _grammar_extra = parser.grammar.compile(parser.options.start) tokens, rules, _grammar_extra = parser.grammar.compile(parser.options.start)


self.write_tokens = WriteTokensTransformer({t.name:t for t in tokens})
self.write_tokens = WriteTokensTransformer({t.name:t for t in tokens}, term_subs)
self.rules = list(self._build_recons_rules(rules)) self.rules = list(self._build_recons_rules(rules))
self.rules.reverse()
# print(len(self.rules))
self.rules = best_from_group(self.rules, lambda r: r, lambda r: -len(r.expansion))
# print(len(self.rules))

# self.rules = list(set(list(self._build_recons_rules(rules))))
self.rules.sort(key=lambda r: len(r.expansion))
callbacks = {rule: rule.alias for rule in self.rules} # TODO pass callbacks through dict, instead of alias? callbacks = {rule: rule.alias for rule in self.rules} # TODO pass callbacks through dict, instead of alias?
self.parser = earley.Parser(ParserConf(self.rules, callbacks, parser.options.start), self.parser = earley.Parser(ParserConf(self.rules, callbacks, parser.options.start),
self._match, resolve_ambiguity=True) self._match, resolve_ambiguity=True)
@@ -127,4 +156,12 @@ class Reconstructor:
yield item yield item


def reconstruct(self, tree):
    """Return the text reconstructed from ``tree`` as a single string.

    Concatenates the string pieces yielded by ``self._reconstruct(tree)``.
    A single space is inserted between two consecutive non-empty pieces
    when the first ends with an alphanumeric character and the second
    starts with one, so that adjacent words do not fuse together.

    Args:
        tree: The tree passed through to ``self._reconstruct``.

    Returns:
        The joined text.
    """
    parts = []
    prev_item = ''
    for item in self._reconstruct(tree):
        if not item:
            # An empty piece contributes no text. Skipping it keeps
            # prev_item pointing at the last piece actually written, so
            # the word-boundary check below still sees its final character.
            continue
        if prev_item and prev_item[-1].isalnum() and item[0].isalnum():
            parts.append(' ')
        parts.append(item)
        prev_item = item
    return ''.join(parts)

Loading…
Cancel
Save