From 230aad94a7c5df9b4a1bc4ef05dd873bd236f12e Mon Sep 17 00:00:00 2001 From: julienmalard Date: Tue, 30 Jun 2020 08:07:31 -0400 Subject: [PATCH 1/7] Added reconstructor tests for tokens to keep ("!") and for expanded rules. --- tests/test_reconstructor.py | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/tests/test_reconstructor.py b/tests/test_reconstructor.py index ecab499..7a896a0 100644 --- a/tests/test_reconstructor.py +++ b/tests/test_reconstructor.py @@ -69,6 +69,36 @@ class TestReconstructor(TestCase): self.assert_reconstruct(g, code) + def test_keep_tokens(self): + g = """ + start: (NL | stmt)* + stmt: var op var + !op: ("+" | "-" | "*" | "/") + var: WORD + NL: /(\\r?\\n)+\s*/ + """ + common + + code = """ + a+b + """ + + self.assert_reconstruct(g, code) + + @unittest.skip('Not working yet') + def test_expand_rule(self): + g = """ + ?start: (NL | mult_stmt)* + ?mult_stmt: sum_stmt ["*" sum_stmt] + ?sum_stmt: var ["+" var] + var: WORD + NL: /(\\r?\\n)+\s*/ + """ + common + + code = ['a', 'a*b', 'a+b', 'a*b+c', 'a+b*c', 'a+b*c+d'] + + for c in code: + self.assert_reconstruct(g, c) + def test_json_example(self): test_json = ''' { From 5d01f0ae68cea8c72186bd3d8ca6b30154f099cd Mon Sep 17 00:00:00 2001 From: julienmalard Date: Tue, 30 Jun 2020 08:08:07 -0400 Subject: [PATCH 2/7] test keep tokens in reconstructor works --- lark/load_grammar.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/lark/load_grammar.py b/lark/load_grammar.py index 407d8d1..4b0deff 100644 --- a/lark/load_grammar.py +++ b/lark/load_grammar.py @@ -307,6 +307,7 @@ class PrepareAnonTerminals(Transformer_InPlace): self.term_set = {td.name for td in self.terminals} self.term_reverse = {td.pattern: td for td in terminals} self.i = 0 + self.rule_options = None @inline_args @@ -351,7 +352,10 @@ class PrepareAnonTerminals(Transformer_InPlace): self.term_reverse[p] = termdef self.terminals.append(termdef) - return Terminal(term_name, 
filter_out=isinstance(p, PatternStr)) + filter_out = False if self.rule_options and self.rule_options.keep_all_tokens else isinstance(p, PatternStr) + + return Terminal(term_name, filter_out=filter_out) + class _ReplaceSymbols(Transformer_InPlace): " Helper for ApplyTemplates " @@ -527,7 +531,8 @@ class Grammar: # ================= # 1. Pre-process terminals - transformer = PrepareLiterals() * PrepareSymbols() * PrepareAnonTerminals(terminals) # Adds to terminals + anon_tokens_transf = PrepareAnonTerminals(terminals) + transformer = PrepareLiterals() * PrepareSymbols() * anon_tokens_transf # Adds to terminals # 2. Inline Templates @@ -542,8 +547,10 @@ class Grammar: i += 1 if len(params) != 0: # Dont transform templates continue - ebnf_to_bnf.rule_options = RuleOptions(keep_all_tokens=True) if options.keep_all_tokens else None + rule_options = RuleOptions(keep_all_tokens=True) if options and options.keep_all_tokens else None + ebnf_to_bnf.rule_options = rule_options ebnf_to_bnf.prefix = name + anon_tokens_transf.rule_options = rule_options tree = transformer.transform(rule_tree) res = ebnf_to_bnf.transform(tree) rules.append((name, res, options)) From 279c3190968ee745fc8af29b932eb5e15d589167 Mon Sep 17 00:00:00 2001 From: julienmalard Date: Tue, 30 Jun 2020 12:25:28 -0400 Subject: [PATCH 3/7] Activate expand rules reconstructor test (fails) --- tests/test_reconstructor.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_reconstructor.py b/tests/test_reconstructor.py index 7a896a0..93c64fe 100644 --- a/tests/test_reconstructor.py +++ b/tests/test_reconstructor.py @@ -84,7 +84,6 @@ class TestReconstructor(TestCase): self.assert_reconstruct(g, code) - @unittest.skip('Not working yet') def test_expand_rule(self): g = """ ?start: (NL | mult_stmt)* From bca6cfa45897d019c84396d6932c7fde643509fc Mon Sep 17 00:00:00 2001 From: julienmalard Date: Tue, 30 Jun 2020 12:25:43 -0400 Subject: [PATCH 4/7] Expand rules reconstructor test passes --- lark/reconstruct.py | 
46 +++++++++++++++++++++++++++++++------- tests/test_nearley/nearley | 2 +- 2 files changed, 39 insertions(+), 9 deletions(-) diff --git a/lark/reconstruct.py b/lark/reconstruct.py index 1e3adc7..d6eccf5 100644 --- a/lark/reconstruct.py +++ b/lark/reconstruct.py @@ -86,6 +86,14 @@ def best_from_group(seq, group_key, cmp_key): d[key] = item return list(d.values()) + +def make_recons_rule(origin, expansion, old_expansion): + return Rule(origin, expansion, alias=MakeMatchTree(origin.name, old_expansion)) + +def make_recons_rule_to_term(origin, term): + return make_recons_rule(origin, [Terminal(term.name)], [term]) + + class Reconstructor: def __init__(self, parser, term_subs={}): # XXX TODO calling compile twice returns different results! @@ -93,6 +101,8 @@ class Reconstructor: tokens, rules, _grammar_extra = parser.grammar.compile(parser.options.start) self.write_tokens = WriteTokensTransformer({t.name:t for t in tokens}, term_subs) + self.rules_for_root = defaultdict(list) + self.rules = list(self._build_recons_rules(rules)) self.rules.reverse() @@ -100,9 +110,8 @@ class Reconstructor: self.rules = best_from_group(self.rules, lambda r: r, lambda r: -len(r.expansion)) self.rules.sort(key=lambda r: len(r.expansion)) - callbacks = {rule: rule.alias for rule in self.rules} # TODO pass callbacks through dict, instead of alias? 
- self.parser = earley.Parser(ParserConf(self.rules, callbacks, parser.options.start), - self._match, resolve_ambiguity=True) + self.parser = parser + self._parser_cache = {} def _build_recons_rules(self, rules): expand1s = {r.origin for r in rules if r.options.expand1} @@ -116,22 +125,35 @@ class Reconstructor: nonterminals = {sym for sym in rule_names if sym.name.startswith('_') or sym in expand1s or sym in aliases } + seen = set() for r in rules: recons_exp = [sym if sym in nonterminals else Terminal(sym.name) for sym in r.expansion if not is_discarded_terminal(sym)] # Skip self-recursive constructs - if recons_exp == [r.origin]: + if recons_exp == [r.origin] and r.alias is None: continue sym = NonTerminal(r.alias) if r.alias else r.origin + rule = make_recons_rule(sym, recons_exp, r.expansion) + + if sym in expand1s and len(recons_exp) != 1: + self.rules_for_root[sym.name].append(rule) - yield Rule(sym, recons_exp, alias=MakeMatchTree(sym.name, r.expansion)) + if sym.name not in seen: + yield make_recons_rule_to_term(sym, sym) + seen.add(sym.name) + else: + if sym.name.startswith('_') or sym in expand1s: + yield rule + else: + self.rules_for_root[sym.name].append(rule) + # yield rule # Rule(sym, recons_exp, alias=MakeMatchTree(sym.name, r.expansion)) for origin, rule_aliases in aliases.items(): for alias in rule_aliases: - yield Rule(origin, [Terminal(alias)], alias=MakeMatchTree(origin.name, [NonTerminal(alias)])) - yield Rule(origin, [Terminal(origin.name)], alias=MakeMatchTree(origin.name, [origin])) + yield make_recons_rule_to_term(origin, NonTerminal(alias)) + yield make_recons_rule_to_term(origin, origin) def _match(self, term, token): if isinstance(token, Tree): @@ -142,7 +164,15 @@ class Reconstructor: def _reconstruct(self, tree): # TODO: ambiguity? 
- unreduced_tree = self.parser.parse(tree.children, tree.data) # find a full derivation + try: + parser = self._parser_cache[tree.data] + except KeyError: + rules = self.rules + self.rules_for_root[tree.data] + callbacks = {rule: rule.alias for rule in rules} # TODO pass callbacks through dict, instead of alias? + parser = earley.Parser(ParserConf(rules, callbacks, [tree.data]), self._match, resolve_ambiguity=True) + self._parser_cache[tree.data] = parser + + unreduced_tree = parser.parse(tree.children, tree.data) # find a full derivation assert unreduced_tree.data == tree.data res = self.write_tokens.transform(unreduced_tree) for item in res: diff --git a/tests/test_nearley/nearley b/tests/test_nearley/nearley index a46b374..cf8925f 160000 --- a/tests/test_nearley/nearley +++ b/tests/test_nearley/nearley @@ -1 +1 @@ -Subproject commit a46b37471db486db0f6e1ce6a2934fb238346b44 +Subproject commit cf8925f729bde741a3076c5856c0c0862bc7f5de From 6b9bd84091fc65e9ab806a9dfcf577d76477dea4 Mon Sep 17 00:00:00 2001 From: julienmalard Date: Tue, 30 Jun 2020 15:05:49 -0400 Subject: [PATCH 5/7] Remove commented out line --- lark/reconstruct.py | 1 - 1 file changed, 1 deletion(-) diff --git a/lark/reconstruct.py b/lark/reconstruct.py index d6eccf5..876c6ae 100644 --- a/lark/reconstruct.py +++ b/lark/reconstruct.py @@ -148,7 +148,6 @@ class Reconstructor: yield rule else: self.rules_for_root[sym.name].append(rule) - # yield rule # Rule(sym, recons_exp, alias=MakeMatchTree(sym.name, r.expansion)) for origin, rule_aliases in aliases.items(): for alias in rule_aliases: From a3368d8a72de3d10cc8a713052dfe0313d18568e Mon Sep 17 00:00:00 2001 From: julienmalard Date: Fri, 3 Jul 2020 10:32:53 -0400 Subject: [PATCH 6/7] Need to remove duplicate rules for root rules as well.
--- lark/reconstruct.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/lark/reconstruct.py b/lark/reconstruct.py index 876c6ae..51294a3 100644 --- a/lark/reconstruct.py +++ b/lark/reconstruct.py @@ -1,3 +1,4 @@ +import unicodedata from collections import defaultdict from .tree import Tree @@ -166,7 +167,12 @@ class Reconstructor: try: parser = self._parser_cache[tree.data] except KeyError: - rules = self.rules + self.rules_for_root[tree.data] + rules = self.rules + best_from_group( + self.rules_for_root[tree.data], lambda r: r, lambda r: -len(r.expansion) + ) + + rules.sort(key=lambda r: len(r.expansion)) + callbacks = {rule: rule.alias for rule in rules} # TODO pass callbacks through dict, instead of alias? parser = earley.Parser(ParserConf(rules, callbacks, [tree.data]), self._match, resolve_ambiguity=True) self._parser_cache[tree.data] = parser From d631cad024ab8c80df1db58c0202e43eaca321aa Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Thu, 6 Aug 2020 15:17:52 +0300 Subject: [PATCH 7/7] Minor adjustments --- lark/reconstruct.py | 3 +-- tests/test_nearley/nearley | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/lark/reconstruct.py b/lark/reconstruct.py index 79e294c..89967b2 100644 --- a/lark/reconstruct.py +++ b/lark/reconstruct.py @@ -1,4 +1,3 @@ -import unicodedata from collections import defaultdict from .tree import Tree @@ -131,7 +130,7 @@ class Reconstructor: rule_names = {r.origin for r in rules} nonterminals = {sym for sym in rule_names - if sym.name.startswith('_') or sym in expand1s or sym in aliases } + if sym.name.startswith('_') or sym in expand1s or sym in aliases } seen = set() for r in rules: diff --git a/tests/test_nearley/nearley b/tests/test_nearley/nearley index cf8925f..a46b374 160000 --- a/tests/test_nearley/nearley +++ b/tests/test_nearley/nearley @@ -1 +1 @@ -Subproject commit cf8925f729bde741a3076c5856c0c0862bc7f5de +Subproject commit a46b37471db486db0f6e1ce6a2934fb238346b44