From 1f79a8dfce916f02bb2002011e74b72e5748c565 Mon Sep 17 00:00:00 2001 From: MegaIng1 Date: Fri, 27 Mar 2020 12:23:46 +0100 Subject: [PATCH 01/12] Added template syntax --- lark/load_grammar.py | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/lark/load_grammar.py b/lark/load_grammar.py index 051f8cd..9356d7e 100644 --- a/lark/load_grammar.py +++ b/lark/load_grammar.py @@ -73,6 +73,8 @@ TERMINALS = { '_RPAR': r'\)', '_LBRA': r'\[', '_RBRA': r'\]', + '_LBRACE': r'\{', + '_RBRACE': r'\}', 'OP': '[+*]|[?](?![a-z])', '_COLON': ':', '_COMMA': ',', @@ -97,7 +99,13 @@ TERMINALS = { RULES = { 'start': ['_list'], '_list': ['_item', '_list _item'], - '_item': ['rule', 'term', 'statement', '_NL'], + '_item': ['rule', 'rule_template', 'term', 'statement', '_NL'], + + 'template': ['RULE _LBRACE template_params _RBRACE _COLON expansions _NL', + 'RULE _LBRACE template_params _RBRACE _DOT NUMBER _COLON expansions _NL'], + + 'template_params': ['RULE', + 'template_params _COMMA RULE'], 'rule': ['RULE _COLON expansions _NL', 'RULE _DOT NUMBER _COLON expansions _NL'], @@ -123,7 +131,8 @@ RULES = { 'value': ['terminal', 'nonterminal', 'literal', - 'range'], + 'range', + 'template_usage'], 'terminal': ['TERMINAL'], 'nonterminal': ['RULE'], @@ -132,9 +141,13 @@ RULES = { 'maybe': ['_LBRA expansions _RBRA'], 'range': ['STRING _DOTDOT STRING'], + + 'template_usage': ['RULE _LBRACE template_args _RBRACE'], + 'template_args': ['atom', + 'template_args _COMMA atom'], 'term': ['TERMINAL _COLON expansions _NL', - 'TERMINAL _DOT NUMBER _COLON expansions _NL'], + 'TERMINAL _DOT NUMBER _COLON expansions _NL'], 'statement': ['ignore', 'import', 'declare'], 'ignore': ['_IGNORE expansions _NL'], 'declare': ['_DECLARE _declare_args _NL'], @@ -648,8 +661,8 @@ def resolve_term_references(term_defs): raise GrammarError("Recursion in terminal '%s' (recursion is only allowed in rules, not terminals)" % name) -def options_from_rule(name, *x): - if len(x) > 1: +def options_from_rule(name, *x,is_template=False): + if len(x) > (1+is_template): priority, expansions = x priority = int(priority) else: @@ -728,12 +741,14 @@ class GrammarLoader: defs = classify(tree.children, lambda c: c.data, lambda c: c.children) term_defs = defs.pop('term', []) rule_defs = defs.pop('rule', []) + template_defs = defs.pop('template', []) statements = defs.pop('statement', []) assert not defs term_defs = [td if len(td)==3 else (td[0], 1, td[1]) for td in term_defs] term_defs = [(name.value, (t, int(p))) for name, p, t in term_defs] rule_defs = [options_from_rule(*x) for x in rule_defs] + template_defs = [options_from_rule(*x, is_template=True) for x in rule_defs] # Execute statements ignore, imports = [], {} From 732a835b539f6e4c57055fe81250a7eb9b878f47 Mon Sep 17 00:00:00 2001 From: MegaIng1 Date: Fri, 27 Mar 2020 12:24:04 +0100 Subject: [PATCH 02/12] Added template tests --- tests/test_parser.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tests/test_parser.py b/tests/test_parser.py index 7edfd3a..9a98f54 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -810,6 +810,20 @@ def _make_parser_test(LEXER, PARSER): x = g.parse('Hello HelloWorld') self.assertSequenceEqual(x.children, ['HelloWorld']) + def test_templates(self): + g = _Lark(r""" + start: number_list "\n" number_dict + sep{item, delim}: item (delim item)* + number_list: "[" sep{NUMBER, ","} "]" + number_dict: "{" sep{(NUMBER ":" NUMBER), ";"} "}" // Just to test this + NUMBER: /\d+/ + %ignore " " + """) + x = g.parse("[1, 2, 3, 4] {1:2, 3:4, 5:6}") + print(x) + x = g.parse("[1] {1:2}") + print(x) + def test_token_collision_WS(self): g = _Lark(r"""start: "Hello" NAME NAME: /\w/+ From ed17512c3a3cee0131f24623fea4d119ea1edd8b Mon Sep 17 00:00:00 2001 From: MegaIng1 Date: Sat, 28 Mar 2020 01:54:38 +0100 Subject: [PATCH 03/12] Corrected & Simplified test --- tests/test_parser.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/tests/test_parser.py b/tests/test_parser.py index 2a64c77..d41b028 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -813,16 +813,15 @@ def _make_parser_test(LEXER, PARSER): def test_templates(self): g = _Lark(r""" - start: number_list "\n" number_dict + start: "[" sep{NUMBER, ","} "]" sep{item, delim}: item (delim item)* - number_list: "[" sep{NUMBER, ","} "]" - number_dict: "{" sep{(NUMBER ":" NUMBER), ";"} "}" // Just to test this NUMBER: /\d+/ %ignore " " """) - x = g.parse("[1, 2, 3, 4] {1:2, 3:4, 5:6}") - print(x) - x = g.parse("[1] {1:2}") + x = g.parse("[1, 2, 3, 4]") + self.assertSequenceEqual(x.children,['1', '2', '3', '4']) + x = g.parse("[1]") + self.assertSequenceEqual(x.children,['1']) print(x) def test_token_collision_WS(self): From b8f8448a0b4cd3457641a7e9eb13fe11b5a9f75c Mon Sep 17 00:00:00 2001 From: MegaIng1 Date: Sat, 28 Mar 2020 01:55:17 +0100 Subject: [PATCH 04/12] Implemented Templates --- lark/load_grammar.py | 136 +++++++++++++++++++++++++++++++++++++------ 1 file changed, 117 insertions(+), 19 deletions(-) diff --git a/lark/load_grammar.py b/lark/load_grammar.py index 9356d7e..800de4e 100644 --- a/lark/load_grammar.py +++ b/lark/load_grammar.py @@ -99,13 +99,14 @@ TERMINALS = { RULES = { 'start': ['_list'], '_list': ['_item', '_list _item'], - '_item': ['rule', 'rule_template', 'term', 'statement', '_NL'], + '_item': ['rule', 'template', 'term', 'statement', '_NL'], 'template': ['RULE _LBRACE template_params _RBRACE _COLON expansions _NL', 'RULE _LBRACE template_params _RBRACE _DOT NUMBER _COLON expansions _NL'], - 'template_params': ['RULE', - 'template_params _COMMA RULE'], + 'template_params': ['_template_params'], + '_template_params': ['RULE', + '_template_params _COMMA RULE'], 'rule': ['RULE _COLON expansions _NL', 'RULE _DOT NUMBER _COLON expansions _NL'], @@ -142,9 +143,9 @@ RULES = { 'maybe': ['_LBRA expansions _RBRA'], 'range': ['STRING _DOTDOT STRING'], - 'template_usage': ['RULE _LBRACE template_args _RBRACE'], - 'template_args': ['atom', - 'template_args _COMMA atom'], + 'template_usage': ['RULE _LBRACE _template_args _RBRACE'], + '_template_args': ['value', + '_template_args _COMMA value'], 'term': ['TERMINAL _COLON expansions _NL', 'TERMINAL _DOT NUMBER _COLON expansions _NL'], @@ -353,6 +354,44 @@ class PrepareAnonTerminals(Transformer_InPlace): return Terminal(term_name, filter_out=isinstance(p, PatternStr)) +class _ReplaceSymbols(Transformer_InPlace): + " Helper for ApplyTemplates " + + def __init__(self): + super(_ReplaceSymbols, self).__init__() + self.names = {} + + def value(self, c): + if len(c) == 1 and isinstance(c[0], Token) and c[0].type == 'RULE' and c[0].value in self.names: + return self.names[c[0].value] + return self.__default__('value', c, None) + +class ApplyTemplates(Transformer_InPlace): + " Apply the templates, creating new rules that represent the used templates " + + def __init__(self, temp_defs, rule_defs): + super(ApplyTemplates, self).__init__() + self.temp_defs = temp_defs + self.rule_defs = rule_defs + self.replacer = _ReplaceSymbols() + self.created_templates = set() + + def _get_template_name(self, name, args): + return "_%s{%s}" % (name, ",".join(a.name for a in args)) + + def template_usage(self, c): + name = c[0] + args = c[1:] + result_name = self._get_template_name(name.value, args) + if result_name not in self.created_templates: + (_n, params, tree, options) ,= (t for t in self.temp_defs if t[0] == name) + assert len(params) == len(args), args + result_tree = deepcopy(tree) + self.replacer.names = dict(zip(params, args)) + self.replacer.transform(result_tree) + self.rule_defs.append((result_name, result_tree, deepcopy(options))) + return NonTerminal(result_name) + def _rfind(s, choices): return max(s.rfind(c) for c in choices) @@ -452,9 +491,10 @@ def _choice_of_rules(rules): return ST('expansions', [ST('expansion', [Token('RULE', name)]) for name in rules]) class Grammar: - def __init__(self, rule_defs, term_defs, ignore): + def __init__(self, rule_defs, term_defs, temp_defs, ignore): self.term_defs = term_defs self.rule_defs = rule_defs + self.temp_defs = temp_defs self.ignore = ignore def compile(self, start): @@ -462,6 +502,7 @@ class Grammar: # So deepcopy allows calling compile more than once. term_defs = deepcopy(list(self.term_defs)) rule_defs = deepcopy(self.rule_defs) + temp_defs = deepcopy(self.temp_defs) # =================== # Compile Terminals @@ -478,29 +519,38 @@ class Grammar: transformer = PrepareLiterals() * TerminalTreeToPattern() terminals = [TerminalDef(name, transformer.transform( term_tree ), priority) - for name, (term_tree, priority) in term_defs if term_tree] + for name, (term_tree, priority) in term_defs if term_tree] # ================= # Compile Rules # ================= + + # TODO: add templates # 1. Pre-process terminals - transformer = PrepareLiterals() * PrepareSymbols() * PrepareAnonTerminals(terminals) # Adds to terminals + transformer = PrepareLiterals() * PrepareSymbols() * PrepareAnonTerminals(terminals) # Adds to terminals + + # 2. Inline Templates - # 2. Convert EBNF to BNF (and apply step 1) + transformer *= ApplyTemplates(temp_defs, rule_defs) + + # 3. Convert EBNF to BNF (and apply step 1 & 2) ebnf_to_bnf = EBNF_to_BNF() rules = [] - for name, rule_tree, options in rule_defs: + i = 0 + while i < len(rule_defs): # We have to do it like this because rule_defs might grow due to templates + name, rule_tree, options = rule_defs[i] ebnf_to_bnf.rule_options = RuleOptions(keep_all_tokens=True) if options.keep_all_tokens else None ebnf_to_bnf.prefix = name tree = transformer.transform(rule_tree) res = ebnf_to_bnf.transform(tree) rules.append((name, res, options)) + i += 1 rules += ebnf_to_bnf.new_rules assert len(rules) == len({name for name, _t, _o in rules}), "Whoops, name collision" - # 3. Compile tree to Rule objects + # 4. Compile tree to Rule objects rule_tree_to_text = RuleTreeToText() simplify_rule = SimplifyRule_Visitor() @@ -589,9 +639,11 @@ def import_from_grammar_into_namespace(grammar, namespace, aliases): imported_terms = dict(grammar.term_defs) imported_rules = {n:(n,deepcopy(t),o) for n,t,o in grammar.rule_defs} + imported_temps = {n:(n,deepcopy(t),o) for n,t,o in grammar.temp_defs} term_defs = [] rule_defs = [] + temp_defs = [] def rule_dependencies(symbol): if symbol.type != 'RULE': @@ -661,8 +713,8 @@ def resolve_term_references(term_defs): raise GrammarError("Recursion in terminal '%s' (recursion is only allowed in rules, not terminals)" % name) -def options_from_rule(name, *x,is_template=False): - if len(x) > (1+is_template): +def options_from_rule(name, *x): + if len(x) > 1: priority, expansions = x priority = int(priority) else: @@ -676,6 +728,22 @@ def options_from_rule(name, *x,is_template=False): return name, expansions, RuleOptions(keep_all_tokens, expand1, priority=priority) +def options_from_template(name, params, *x): + if len(x) > 1: + priority, expansions = x + priority = int(priority) + else: + expansions ,= x + priority = None + params = [t.value for t in params.children] + + keep_all_tokens = name.startswith('!') + name = name.lstrip('!') + expand1 = name.startswith('?') + name = name.lstrip('?') + + return name, params, expansions, RuleOptions(keep_all_tokens, expand1, priority=priority) + def symbols_from_strcase(expansion): return [Terminal(x, filter_out=x.startswith('_')) if x.isupper() else NonTerminal(x) for x in expansion] @@ -741,14 +809,14 @@ class GrammarLoader: defs = classify(tree.children, lambda c: c.data, lambda c: c.children) term_defs = defs.pop('term', []) rule_defs = defs.pop('rule', []) - template_defs = defs.pop('template', []) + temp_defs = defs.pop('template', []) statements = defs.pop('statement', []) assert not defs term_defs = [td if len(td)==3 else (td[0], 1, td[1]) for td in term_defs] term_defs = [(name.value, (t, int(p))) for name, p, t in term_defs] rule_defs = [options_from_rule(*x) for x in rule_defs] - template_defs = [options_from_rule(*x, is_template=True) for x in rule_defs] + temp_defs = [options_from_template(*x) for x in temp_defs] # Execute statements ignore, imports = [], {} @@ -804,10 +872,11 @@ class GrammarLoader: for dotted_path, (base_paths, aliases) in imports.items(): grammar_path = os.path.join(*dotted_path) + EXT g = import_grammar(grammar_path, base_paths=base_paths) - new_td, new_rd = import_from_grammar_into_namespace(g, '__'.join(dotted_path), aliases) + new_td, new_rd, new_tp = import_from_grammar_into_namespace(g, '__'.join(dotted_path), aliases) term_defs += new_td rule_defs += new_rd + temp_defs += new_tp # Verify correctness 1 for name, _ in term_defs: @@ -854,6 +923,17 @@ class GrammarLoader: if name in rule_names: raise GrammarError("Rule '%s' defined more than once" % name) rule_names.add(name) + temp_names = set() + for name, _p, _x, _o in temp_defs: + if name.startswith('__'): + raise GrammarError('Names starting with double-underscore are reserved (Error at %s (template))' % name) + if name.startswith('_'): # TODO: rethink this decision (not the error msg) + raise GrammarError('Templates are always inline, they should not start with a underscore (Error ar %s)' % name) + if name in temp_names: + raise GrammarError("Template '%s' defined more than once" % name) + temp_names.add(name) + if name in rule_names: + raise GrammarError("Template '%s' conflicts with rule of same name" % name) for name, expansions, _o in rules: for sym in _find_used_symbols(expansions): @@ -861,10 +941,28 @@ class GrammarLoader: if sym not in terminal_names: raise GrammarError("Token '%s' used but not defined (in rule %s)" % (sym, name)) else: - if sym not in rule_names: + if sym not in rule_names and sym not in temp_names: # TODO: check that sym is actually used as template raise GrammarError("Rule '%s' used but not defined (in rule %s)" % (sym, name)) - return Grammar(rules, term_defs, ignore_names) + for name, params, expansions, _o in temp_defs: + for i, p in enumerate(params): + if p in rule_names: + raise GrammarError("Template Parameter conflicts with rule %s (in template %s)" % (p, name)) + if p in temp_names: + raise GrammarError("Template Parameter conflicts with template %s (in template %s)" % (p, name)) + if p in params[:i]: + raise GrammarError("Duplicate Template Parameter %s (in template %s)" % (p, name)) + for sym in _find_used_symbols(expansions): + if sym.type == 'TERMINAL': + if sym not in terminal_names: + raise GrammarError("Token '%s' used but not defined (in template %s)" % (sym, name)) + else: + if sym not in rule_names and sym not in temp_names and sym not in params: + raise GrammarError("Rule '%s' used but not defined (in template %s)" % (sym, name)) + # TODO: check that sym is actually used as template + # TODO: number of template arguments matches requirement + + return Grammar(rules, term_defs, temp_defs, ignore_names) From 0c1c48411dafa23aaab28c946d4934f1f6d27bcc Mon Sep 17 00:00:00 2001 From: MegaIng1 Date: Sat, 28 Mar 2020 12:21:47 +0100 Subject: [PATCH 05/12] Added test for recursive templates + implemented them --- lark/load_grammar.py | 1 + tests/test_parser.py | 13 ++++++++++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/lark/load_grammar.py b/lark/load_grammar.py index 800de4e..16f69ac 100644 --- a/lark/load_grammar.py +++ b/lark/load_grammar.py @@ -384,6 +384,7 @@ class ApplyTemplates(Transformer_InPlace): args = c[1:] result_name = self._get_template_name(name.value, args) if result_name not in self.created_templates: + self.created_templates.add(result_name) (_n, params, tree, options) ,= (t for t in self.temp_defs if t[0] == name) assert len(params) == len(args), args result_tree = deepcopy(tree) diff --git a/tests/test_parser.py b/tests/test_parser.py index d41b028..a664bcc 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -822,7 +822,18 @@ def _make_parser_test(LEXER, PARSER): self.assertSequenceEqual(x.children,['1', '2', '3', '4']) x = g.parse("[1]") self.assertSequenceEqual(x.children,['1']) - print(x) + + def test_templates_recursion(self): + g = _Lark(r""" + start: "[" sep{NUMBER, ","} "]" + sep{item, delim}: item | sep{item, delim} delim item + NUMBER: /\d+/ + %ignore " " + """) + x = g.parse("[1, 2, 3, 4]") + self.assertSequenceEqual(x.children,['1', '2', '3', '4']) + x = g.parse("[1]") + self.assertSequenceEqual(x.children,['1']) def test_token_collision_WS(self): g = _Lark(r"""start: "Hello" NAME From 8bf5da697ac8afacbc2669e5eb5b9d6ceaf8b05b Mon Sep 17 00:00:00 2001 From: MegaIng1 Date: Sat, 28 Mar 2020 12:22:57 +0100 Subject: [PATCH 06/12] Added test for template imports and implemented them --- lark/load_grammar.py | 39 ++++++++++++++++++++------------ tests/grammars/templates.lark | 1 + tests/test_parser.py | 7 ++++++ tests/test_templates_import.lark | 4 ++++ 4 files changed, 36 insertions(+), 15 deletions(-) create mode 100644 tests/grammars/templates.lark create mode 100644 tests/test_templates_import.lark diff --git a/lark/load_grammar.py b/lark/load_grammar.py index 16f69ac..080cc37 100644 --- a/lark/load_grammar.py +++ b/lark/load_grammar.py @@ -525,8 +525,6 @@ class Grammar: # ================= # Compile Rules # ================= - - # TODO: add templates # 1. Pre-process terminals transformer = PrepareLiterals() * PrepareSymbols() * PrepareAnonTerminals(terminals) # Adds to terminals @@ -640,7 +638,7 @@ def import_from_grammar_into_namespace(grammar, namespace, aliases): imported_terms = dict(grammar.term_defs) imported_rules = {n:(n,deepcopy(t),o) for n,t,o in grammar.rule_defs} - imported_temps = {n:(n,deepcopy(t),o) for n,t,o in grammar.temp_defs} + imported_temps = {n:(n,p,deepcopy(t),o) for n,p,t,o in grammar.temp_defs} term_defs = [] rule_defs = [] @@ -649,12 +647,13 @@ def import_from_grammar_into_namespace(grammar, namespace, aliases): def rule_dependencies(symbol): if symbol.type != 'RULE': return [] - try: - _, tree, _ = imported_rules[symbol] - except KeyError: + if symbol in imported_rules: + return _find_used_symbols(imported_rules[symbol][1]) + elif symbol in imported_temps: + return _find_used_symbols(imported_temps[symbol][2]) - set(imported_temps[symbol][1]) + else: raise GrammarError("Missing symbol '%s' in grammar %s" % (symbol, namespace)) - return _find_used_symbols(tree) def get_namespace_name(name): @@ -671,14 +670,24 @@ def import_from_grammar_into_namespace(grammar, namespace, aliases): term_defs.append([get_namespace_name(symbol), imported_terms[symbol]]) else: assert symbol.type == 'RULE' - rule = imported_rules[symbol] - for t in rule[1].iter_subtrees(): - for i, c in enumerate(t.children): - if isinstance(c, Token) and c.type in ('RULE', 'TERMINAL'): - t.children[i] = Token(c.type, get_namespace_name(c)) - rule_defs.append((get_namespace_name(symbol), rule[1], rule[2])) - - return term_defs, rule_defs + if symbol in imported_rules: + rule = imported_rules[symbol] + for t in rule[1].iter_subtrees(): + for i, c in enumerate(t.children): + if isinstance(c, Token) and c.type in ('RULE', 'TERMINAL'): + t.children[i] = Token(c.type, get_namespace_name(c)) + rule_defs.append((get_namespace_name(symbol), rule[1], rule[2])) + else: + temp = imported_temps[symbol] + for t in temp[2].iter_subtrees(): + for i, c in enumerate(t.children): + if isinstance(c, Token) and c.type in ('RULE', 'TERMINAL'): + t.children[i] = Token(c.type, get_namespace_name(c)) + params = [('%s__%s' if p[0]!='_' else '_%s__%s' ) % (namespace, p) for p in temp[1]] + temp_defs.append((get_namespace_name(symbol), params, temp[2], temp[3])) + + + return term_defs, rule_defs, temp_defs diff --git a/tests/grammars/templates.lark b/tests/grammars/templates.lark new file mode 100644 index 0000000..1631188 --- /dev/null +++ b/tests/grammars/templates.lark @@ -0,0 +1 @@ +sep{item, delim}: item (delim item)* \ No newline at end of file diff --git a/tests/test_parser.py b/tests/test_parser.py index a664bcc..5a0313d 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -835,6 +835,13 @@ def _make_parser_test(LEXER, PARSER): x = g.parse("[1]") self.assertSequenceEqual(x.children,['1']) + def test_templates_import(self): + g = _Lark_open("test_templates_import.lark", rel_to=__file__) + x = g.parse("[1, 2, 3, 4]") + self.assertSequenceEqual(x.children,['1', '2', '3', '4']) + x = g.parse("[1]") + self.assertSequenceEqual(x.children,['1']) + def test_token_collision_WS(self): g = _Lark(r"""start: "Hello" NAME NAME: /\w/+ diff --git a/tests/test_templates_import.lark b/tests/test_templates_import.lark new file mode 100644 index 0000000..a1272b8 --- /dev/null +++ b/tests/test_templates_import.lark @@ -0,0 +1,4 @@ +start: "[" sep{NUMBER, ","} "]" +NUMBER: /\d+/ +%ignore " " +%import .grammars.templates.sep \ No newline at end of file From 2daca647d43e71d2b3b1db3ecea0aa8198ca14be Mon Sep 17 00:00:00 2001 From: MegaIng1 Date: Sat, 28 Mar 2020 23:03:49 +0100 Subject: [PATCH 07/12] Unified rules and templates --- lark/load_grammar.py | 168 +++++++++++++++++-------------------------- tests/test_parser.py | 59 ++++++++------- 2 files changed, 94 insertions(+), 133 deletions(-) diff --git a/lark/load_grammar.py b/lark/load_grammar.py index 080cc37..4a277a1 100644 --- a/lark/load_grammar.py +++ b/lark/load_grammar.py @@ -99,17 +99,14 @@ TERMINALS = { RULES = { 'start': ['_list'], '_list': ['_item', '_list _item'], - '_item': ['rule', 'template', 'term', 'statement', '_NL'], + '_item': ['rule', 'term', 'statement', '_NL'], - 'template': ['RULE _LBRACE template_params _RBRACE _COLON expansions _NL', - 'RULE _LBRACE template_params _RBRACE _DOT NUMBER _COLON expansions _NL'], - - 'template_params': ['_template_params'], + 'rule': ['RULE template_params _COLON expansions _NL', + 'RULE template_params _DOT NUMBER _COLON expansions _NL'], + 'template_params': ['_LBRACE _template_params _RBRACE', + ''], '_template_params': ['RULE', '_template_params _COMMA RULE'], - - 'rule': ['RULE _COLON expansions _NL', - 'RULE _DOT NUMBER _COLON expansions _NL'], 'expansions': ['alias', 'expansions _OR alias', 'expansions _NL _OR alias'], @@ -369,28 +366,31 @@ class _ReplaceSymbols(Transformer_InPlace): class ApplyTemplates(Transformer_InPlace): " Apply the templates, creating new rules that represent the used templates " - def __init__(self, temp_defs, rule_defs): + def __init__(self, rule_defs): super(ApplyTemplates, self).__init__() - self.temp_defs = temp_defs self.rule_defs = rule_defs self.replacer = _ReplaceSymbols() self.created_templates = set() - def _get_template_name(self, name, args): - return "_%s{%s}" % (name, ",".join(a.name for a in args)) - def template_usage(self, c): name = c[0] args = c[1:] - result_name = self._get_template_name(name.value, args) + result_name = "%s{%s}" % (name, ",".join(a.name for a in args)) if result_name not in self.created_templates: self.created_templates.add(result_name) - (_n, params, tree, options) ,= (t for t in self.temp_defs if t[0] == name) + (_n, params, tree, options) ,= (t for t in self.rule_defs if t[0] == name) assert len(params) == len(args), args result_tree = deepcopy(tree) self.replacer.names = dict(zip(params, args)) self.replacer.transform(result_tree) - self.rule_defs.append((result_name, result_tree, deepcopy(options))) + if name[0] != '_': + if result_tree.data == 'expansions': + for i, c in enumerate(result_tree.children): + if not (isinstance(c, Tree) and c.data == 'alias'): + result_tree.children[i] = ST('alias', [c, name]) + elif result_tree.data != 'alias': + result_tree = ST('alias', [result_tree, name]) + self.rule_defs.append((result_name, [], result_tree, deepcopy(options))) return NonTerminal(result_name) @@ -492,10 +492,9 @@ def _choice_of_rules(rules): return ST('expansions', [ST('expansion', [Token('RULE', name)]) for name in rules]) class Grammar: - def __init__(self, rule_defs, term_defs, temp_defs, ignore): + def __init__(self, rule_defs, term_defs, ignore): self.term_defs = term_defs self.rule_defs = rule_defs - self.temp_defs = temp_defs self.ignore = ignore def compile(self, start): @@ -503,7 +502,6 @@ class Grammar: # So deepcopy allows calling compile more than once. term_defs = deepcopy(list(self.term_defs)) rule_defs = deepcopy(self.rule_defs) - temp_defs = deepcopy(self.temp_defs) # =================== # Compile Terminals @@ -531,20 +529,22 @@ class Grammar: # 2. Inline Templates - transformer *= ApplyTemplates(temp_defs, rule_defs) + transformer *= ApplyTemplates(rule_defs) # 3. Convert EBNF to BNF (and apply step 1 & 2) ebnf_to_bnf = EBNF_to_BNF() rules = [] i = 0 while i < len(rule_defs): # We have to do it like this because rule_defs might grow due to templates - name, rule_tree, options = rule_defs[i] + name, params, rule_tree, options = rule_defs[i] + i += 1 + if len(params) != 0: # Dont transform templates + continue ebnf_to_bnf.rule_options = RuleOptions(keep_all_tokens=True) if options.keep_all_tokens else None ebnf_to_bnf.prefix = name tree = transformer.transform(rule_tree) res = ebnf_to_bnf.transform(tree) rules.append((name, res, options)) - i += 1 rules += ebnf_to_bnf.new_rules assert len(rules) == len({name for name, _t, _o in rules}), "Whoops, name collision" @@ -637,26 +637,28 @@ def import_from_grammar_into_namespace(grammar, namespace, aliases): """ imported_terms = dict(grammar.term_defs) - imported_rules = {n:(n,deepcopy(t),o) for n,t,o in grammar.rule_defs} - imported_temps = {n:(n,p,deepcopy(t),o) for n,p,t,o in grammar.temp_defs} + imported_rules = {n:(n,p,deepcopy(t),o) for n,p,t,o in grammar.rule_defs} term_defs = [] rule_defs = [] - temp_defs = [] def rule_dependencies(symbol): if symbol.type != 'RULE': return [] - if symbol in imported_rules: - return _find_used_symbols(imported_rules[symbol][1]) - elif symbol in imported_temps: - return _find_used_symbols(imported_temps[symbol][2]) - set(imported_temps[symbol][1]) - else: + try: + _, params, tree,_ = imported_rules[symbol] + except KeyError: raise GrammarError("Missing symbol '%s' in grammar %s" % (symbol, namespace)) + return _find_used_symbols(tree) - set(params) - def get_namespace_name(name): + def get_namespace_name(name, params): + if params is not None: + try: + return params[name] + except KeyError: + pass try: return aliases[name].value except KeyError: @@ -667,27 +669,20 @@ def import_from_grammar_into_namespace(grammar, namespace, aliases): to_import = list(bfs(aliases, rule_dependencies)) for symbol in to_import: if symbol.type == 'TERMINAL': - term_defs.append([get_namespace_name(symbol), imported_terms[symbol]]) + term_defs.append([get_namespace_name(symbol, None), imported_terms[symbol]]) else: assert symbol.type == 'RULE' - if symbol in imported_rules: - rule = imported_rules[symbol] - for t in rule[1].iter_subtrees(): - for i, c in enumerate(t.children): - if isinstance(c, Token) and c.type in ('RULE', 'TERMINAL'): - t.children[i] = Token(c.type, get_namespace_name(c)) - rule_defs.append((get_namespace_name(symbol), rule[1], rule[2])) - else: - temp = imported_temps[symbol] - for t in temp[2].iter_subtrees(): - for i, c in enumerate(t.children): - if isinstance(c, Token) and c.type in ('RULE', 'TERMINAL'): - t.children[i] = Token(c.type, get_namespace_name(c)) - params = [('%s__%s' if p[0]!='_' else '_%s__%s' ) % (namespace, p) for p in temp[1]] - temp_defs.append((get_namespace_name(symbol), params, temp[2], temp[3])) + _, params, tree, options = imported_rules[symbol] + params_map = {p: ('%s__%s' if p[0]!='_' else '_%s__%s' ) % (namespace, p) for p in params} + for t in tree.iter_subtrees(): + for i, c in enumerate(t.children): + if isinstance(c, Token) and c.type in ('RULE', 'TERMINAL'): + t.children[i] = Token(c.type, get_namespace_name(c, params_map)) + params = [params_map[p] for p in params] # We can not rely on ordered dictionaries + rule_defs.append((get_namespace_name(symbol, params_map), params, tree, options)) - return term_defs, rule_defs, temp_defs + return term_defs, rule_defs @@ -723,29 +718,14 @@ def resolve_term_references(term_defs): raise GrammarError("Recursion in terminal '%s' (recursion is only allowed in rules, not terminals)" % name) -def options_from_rule(name, *x): +def options_from_rule(name, params, *x): if len(x) > 1: priority, expansions = x priority = int(priority) else: expansions ,= x priority = None - - keep_all_tokens = name.startswith('!') - name = name.lstrip('!') - expand1 = name.startswith('?') - name = name.lstrip('?') - - return name, expansions, RuleOptions(keep_all_tokens, expand1, priority=priority) - -def options_from_template(name, params, *x): - if len(x) > 1: - priority, expansions = x - priority = int(priority) - else: - expansions ,= x - priority = None - params = [t.value for t in params.children] + params = [t.value for t in params.children] if params is not None else [] # For the grammar parser keep_all_tokens = name.startswith('!') name = name.lstrip('!') @@ -775,8 +755,8 @@ class GrammarLoader: def __init__(self): terminals = [TerminalDef(name, PatternRE(value)) for name, value in TERMINALS.items()] - rules = [options_from_rule(name, x) for name, x in RULES.items()] - rules = [Rule(NonTerminal(r), symbols_from_strcase(x.split()), i, None, o) for r, xs, o in rules for i, x in enumerate(xs)] + rules = [options_from_rule(name, None, x) for name, x in RULES.items()] + rules = [Rule(NonTerminal(r), symbols_from_strcase(x.split()), i, None, o) for r, _p, xs, o in rules for i, x in enumerate(xs)] callback = ParseTreeBuilder(rules, ST).create_callback() lexer_conf = LexerConf(terminals, ['WS', 'COMMENT']) @@ -819,14 +799,12 @@ class GrammarLoader: defs = classify(tree.children, lambda c: c.data, lambda c: c.children) term_defs = defs.pop('term', []) rule_defs = defs.pop('rule', []) - temp_defs = defs.pop('template', []) statements = defs.pop('statement', []) assert not defs term_defs = [td if len(td)==3 else (td[0], 1, td[1]) for td in term_defs] term_defs = [(name.value, (t, int(p))) for name, p, t in term_defs] rule_defs = [options_from_rule(*x) for x in rule_defs] - temp_defs = [options_from_template(*x) for x in temp_defs] # Execute statements ignore, imports = [], {} @@ -882,11 +860,10 @@ class GrammarLoader: for dotted_path, (base_paths, aliases) in imports.items(): grammar_path = os.path.join(*dotted_path) + EXT g = import_grammar(grammar_path, base_paths=base_paths) - new_td, new_rd, new_tp = import_from_grammar_into_namespace(g, '__'.join(dotted_path), aliases) + new_td, new_rd = import_from_grammar_into_namespace(g, '__'.join(dotted_path), aliases) term_defs += new_td rule_defs += new_rd - temp_defs += new_tp # Verify correctness 1 for name, _ in term_defs: @@ -926,53 +903,38 @@ class GrammarLoader: rules = rule_defs - rule_names = set() - for name, _x, _o in rules: + rule_names = {} + for name, params, _x, _o in rules: if name.startswith('__'): raise GrammarError('Names starting with double-underscore are reserved (Error at %s)' % name) if name in rule_names: raise GrammarError("Rule '%s' defined more than once" % name) - rule_names.add(name) - temp_names = set() - for name, _p, _x, _o in temp_defs: - if name.startswith('__'): - raise GrammarError('Names starting with double-underscore are reserved (Error at %s (template))' % name) - if name.startswith('_'): # TODO: rethink this decision (not the error msg) - raise GrammarError('Templates are always inline, they should not start with a underscore (Error ar %s)' % name) - if name in temp_names: - raise GrammarError("Template '%s' defined more than once" % name) - temp_names.add(name) - if name in rule_names: - raise GrammarError("Template '%s' conflicts with rule of same name" % name) - - for name, expansions, _o in rules: - for sym in _find_used_symbols(expansions): - if sym.type == 'TERMINAL': - if sym not in terminal_names: - raise GrammarError("Token '%s' used but not defined (in rule %s)" % (sym, name)) - else: - if sym not in rule_names and sym not in temp_names: # TODO: check that sym is actually used as template - raise GrammarError("Rule '%s' used but not defined (in rule %s)" % (sym, name)) + rule_names[name] = len(params) - for name, params, expansions, _o in temp_defs: + for name, params , expansions, _o in rules: for i, p in enumerate(params): if p in rule_names: raise GrammarError("Template Parameter conflicts with rule %s (in template %s)" % (p, name)) - if p in temp_names: - raise GrammarError("Template Parameter conflicts with template %s (in template %s)" % (p, name)) if p in params[:i]: raise GrammarError("Duplicate Template Parameter %s (in template %s)" % (p, name)) + for temp in expansions.find_data('template_usage'): + sym = temp.children[0] + args = temp.children[1:] + if sym not in rule_names: + raise GrammarError("Template '%s' used but not defined (in rule %s)" % (sym, name)) + if len(args) != rule_names[sym]: + raise GrammarError("Wrong number of template arguments used for %s " + "(expected %s, got %s) (in rule %s)"%(sym, rule_names[sym], len(args), name)) for sym in _find_used_symbols(expansions): if sym.type == 'TERMINAL': if sym not in terminal_names: - raise GrammarError("Token '%s' used but not defined (in template %s)" % (sym, name)) + raise GrammarError("Token '%s' used but not defined (in rule %s)" % (sym, name)) else: - if sym not in rule_names and sym not in temp_names and sym not in params: - raise GrammarError("Rule '%s' used but not defined (in template %s)" % (sym, name)) - # TODO: check that sym is actually used as template - # TODO: number of template arguments matches requirement + if sym not in rule_names and sym not in params: + raise GrammarError("Rule '%s' used but not defined (in rule %s)" % (sym, name)) + - return Grammar(rules, term_defs, temp_defs, ignore_names) + return Grammar(rules, term_defs, ignore_names) diff --git a/tests/test_parser.py b/tests/test_parser.py index 5a0313d..6ac4a98 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -811,6 +811,27 @@ def _make_parser_test(LEXER, PARSER): x = g.parse('Hello HelloWorld') self.assertSequenceEqual(x.children, ['HelloWorld']) + def test_token_collision_WS(self): + g = _Lark(r"""start: "Hello" NAME + NAME: /\w/+ + %import common.WS + %ignore WS + """) + x = g.parse('Hello World') + self.assertSequenceEqual(x.children, ['World']) + x = g.parse('Hello HelloWorld') + self.assertSequenceEqual(x.children, ['HelloWorld']) + + def test_token_collision2(self): + g = _Lark(""" + !start: "starts" + + %import common.LCASE_LETTER + """) + + x = g.parse("starts") + self.assertSequenceEqual(x.children, ['starts']) + def test_templates(self): g = _Lark(r""" start: "[" sep{NUMBER, ","} "]" @@ -819,51 +840,29 @@ def _make_parser_test(LEXER, PARSER): %ignore " " """) x = g.parse("[1, 2, 3, 4]") - self.assertSequenceEqual(x.children,['1', '2', '3', '4']) + self.assertSequenceEqual(x.children, [Tree('sep', ['1', '2', '3', '4'])]) x = g.parse("[1]") - self.assertSequenceEqual(x.children,['1']) + self.assertSequenceEqual(x.children, [Tree('sep', ['1'])]) def test_templates_recursion(self): g = _Lark(r""" - start: "[" sep{NUMBER, ","} "]" - sep{item, delim}: item | sep{item, delim} delim item + start: "[" _sep{NUMBER, ","} "]" + _sep{item, delim}: item | _sep{item, delim} delim item NUMBER: /\d+/ %ignore " " """) x = g.parse("[1, 2, 3, 4]") - self.assertSequenceEqual(x.children,['1', '2', '3', '4']) + self.assertSequenceEqual(x.children, ['1', '2', '3', '4']) x = g.parse("[1]") - self.assertSequenceEqual(x.children,['1']) + self.assertSequenceEqual(x.children, ['1']) def test_templates_import(self): g = _Lark_open("test_templates_import.lark", rel_to=__file__) x = g.parse("[1, 2, 3, 4]") - self.assertSequenceEqual(x.children,['1', '2', '3', '4']) + self.assertSequenceEqual(x.children, [Tree('sep', ['1', '2', '3', '4'])]) x = g.parse("[1]") - self.assertSequenceEqual(x.children,['1']) + self.assertSequenceEqual(x.children, [Tree('sep', ['1'])]) - def test_token_collision_WS(self): - g = _Lark(r"""start: "Hello" NAME - NAME: /\w/+ - %import common.WS - %ignore WS - """) - x = g.parse('Hello World') - self.assertSequenceEqual(x.children, ['World']) - x = g.parse('Hello HelloWorld') - self.assertSequenceEqual(x.children, ['HelloWorld']) - - - def test_token_collision2(self): - g = _Lark(""" - !start: "starts" - - %import common.LCASE_LETTER - """) - - x = g.parse("starts") - self.assertSequenceEqual(x.children, ['starts']) - def test_g_regex_flags(self): g = _Lark(""" start: "a" /b+/ C From 6d3477bbc7b4bae22aa64bbca9f51823c802eec1 Mon Sep 17 00:00:00 2001 From: MegaIng1 Date: Sun, 29 Mar 2020 01:59:39 +0100 Subject: [PATCH 08/12] Updated lark.lark + added template_lark.lark, showcasing templates --- examples/lark.lark | 16 +++++++---- examples/template_lark.lark | 56 +++++++++++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+), 5 deletions(-) create mode 100644 examples/template_lark.lark diff --git a/examples/lark.lark b/examples/lark.lark index 8a5ac66..fef6b07 100644 --- a/examples/lark.lark +++ b/examples/lark.lark @@ -4,8 +4,11 @@ _item: rule | token | statement -rule: RULE priority? ":" expansions _NL -token: TOKEN priority? ":" expansions _NL +rule: RULE rule_params priority? ":" expansions _NL +token: TOKEN token_params priority? ":" expansions _NL + +rule_params: ["{" RULE ("," RULE)* "}"] +token_params: ["{" TOKEN ("," TOKEN)* "}"] priority: "." NUMBER @@ -27,9 +30,12 @@ name_list: "(" name ("," name)* ")" ?atom: "(" expansions ")" | "[" expansions "]" -> maybe - | STRING ".." STRING -> literal_range - | name - | (REGEXP | STRING) -> literal + | value + +?value: STRING ".." STRING -> literal_range + | name + | (REGEXP | STRING) -> literal + | name "{" value ("," value)* "}" -> template_usage name: RULE | TOKEN diff --git a/examples/template_lark.lark b/examples/template_lark.lark new file mode 100644 index 0000000..296407f --- /dev/null +++ b/examples/template_lark.lark @@ -0,0 +1,56 @@ +start: (_item | _NL)* + +_item: rule + | token + | statement + +_rule_or_token: RULE + | TOKEN +rule: RULE rule_params priority? ":" expansions{_rule_or_token} _NL +token: TOKEN priority? ":" expansions{TOKEN} _NL + +rule_params: ["{" RULE ("," RULE)* "}"] + +priority: "." NUMBER + +statement: "%ignore" expansions{TOKEN} _NL -> ignore + | "%import" import_path{_rule_or_token} ["->" _rule_or_token] _NL -> import + | "%import" import_path{_rule_or_token} name_list{_rule_or_token} _NL -> multi_import + | "%declare" TOKEN+ -> declare + +!import_path{name}: "."? name ("." name)* +name_list{name}: "(" name ("," name)* ")" + +?expansions{name}: alias{name} (_VBAR alias{name})* + +?alias{name}: expansion{name} ["->" RULE] + +?expansion{name}: expr{name}* + +?expr{name}: atom{name} [OP | "~" NUMBER [".." NUMBER]] + +?atom{name}: "(" expansions{name} ")" + | "[" expansions{name} "]" -> maybe + | value{name} + +?value{name}: STRING ".." STRING -> literal_range + | name + | (REGEXP | STRING) -> literal + | name "{" value{name} ("," value{name})* "}" -> template_usage + +_VBAR: _NL? "|" +OP: /[+*]|[?](?![a-z])/ +RULE: /!?[_?]?[a-z][_a-z0-9]*/ +TOKEN: /_?[A-Z][_A-Z0-9]*/ +STRING: _STRING "i"? +REGEXP: /\/(?!\/)(\\\/|\\\\|[^\/\n])*?\/[imslux]*/ +_NL: /(\r?\n)+\s*/ + +%import common.ESCAPED_STRING -> _STRING +%import common.INT -> NUMBER +%import common.WS_INLINE + +COMMENT: /\s*/ "//" /[^\n]/* + +%ignore WS_INLINE +%ignore COMMENT From 3861ee7e07b4f1c55525c8963a53c6e7c11f8e37 Mon Sep 17 00:00:00 2001 From: MegaIng1 Date: Sun, 29 Mar 2020 03:00:09 +0200 Subject: [PATCH 09/12] Correct behaviour of aliases for templates --- lark/load_grammar.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/lark/load_grammar.py b/lark/load_grammar.py index 4a277a1..c1ba95f 100644 --- a/lark/load_grammar.py +++ b/lark/load_grammar.py @@ -385,9 +385,13 @@ class ApplyTemplates(Transformer_InPlace): self.replacer.transform(result_tree) if name[0] != '_': if result_tree.data == 'expansions': - for i, c in enumerate(result_tree.children): - if not (isinstance(c, Tree) and c.data == 'alias'): - result_tree.children[i] = ST('alias', [c, name]) + t = result_tree + while len(t.children) == 2: + if t.children[-1].data != 'alias': + t.children[-1] = ST('alias', [t.children[-1], name]) + t = t.children[0] + if t.children[-1].data != 'alias': + t.children[-1] = ST('alias', [t.children[-1], name]) elif result_tree.data != 'alias': result_tree = ST('alias', [result_tree, name]) self.rule_defs.append((result_name, [], result_tree, deepcopy(options))) From 20a2f690ca060593fe5edd8c99e3e1dbbb7ca409 Mon Sep 17 00:00:00 2001 From: MegaIng1 Date: Sun, 29 Mar 2020 15:07:49 +0200 Subject: [PATCH 10/12] Correct behaviour of aliases for templates (attempt 2) --- lark/grammar.py | 8 +++++--- lark/load_grammar.py | 14 ++------------ lark/parse_tree_builder.py | 5 +++-- tests/test_parser.py | 32 ++++++++++++++++++++++++++++++++ 4 files changed, 42 insertions(+), 17 deletions(-) diff --git a/lark/grammar.py b/lark/grammar.py index cf8cf64..bb84351 100644 --- a/lark/grammar.py +++ b/lark/grammar.py @@ -49,19 +49,21 @@ class NonTerminal(Symbol): class RuleOptions(Serialize): - __serialize_fields__ = 'keep_all_tokens', 'expand1', 'priority', 'empty_indices' + __serialize_fields__ = 'keep_all_tokens', 'expand1', 'priority', 'template_source', 'empty_indices' - def __init__(self, keep_all_tokens=False, expand1=False, priority=None, empty_indices=()): + def __init__(self, keep_all_tokens=False, expand1=False, priority=None, template_source=None, empty_indices=()): self.keep_all_tokens = keep_all_tokens self.expand1 = expand1 self.priority = priority + self.template_source = template_source self.empty_indices = empty_indices def __repr__(self): - return 'RuleOptions(%r, %r, %r)' % ( + return 'RuleOptions(%r, %r, %r, %r)' % ( self.keep_all_tokens, self.expand1, self.priority, + self.template_source ) diff --git a/lark/load_grammar.py b/lark/load_grammar.py index c1ba95f..3deb758 100644 --- a/lark/load_grammar.py +++ b/lark/load_grammar.py @@ -383,17 +383,6 @@ class ApplyTemplates(Transformer_InPlace): result_tree = deepcopy(tree) self.replacer.names = dict(zip(params, args)) self.replacer.transform(result_tree) - if name[0] != '_': - if result_tree.data == 'expansions': - t = result_tree - while len(t.children) == 2: - if t.children[-1].data != 'alias': - t.children[-1] = ST('alias', [t.children[-1], name]) - t = t.children[0] - if t.children[-1].data != 'alias': - t.children[-1] = ST('alias', [t.children[-1], name]) - elif result_tree.data != 'alias': - result_tree = ST('alias', [result_tree, name]) self.rule_defs.append((result_name, [], result_tree, deepcopy(options))) return NonTerminal(result_name) @@ -736,7 +725,8 @@ def options_from_rule(name, params, *x): expand1 = name.startswith('?') name = name.lstrip('?') - return name, params, expansions, RuleOptions(keep_all_tokens, expand1, priority=priority) + return name, params, expansions, RuleOptions(keep_all_tokens, expand1, priority=priority, + template_source=(name if params else None)) def symbols_from_strcase(expansion): diff --git a/lark/parse_tree_builder.py b/lark/parse_tree_builder.py index 11c7fac..4a9edd3 100644 --- a/lark/parse_tree_builder.py +++ b/lark/parse_tree_builder.py @@ -227,9 +227,10 @@ class ParseTreeBuilder: options = rule.options keep_all_tokens = self.always_keep_all_tokens or options.keep_all_tokens expand_single_child = options.expand1 + from_template = options.template_source is not None wrapper_chain = list(filter(None, [ - (expand_single_child and not rule.alias) and ExpandSingleChild, + (expand_single_child and not (rule.alias and not from_template)) and ExpandSingleChild, maybe_create_child_filter(rule.expansion, keep_all_tokens, self.ambiguous, options.empty_indices if self.maybe_placeholders else None), self.propagate_positions and PropagatePositions, self.ambiguous and maybe_create_ambiguous_expander(self.tree_class, rule.expansion, keep_all_tokens), @@ -243,7 +244,7 @@ class ParseTreeBuilder: for rule, wrapper_chain in self.rule_builders: - user_callback_name = rule.alias or rule.origin.name + user_callback_name = rule.alias or rule.options.template_source or rule.origin.name try: f = getattr(transformer, user_callback_name) # XXX InlineTransformer is deprecated! diff --git a/tests/test_parser.py b/tests/test_parser.py index 6ac4a98..6b9df3f 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -863,6 +863,38 @@ def _make_parser_test(LEXER, PARSER): x = g.parse("[1]") self.assertSequenceEqual(x.children, [Tree('sep', ['1'])]) + def test_templates_alias(self): + g = _Lark(r""" + start: expr{"C"} + expr{t}: "A" t + | "B" t -> b + """) + x = g.parse("AC") + self.assertSequenceEqual(x.children, [Tree('expr', [])]) + x = g.parse("BC") + self.assertSequenceEqual(x.children, [Tree('b', [])]) + + def test_templates_modifiers(self): + g = _Lark(r""" + start: expr{"B"} + !expr{t}: "A" t + """) + x = g.parse("AB") + self.assertSequenceEqual(x.children, [Tree('expr', ["A", "B"])]) + g = _Lark(r""" + start: _expr{"B"} + !_expr{t}: "A" t + """) + x = g.parse("AB") + self.assertSequenceEqual(x.children, ["A", "B"]) + g = _Lark(r""" + start: expr{b} + b: "B" + ?expr{t}: "A" t + """) + x = g.parse("AB") + self.assertSequenceEqual(x.children, [Tree('b',[])]) + def test_g_regex_flags(self): g = _Lark(""" start: "a" /b+/ C From a716f54b226341f895aec06d76af078869092c80 Mon Sep 17 00:00:00 2001 From: MegaIng1 Date: Sun, 29 Mar 2020 15:15:39 +0200 Subject: [PATCH 11/12] slight improvement for alias of templates --- lark/parse_tree_builder.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lark/parse_tree_builder.py b/lark/parse_tree_builder.py index 4a9edd3..904d186 100644 --- a/lark/parse_tree_builder.py +++ b/lark/parse_tree_builder.py @@ -227,10 +227,9 @@ class ParseTreeBuilder: options = rule.options keep_all_tokens = self.always_keep_all_tokens or options.keep_all_tokens expand_single_child = options.expand1 - from_template = options.template_source is not None wrapper_chain = list(filter(None, [ - (expand_single_child and not (rule.alias and not from_template)) and ExpandSingleChild, + (expand_single_child and not rule.alias) and ExpandSingleChild, maybe_create_child_filter(rule.expansion, keep_all_tokens, self.ambiguous, options.empty_indices if self.maybe_placeholders else None), self.propagate_positions and PropagatePositions, self.ambiguous and maybe_create_ambiguous_expander(self.tree_class, rule.expansion, keep_all_tokens), From 40148d310cdd606878200ca59041bff9da0d7fb0 Mon Sep 17 00:00:00 2001 From: MegaIng1 Date: Sun, 29 Mar 2020 15:16:27 +0200 Subject: [PATCH 12/12] fix for python2.7 --- lark/load_grammar.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/lark/load_grammar.py b/lark/load_grammar.py index 3deb758..3fe10a3 100644 --- a/lark/load_grammar.py +++ b/lark/load_grammar.py @@ -355,7 +355,6 @@ class _ReplaceSymbols(Transformer_InPlace): " Helper for ApplyTemplates " def __init__(self): - super(_ReplaceSymbols, self).__init__() self.names = {} def value(self, c): @@ -367,7 +366,6 @@ class ApplyTemplates(Transformer_InPlace): " Apply the templates, creating new rules that represent the used templates " def __init__(self, rule_defs): - super(ApplyTemplates, self).__init__() self.rule_defs = rule_defs self.replacer = _ReplaceSymbols() self.created_templates = set()