From be979f2e634655e9840dc19ab29aad0f66f8e473 Mon Sep 17 00:00:00 2001 From: MegaIng1 Date: Fri, 25 Dec 2020 20:11:50 +0100 Subject: [PATCH] Added %extend for both rules and terminals. --- docs/grammar.md | 19 ++++++++++++++++ lark/load_grammar.py | 53 +++++++++++++++++++++++++++++++++++++++---- tests/test_grammar.py | 28 +++++++++++++++++++---- 3 files changed, 91 insertions(+), 9 deletions(-) diff --git a/docs/grammar.md b/docs/grammar.md index d6d4b3b..0d77420 100644 --- a/docs/grammar.md +++ b/docs/grammar.md @@ -302,3 +302,22 @@ Useful for implementing an inheritance pattern when importing grammars. // Add hex support to my_grammar %override number: NUMBER | /0x\w+/ ``` + +### %extend + +Extend the definition of a rule or terminal, e.g. add a new option on what it can match, like when separated with `|`. + +Useful for splitting up a definition of a complex rule with many different options over multiple files. + +Can also be used to implement a plugin system where a core grammar is extended by others. 
+ + **Example:** +```perl +%import my_grammar (start, NUMBER) + +// Add hex support to my_grammar +%extend NUMBER: /0x\w+/ +``` + +For both `%extend` and `%override`, there is no requirement for a rule/terminal to come from another file, but that is probably the most common use case. \ No newline at end of file diff --git a/lark/load_grammar.py b/lark/load_grammar.py index 7383c17..0915c33 100644 --- a/lark/load_grammar.py +++ b/lark/load_grammar.py @@ -95,6 +95,7 @@ TERMINALS = { '_IGNORE': r'%ignore', '_OVERRIDE': r'%override', '_DECLARE': r'%declare', + '_EXTEND': r'%extend', '_IMPORT': r'%import', 'NUMBER': r'[+-]?\d+', } @@ -149,8 +150,11 @@ RULES = { 'term': ['TERMINAL _COLON expansions _NL', 'TERMINAL _DOT NUMBER _COLON expansions _NL'], - 'statement': ['ignore', 'import', 'declare', 'override'], - 'override': ['_OVERRIDE rule', '_OVERRIDE term'], + 'statement': ['ignore', 'import', 'declare', 'override', 'extend'], + 'override': ['_OVERRIDE rule', + '_OVERRIDE term'], + 'extend': ['_EXTEND rule', + '_EXTEND term'], 'ignore': ['_IGNORE expansions _NL'], 'declare': ['_DECLARE _declare_args _NL'], 'import': ['_IMPORT _import_path _NL', @@ -744,8 +748,8 @@ def import_from_grammar_into_namespace(grammar, namespace, aliases): with a 'namespace' prefix, except for those which are aliased. 
""" - imported_terms = dict(grammar.term_defs) - imported_rules = {n:(n,p,deepcopy(t),o) for n,p,t,o in grammar.rule_defs} + imported_terms = {n: (deepcopy(e), p) for n, (e, p) in grammar.term_defs} + imported_rules = {n: (n, p, deepcopy(t), o) for n, p, t, o in grammar.rule_defs} term_defs = [] rule_defs = [] @@ -858,6 +862,14 @@ def _find_used_symbols(tree): return {t for x in tree.find_data('expansion') for t in x.scan_values(lambda t: t.type in ('RULE', 'TERMINAL'))} +def extend_expansions(tree, new): + assert isinstance(tree, Tree) and tree.data == 'expansions' + assert isinstance(new, Tree) and new.data == 'expansions' + while len(tree.children) == 2: + assert isinstance(tree.children[0], Tree) and tree.children[0].data == 'expansions', tree + tree = tree.children[0] + tree.children.insert(0, new) + class GrammarLoader: ERRORS = [ @@ -951,6 +963,8 @@ class GrammarLoader: ignore, imports = [], {} overriding_rules = [] overriding_terms = [] + extend_rules = [] + extend_terms = [] for (stmt,) in statements: if stmt.data == 'ignore': t ,= stmt.children @@ -1008,6 +1022,15 @@ class GrammarLoader: overriding_terms.append((r.children[0].value, (r.children[1], 1))) else: overriding_terms.append((r.children[0].value, (r.children[2], int(r.children[1])))) + elif stmt.data == 'extend': + r ,= stmt.children + if r.data == 'rule': + extend_rules.append(options_from_rule(*r.children)) + else: + if len(r.children) == 2: + extend_terms.append((r.children[0].value, (r.children[1], 1))) + else: + extend_terms.append((r.children[0].value, (r.children[2], int(r.children[1])))) else: assert False, stmt @@ -1037,8 +1060,30 @@ class GrammarLoader: if not overridden: raise GrammarError("Cannot override a nonexisting terminal: %s" % name) term_defs.append(t) + + # Extend the definition of rules + for r in extend_rules: + name = r[0] + # remove overridden rule from rule_defs + for old in rule_defs: + if old[0] == name: + if len(old[1]) != len(r[1]): + raise GrammarError("Cannot extend 
templates with different parameters: %s" % name) + extend_expansions(old[2], r[2]) + break + else: + raise GrammarError("Can't extend rule %s as it wasn't defined before" % name) + # Same for terminals + + for name, (e, _) in extend_terms: + for old in term_defs: + if old[0] == name: + extend_expansions(old[1][0], e) + break + else: + raise GrammarError("Can't extend terminal %s as it wasn't defined before" % name) ## Handle terminals # Verify correctness 1 diff --git a/tests/test_grammar.py b/tests/test_grammar.py index 760d563..ad29c75 100644 --- a/tests/test_grammar.py +++ b/tests/test_grammar.py @@ -3,7 +3,7 @@ from __future__ import absolute_import import sys from unittest import TestCase, main -from lark import Lark, Token +from lark import Lark, Token, Tree from lark.load_grammar import GrammarLoader, GrammarError @@ -40,12 +40,30 @@ class TestGrammar(TestCase): %import .grammars.ab (startab, A, B) - %override A: "C" - %override B: "D" + %override A: "c" + %override B: "d" """, start='startab', source_path=__file__) - a = p.parse('CD') - self.assertEqual(a.children[0].children, [Token('A', 'C'), Token('B', 'D')]) + a = p.parse('cd') + self.assertEqual(a.children[0].children, [Token('A', 'c'), Token('B', 'd')]) + + def test_extend_rule(self): + p = Lark(""" + %import .grammars.ab (startab, A, B, expr) + + %extend expr: B A + """, start='startab', source_path=__file__) + a = p.parse('abab') + self.assertEqual(a.children[0].children, ['a', Tree('expr', ['b', 'a']), 'b']) + + def test_extend_term(self): + p = Lark(""" + %import .grammars.ab (startab, A, B, expr) + + %extend A: "c" + """, start='startab', source_path=__file__) + a = p.parse('acbb') + self.assertEqual(a.children[0].children, ['a', Tree('expr', ['c', 'b']), 'b'])