diff --git a/docs/grammar.md b/docs/grammar.md index f92b013..b899b3f 100644 --- a/docs/grammar.md +++ b/docs/grammar.md @@ -289,3 +289,16 @@ Note that `%ignore` directives cannot be imported. Imported rules will abide by Declare a terminal without defining it. Useful for plugins. +### %override + +Override a rule, affecting all the rules that refer to it. + +Useful for implementing an inheritance pattern when importing grammars. + +**Example:** +```perl +%import my_grammar (start, number, NUMBER) + +// Add hex support to my_grammar +%override number: NUMBER | /0x\w+/ +``` diff --git a/lark/grammars/lark.lark b/lark/grammars/lark.lark index c16db47..6858846 100644 --- a/lark/grammars/lark.lark +++ b/lark/grammars/lark.lark @@ -15,6 +15,7 @@ priority: "." NUMBER statement: "%ignore" expansions -> ignore | "%import" import_path ["->" name] -> import | "%import" import_path name_list -> multi_import + | "%override" rule -> override_rule | "%declare" name+ -> declare !import_path: "."? name ("." name)* diff --git a/lark/lexer.py b/lark/lexer.py index 730d95e..72b299c 100644 --- a/lark/lexer.py +++ b/lark/lexer.py @@ -307,7 +307,8 @@ class TraditionalLexer(Lexer): if t.pattern.min_width == 0: raise LexError("Lexer does not allow zero-width terminals. 
(%s: %s)" % (t.name, t.pattern)) - assert set(conf.ignore) <= {t.name for t in terminals} + if not (set(conf.ignore) <= {t.name for t in terminals}): + raise LexError("Ignore terminals are not defined: %s" % (set(conf.ignore) - {t.name for t in terminals})) # Init self.newline_types = frozenset(t.name for t in terminals if _regexp_has_newline(t.pattern.to_regexp())) diff --git a/lark/load_grammar.py b/lark/load_grammar.py index d663215..0fafc1c 100644 --- a/lark/load_grammar.py +++ b/lark/load_grammar.py @@ -93,6 +93,7 @@ TERMINALS = { 'COMMENT': r'\s*//[^\n]*', '_TO': '->', '_IGNORE': r'%ignore', + '_OVERRIDE': r'%override', '_DECLARE': r'%declare', '_IMPORT': r'%import', 'NUMBER': r'[+-]?\d+', @@ -148,7 +149,8 @@ RULES = { 'term': ['TERMINAL _COLON expansions _NL', 'TERMINAL _DOT NUMBER _COLON expansions _NL'], - 'statement': ['ignore', 'import', 'declare'], + 'statement': ['ignore', 'import', 'declare', 'override_rule'], + 'override_rule': ['_OVERRIDE rule'], 'ignore': ['_IGNORE expansions _NL'], 'declare': ['_DECLARE _declare_args _NL'], 'import': ['_IMPORT _import_path _NL', @@ -947,6 +949,7 @@ class GrammarLoader: # Execute statements ignore, imports = [], {} + overriding_rules = [] for (stmt,) in statements: if stmt.data == 'ignore': t ,= stmt.children @@ -995,6 +998,9 @@ class GrammarLoader: elif stmt.data == 'declare': for t in stmt.children: term_defs.append([t.value, (None, None)]) + elif stmt.data == 'override_rule': + r ,= stmt.children + overriding_rules.append(options_from_rule(*r.children)) else: assert False, stmt @@ -1007,6 +1013,17 @@ class GrammarLoader: term_defs += new_td rule_defs += new_rd + # replace rules by overriding rules, according to name + for r in overriding_rules: + name = r[0] + # remove overridden rule from rule_defs + overridden, rule_defs = classify_bool(rule_defs, lambda r: r[0] == name) # FIXME inefficient + if not overridden: + raise GrammarError("Cannot override a nonexisting rule: %s" % name) + rule_defs.append(r) + + ##
Handle terminals + # Verify correctness 1 for name, _ in term_defs: if name.startswith('__'): @@ -1043,10 +1060,10 @@ class GrammarLoader: resolve_term_references(term_defs) - rules = rule_defs + ## Handle rules rule_names = {} - for name, params, _x, option in rules: + for name, params, _x, option in rule_defs: # We can't just simply not throw away the tokens later, we need option.keep_all_tokens to correctly generate maybe_placeholders if self.global_keep_all_tokens: option.keep_all_tokens = True @@ -1057,7 +1074,7 @@ class GrammarLoader: raise GrammarError("Rule '%s' defined more than once" % name) rule_names[name] = len(params) - for name, params , expansions, _o in rules: + for name, params , expansions, _o in rule_defs: for i, p in enumerate(params): if p in rule_names: raise GrammarError("Template Parameter conflicts with rule %s (in template %s)" % (p, name)) @@ -1080,7 +1097,7 @@ class GrammarLoader: if sym not in rule_names and sym not in params: raise GrammarError("Rule '%s' used but not defined (in rule %s)" % (sym, name)) - return Grammar(rules, term_defs, ignore_names) + return Grammar(rule_defs, term_defs, ignore_names) def load_grammar(grammar, source, import_paths, global_keep_all_tokens): diff --git a/tests/test_grammar.py b/tests/test_grammar.py index 363f897..3ce76f6 100644 --- a/tests/test_grammar.py +++ b/tests/test_grammar.py @@ -21,6 +21,19 @@ class TestGrammar(TestCase): else: assert False, "example did not raise an error" + def test_override(self): + # Overrides the 'sep' template in existing grammar to add an optional terminating delimiter + # Thus extending it beyond its original capacity + p = Lark(""" + %import .test_templates_import (start, sep) + + %override sep{item, delim}: item (delim item)* delim? + %ignore " " + """) + + a = p.parse('[1, 2, 3]') + b = p.parse('[1, 2, 3, ]') + assert a == b