diff --git a/lark/lexer.py b/lark/lexer.py index 730d95e..72b299c 100644 --- a/lark/lexer.py +++ b/lark/lexer.py @@ -307,7 +307,8 @@ class TraditionalLexer(Lexer): if t.pattern.min_width == 0: raise LexError("Lexer does not allow zero-width terminals. (%s: %s)" % (t.name, t.pattern)) - assert set(conf.ignore) <= {t.name for t in terminals} + if not (set(conf.ignore) <= {t.name for t in terminals}): + raise LexError("Ignore terminals are not defined: %s" % (set(conf.ignore) - {t.name for t in terminals})) # Init self.newline_types = frozenset(t.name for t in terminals if _regexp_has_newline(t.pattern.to_regexp())) diff --git a/lark/load_grammar.py b/lark/load_grammar.py index a07769f..f7da69d 100644 --- a/lark/load_grammar.py +++ b/lark/load_grammar.py @@ -93,6 +93,7 @@ TERMINALS = { 'COMMENT': r'\s*//[^\n]*', '_TO': '->', '_IGNORE': r'%ignore', + '_OVERRIDE': r'%override', '_DECLARE': r'%declare', '_IMPORT': r'%import', 'NUMBER': r'[+-]?\d+', @@ -148,7 +149,8 @@ RULES = { 'term': ['TERMINAL _COLON expansions _NL', 'TERMINAL _DOT NUMBER _COLON expansions _NL'], - 'statement': ['ignore', 'import', 'declare'], + 'statement': ['ignore', 'import', 'declare', 'override_rule'], + 'override_rule': ['_OVERRIDE rule'], 'ignore': ['_IGNORE expansions _NL'], 'declare': ['_DECLARE _declare_args _NL'], 'import': ['_IMPORT _import_path _NL', @@ -949,6 +951,7 @@ class GrammarLoader: # Execute statements ignore, imports = [], {} + overriding_rules = [] for (stmt,) in statements: if stmt.data == 'ignore': t ,= stmt.children @@ -997,6 +1000,9 @@ class GrammarLoader: elif stmt.data == 'declare': for t in stmt.children: term_defs.append([t.value, (None, None)]) + elif stmt.data == 'override_rule': + r ,= stmt.children + overriding_rules.append(options_from_rule(*r.children)) else: assert False, stmt @@ -1009,6 +1015,15 @@ class GrammarLoader: term_defs += new_td rule_defs += new_rd + for r in overriding_rules: + name = r[0] + overridden, rule_defs = classify_bool(rule_defs, lambda r: r[0] == name) # FIXME inefficient + if not overridden: + raise GrammarError("Cannot override a nonexisting rule: %s" % name) + rule_defs.append(r) + + ## Handle terminals + # Verify correctness 1 for name, _ in term_defs: if name.startswith('__'): @@ -1045,10 +1060,10 @@ class GrammarLoader: resolve_term_references(term_defs) - rules = rule_defs + ## Handle rules rule_names = {} - for name, params, _x, option in rules: + for name, params, _x, option in rule_defs: # We can't just simply not throw away the tokens later, we need option.keep_all_tokens to correctly generate maybe_placeholders if self.global_keep_all_tokens: option.keep_all_tokens = True @@ -1059,7 +1074,7 @@ class GrammarLoader: raise GrammarError("Rule '%s' defined more than once" % name) rule_names[name] = len(params) - for name, params , expansions, _o in rules: + for name, params , expansions, _o in rule_defs: for i, p in enumerate(params): if p in rule_names: raise GrammarError("Template Parameter conflicts with rule %s (in template %s)" % (p, name)) @@ -1082,7 +1097,7 @@ class GrammarLoader: if sym not in rule_names and sym not in params: raise GrammarError("Rule '%s' used but not defined (in rule %s)" % (sym, name)) - return Grammar(rules, term_defs, ignore_names) + return Grammar(rule_defs, term_defs, ignore_names) def load_grammar(grammar, source, import_paths, global_keep_all_tokens): diff --git a/tests/test_grammar.py b/tests/test_grammar.py index 363f897..3ce76f6 100644 --- a/tests/test_grammar.py +++ b/tests/test_grammar.py @@ -21,6 +21,19 @@ class TestGrammar(TestCase): else: assert False, "example did not raise an error" + def test_override(self): + # Overrides the 'sep' template in existing grammar to add an optional terminating delimiter + # Thus extending it beyond its original capacity + p = Lark(""" + %import .test_templates_import (start, sep) + + %override sep{item, delim}: item (delim item)* delim? + %ignore " " + """) + + a = p.parse('[1, 2, 3]') + b = p.parse('[1, 2, 3, ]') + assert a == b