Browse Source

Merge pull request #801 from lark-parser/override_stmt

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.11.2
Erez Shinan 3 years ago
committed by GitHub
parent
commit
955e64597a
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 51 additions and 6 deletions
  1. +13
    -0
      docs/grammar.md
  2. +1
    -0
      lark/grammars/lark.lark
  3. +2
    -1
      lark/lexer.py
  4. +22
    -5
      lark/load_grammar.py
  5. +13
    -0
      tests/test_grammar.py

+ 13
- 0
docs/grammar.md View File

@@ -289,3 +289,16 @@ Note that `%ignore` directives cannot be imported. Imported rules will abide by


Declare a terminal without defining it. Useful for plugins.


### %override

Override a rule, affecting all the rules that refer to it.

Useful for implementing an inheritance pattern when importing grammars.

**Example:**
```perl
%import my_grammar (start, number, NUMBER)

// Add hex support to my_grammar
%override number: NUMBER | /0x\w+/
```

+ 1
- 0
lark/grammars/lark.lark View File

@@ -15,6 +15,7 @@ priority: "." NUMBER
statement: "%ignore" expansions -> ignore statement: "%ignore" expansions -> ignore
| "%import" import_path ["->" name] -> import | "%import" import_path ["->" name] -> import
| "%import" import_path name_list -> multi_import | "%import" import_path name_list -> multi_import
| "%override" rule -> override_rule
| "%declare" name+ -> declare | "%declare" name+ -> declare


!import_path: "."? name ("." name)* !import_path: "."? name ("." name)*


+ 2
- 1
lark/lexer.py View File

@@ -307,7 +307,8 @@ class TraditionalLexer(Lexer):
if t.pattern.min_width == 0: if t.pattern.min_width == 0:
raise LexError("Lexer does not allow zero-width terminals. (%s: %s)" % (t.name, t.pattern)) raise LexError("Lexer does not allow zero-width terminals. (%s: %s)" % (t.name, t.pattern))


assert set(conf.ignore) <= {t.name for t in terminals}
if not (set(conf.ignore) <= {t.name for t in terminals}):
raise LexError("Ignore terminals are not defined: %s" % (set(conf.ignore) - {t.name for t in terminals}))


# Init # Init
self.newline_types = frozenset(t.name for t in terminals if _regexp_has_newline(t.pattern.to_regexp())) self.newline_types = frozenset(t.name for t in terminals if _regexp_has_newline(t.pattern.to_regexp()))


+ 22
- 5
lark/load_grammar.py View File

@@ -93,6 +93,7 @@ TERMINALS = {
'COMMENT': r'\s*//[^\n]*', 'COMMENT': r'\s*//[^\n]*',
'_TO': '->', '_TO': '->',
'_IGNORE': r'%ignore', '_IGNORE': r'%ignore',
'_OVERRIDE': r'%override',
'_DECLARE': r'%declare', '_DECLARE': r'%declare',
'_IMPORT': r'%import', '_IMPORT': r'%import',
'NUMBER': r'[+-]?\d+', 'NUMBER': r'[+-]?\d+',
@@ -148,7 +149,8 @@ RULES = {


'term': ['TERMINAL _COLON expansions _NL', 'term': ['TERMINAL _COLON expansions _NL',
'TERMINAL _DOT NUMBER _COLON expansions _NL'], 'TERMINAL _DOT NUMBER _COLON expansions _NL'],
'statement': ['ignore', 'import', 'declare'],
'statement': ['ignore', 'import', 'declare', 'override_rule'],
'override_rule': ['_OVERRIDE rule'],
'ignore': ['_IGNORE expansions _NL'], 'ignore': ['_IGNORE expansions _NL'],
'declare': ['_DECLARE _declare_args _NL'], 'declare': ['_DECLARE _declare_args _NL'],
'import': ['_IMPORT _import_path _NL', 'import': ['_IMPORT _import_path _NL',
@@ -947,6 +949,7 @@ class GrammarLoader:


# Execute statements # Execute statements
ignore, imports = [], {} ignore, imports = [], {}
overriding_rules = []
for (stmt,) in statements: for (stmt,) in statements:
if stmt.data == 'ignore': if stmt.data == 'ignore':
t ,= stmt.children t ,= stmt.children
@@ -995,6 +998,9 @@ class GrammarLoader:
elif stmt.data == 'declare': elif stmt.data == 'declare':
for t in stmt.children: for t in stmt.children:
term_defs.append([t.value, (None, None)]) term_defs.append([t.value, (None, None)])
elif stmt.data == 'override_rule':
r ,= stmt.children
overriding_rules.append(options_from_rule(*r.children))
else: else:
assert False, stmt assert False, stmt


@@ -1007,6 +1013,17 @@ class GrammarLoader:
term_defs += new_td term_defs += new_td
rule_defs += new_rd rule_defs += new_rd


# replace rules by overriding rules, according to name
for r in overriding_rules:
name = r[0]
# remove overridden rule from rule_defs
overridden, rule_defs = classify_bool(rule_defs, lambda r: r[0] == name) # FIXME inefficient
if not overridden:
raise GrammarError("Cannot override a nonexisting rule: %s" % name)
rule_defs.append(r)

## Handle terminals

# Verify correctness 1 # Verify correctness 1
for name, _ in term_defs: for name, _ in term_defs:
if name.startswith('__'): if name.startswith('__'):
@@ -1043,10 +1060,10 @@ class GrammarLoader:


resolve_term_references(term_defs) resolve_term_references(term_defs)


rules = rule_defs
## Handle rules


rule_names = {} rule_names = {}
for name, params, _x, option in rules:
for name, params, _x, option in rule_defs:
# We can't just simply not throw away the tokens later, we need option.keep_all_tokens to correctly generate maybe_placeholders # We can't just simply not throw away the tokens later, we need option.keep_all_tokens to correctly generate maybe_placeholders
if self.global_keep_all_tokens: if self.global_keep_all_tokens:
option.keep_all_tokens = True option.keep_all_tokens = True
@@ -1057,7 +1074,7 @@ class GrammarLoader:
raise GrammarError("Rule '%s' defined more than once" % name) raise GrammarError("Rule '%s' defined more than once" % name)
rule_names[name] = len(params) rule_names[name] = len(params)


for name, params , expansions, _o in rules:
for name, params , expansions, _o in rule_defs:
for i, p in enumerate(params): for i, p in enumerate(params):
if p in rule_names: if p in rule_names:
raise GrammarError("Template Parameter conflicts with rule %s (in template %s)" % (p, name)) raise GrammarError("Template Parameter conflicts with rule %s (in template %s)" % (p, name))
@@ -1080,7 +1097,7 @@ class GrammarLoader:
if sym not in rule_names and sym not in params: if sym not in rule_names and sym not in params:
raise GrammarError("Rule '%s' used but not defined (in rule %s)" % (sym, name)) raise GrammarError("Rule '%s' used but not defined (in rule %s)" % (sym, name))


return Grammar(rules, term_defs, ignore_names)
return Grammar(rule_defs, term_defs, ignore_names)




def load_grammar(grammar, source, import_paths, global_keep_all_tokens): def load_grammar(grammar, source, import_paths, global_keep_all_tokens):


+ 13
- 0
tests/test_grammar.py View File

@@ -21,6 +21,19 @@ class TestGrammar(TestCase):
else: else:
assert False, "example did not raise an error" assert False, "example did not raise an error"


def test_override(self):
# Overrides the 'sep' template in existing grammar to add an optional terminating delimiter
# Thus extending it beyond its original capacity
p = Lark("""
%import .test_templates_import (start, sep)

%override sep{item, delim}: item (delim item)* delim?
%ignore " "
""")

a = p.parse('[1, 2, 3]')
b = p.parse('[1, 2, 3, ]')
assert a == b








Loading…
Cancel
Save