Browse Source

Implemented Templates

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.8.6
MegaIng1 4 years ago
parent
commit
b8f8448a0b
1 changed files with 117 additions and 19 deletions
  1. +117
    -19
      lark/load_grammar.py

+ 117
- 19
lark/load_grammar.py View File

@@ -99,13 +99,14 @@ TERMINALS = {
RULES = {
'start': ['_list'],
'_list': ['_item', '_list _item'],
'_item': ['rule', 'rule_template', 'term', 'statement', '_NL'],
'_item': ['rule', 'template', 'term', 'statement', '_NL'],

'template': ['RULE _LBRACE template_params _RBRACE _COLON expansions _NL',
'RULE _LBRACE template_params _RBRACE _DOT NUMBER _COLON expansions _NL'],

'template_params': ['RULE',
'template_params _COMMA RULE'],
'template_params': ['_template_params'],
'_template_params': ['RULE',
'_template_params _COMMA RULE'],

'rule': ['RULE _COLON expansions _NL',
'RULE _DOT NUMBER _COLON expansions _NL'],
@@ -142,9 +143,9 @@ RULES = {
'maybe': ['_LBRA expansions _RBRA'],
'range': ['STRING _DOTDOT STRING'],
'template_usage': ['RULE _LBRACE template_args _RBRACE'],
'template_args': ['atom',
'template_args _COMMA atom'],
'template_usage': ['RULE _LBRACE _template_args _RBRACE'],
'_template_args': ['value',
'_template_args _COMMA value'],

'term': ['TERMINAL _COLON expansions _NL',
'TERMINAL _DOT NUMBER _COLON expansions _NL'],
@@ -353,6 +354,44 @@ class PrepareAnonTerminals(Transformer_InPlace):

return Terminal(term_name, filter_out=isinstance(p, PatternStr))

class _ReplaceSymbols(Transformer_InPlace):
" Helper for ApplyTemplates "
def __init__(self):
super(_ReplaceSymbols, self).__init__()
self.names = {}

def value(self, c):
if len(c) == 1 and isinstance(c[0], Token) and c[0].type == 'RULE' and c[0].value in self.names:
return self.names[c[0].value]
return self.__default__('value', c, None)

class ApplyTemplates(Transformer_InPlace):
" Apply the templates, creating new rules that represent the used templates "

def __init__(self, temp_defs, rule_defs):
super(ApplyTemplates, self).__init__()
self.temp_defs = temp_defs
self.rule_defs = rule_defs
self.replacer = _ReplaceSymbols()
self.created_templates = set()
def _get_template_name(self, name, args):
return "_%s{%s}" % (name, ",".join(a.name for a in args))
def template_usage(self, c):
name = c[0]
args = c[1:]
result_name = self._get_template_name(name.value, args)
if result_name not in self.created_templates:
(_n, params, tree, options) ,= (t for t in self.temp_defs if t[0] == name)
assert len(params) == len(args), args
result_tree = deepcopy(tree)
self.replacer.names = dict(zip(params, args))
self.replacer.transform(result_tree)
self.rule_defs.append((result_name, result_tree, deepcopy(options)))
return NonTerminal(result_name)


def _rfind(s, choices):
return max(s.rfind(c) for c in choices)
@@ -452,9 +491,10 @@ def _choice_of_rules(rules):
return ST('expansions', [ST('expansion', [Token('RULE', name)]) for name in rules])

class Grammar:
def __init__(self, rule_defs, term_defs, ignore):
def __init__(self, rule_defs, term_defs, temp_defs, ignore):
self.term_defs = term_defs
self.rule_defs = rule_defs
self.temp_defs = temp_defs
self.ignore = ignore

def compile(self, start):
@@ -462,6 +502,7 @@ class Grammar:
# So deepcopy allows calling compile more than once.
term_defs = deepcopy(list(self.term_defs))
rule_defs = deepcopy(self.rule_defs)
temp_defs = deepcopy(self.temp_defs)

# ===================
# Compile Terminals
@@ -478,29 +519,38 @@ class Grammar:

transformer = PrepareLiterals() * TerminalTreeToPattern()
terminals = [TerminalDef(name, transformer.transform( term_tree ), priority)
for name, (term_tree, priority) in term_defs if term_tree]
for name, (term_tree, priority) in term_defs if term_tree]

# =================
# Compile Rules
# =================
# TODO: add templates

# 1. Pre-process terminals
transformer = PrepareLiterals() * PrepareSymbols() * PrepareAnonTerminals(terminals) # Adds to terminals
transformer = PrepareLiterals() * PrepareSymbols() * PrepareAnonTerminals(terminals) # Adds to terminals

# 2. Inline Templates

# 2. Convert EBNF to BNF (and apply step 1)
transformer *= ApplyTemplates(temp_defs, rule_defs)

# 3. Convert EBNF to BNF (and apply step 1 & 2)
ebnf_to_bnf = EBNF_to_BNF()
rules = []
for name, rule_tree, options in rule_defs:
i = 0
while i < len(rule_defs): # We have to do it like this because rule_defs might grow due to templates
name, rule_tree, options = rule_defs[i]
ebnf_to_bnf.rule_options = RuleOptions(keep_all_tokens=True) if options.keep_all_tokens else None
ebnf_to_bnf.prefix = name
tree = transformer.transform(rule_tree)
res = ebnf_to_bnf.transform(tree)
rules.append((name, res, options))
i += 1
rules += ebnf_to_bnf.new_rules

assert len(rules) == len({name for name, _t, _o in rules}), "Whoops, name collision"

# 3. Compile tree to Rule objects
# 4. Compile tree to Rule objects
rule_tree_to_text = RuleTreeToText()

simplify_rule = SimplifyRule_Visitor()
@@ -589,9 +639,11 @@ def import_from_grammar_into_namespace(grammar, namespace, aliases):

imported_terms = dict(grammar.term_defs)
imported_rules = {n:(n,deepcopy(t),o) for n,t,o in grammar.rule_defs}
imported_temps = {n:(n,deepcopy(t),o) for n,t,o in grammar.temp_defs}

term_defs = []
rule_defs = []
temp_defs = []

def rule_dependencies(symbol):
if symbol.type != 'RULE':
@@ -661,8 +713,8 @@ def resolve_term_references(term_defs):
raise GrammarError("Recursion in terminal '%s' (recursion is only allowed in rules, not terminals)" % name)


def options_from_rule(name, *x,is_template=False):
if len(x) > (1+is_template):
def options_from_rule(name, *x):
if len(x) > 1:
priority, expansions = x
priority = int(priority)
else:
@@ -676,6 +728,22 @@ def options_from_rule(name, *x,is_template=False):

return name, expansions, RuleOptions(keep_all_tokens, expand1, priority=priority)

def options_from_template(name, params, *x):
if len(x) > 1:
priority, expansions = x
priority = int(priority)
else:
expansions ,= x
priority = None
params = [t.value for t in params.children]

keep_all_tokens = name.startswith('!')
name = name.lstrip('!')
expand1 = name.startswith('?')
name = name.lstrip('?')

return name, params, expansions, RuleOptions(keep_all_tokens, expand1, priority=priority)


def symbols_from_strcase(expansion):
return [Terminal(x, filter_out=x.startswith('_')) if x.isupper() else NonTerminal(x) for x in expansion]
@@ -741,14 +809,14 @@ class GrammarLoader:
defs = classify(tree.children, lambda c: c.data, lambda c: c.children)
term_defs = defs.pop('term', [])
rule_defs = defs.pop('rule', [])
template_defs = defs.pop('template', [])
temp_defs = defs.pop('template', [])
statements = defs.pop('statement', [])
assert not defs

term_defs = [td if len(td)==3 else (td[0], 1, td[1]) for td in term_defs]
term_defs = [(name.value, (t, int(p))) for name, p, t in term_defs]
rule_defs = [options_from_rule(*x) for x in rule_defs]
template_defs = [options_from_rule(*x, is_template=True) for x in rule_defs]
temp_defs = [options_from_template(*x) for x in temp_defs]

# Execute statements
ignore, imports = [], {}
@@ -804,10 +872,11 @@ class GrammarLoader:
for dotted_path, (base_paths, aliases) in imports.items():
grammar_path = os.path.join(*dotted_path) + EXT
g = import_grammar(grammar_path, base_paths=base_paths)
new_td, new_rd = import_from_grammar_into_namespace(g, '__'.join(dotted_path), aliases)
new_td, new_rd, new_tp = import_from_grammar_into_namespace(g, '__'.join(dotted_path), aliases)

term_defs += new_td
rule_defs += new_rd
temp_defs += new_tp

# Verify correctness 1
for name, _ in term_defs:
@@ -854,6 +923,17 @@ class GrammarLoader:
if name in rule_names:
raise GrammarError("Rule '%s' defined more than once" % name)
rule_names.add(name)
temp_names = set()
for name, _p, _x, _o in temp_defs:
if name.startswith('__'):
raise GrammarError('Names starting with double-underscore are reserved (Error at %s (template))' % name)
if name.startswith('_'): # TODO: rethink this decision (not the error msg)
raise GrammarError('Templates are always inline, they should not start with a underscore (Error ar %s)' % name)
if name in temp_names:
raise GrammarError("Template '%s' defined more than once" % name)
temp_names.add(name)
if name in rule_names:
raise GrammarError("Template '%s' conflicts with rule of same name" % name)

for name, expansions, _o in rules:
for sym in _find_used_symbols(expansions):
@@ -861,10 +941,28 @@ class GrammarLoader:
if sym not in terminal_names:
raise GrammarError("Token '%s' used but not defined (in rule %s)" % (sym, name))
else:
if sym not in rule_names:
if sym not in rule_names and sym not in temp_names: # TODO: check that sym is actually used as template
raise GrammarError("Rule '%s' used but not defined (in rule %s)" % (sym, name))

return Grammar(rules, term_defs, ignore_names)
for name, params, expansions, _o in temp_defs:
for i, p in enumerate(params):
if p in rule_names:
raise GrammarError("Template Parameter conflicts with rule %s (in template %s)" % (p, name))
if p in temp_names:
raise GrammarError("Template Parameter conflicts with template %s (in template %s)" % (p, name))
if p in params[:i]:
raise GrammarError("Duplicate Template Parameter %s (in template %s)" % (p, name))
for sym in _find_used_symbols(expansions):
if sym.type == 'TERMINAL':
if sym not in terminal_names:
raise GrammarError("Token '%s' used but not defined (in template %s)" % (sym, name))
else:
if sym not in rule_names and sym not in temp_names and sym not in params:
raise GrammarError("Rule '%s' used but not defined (in template %s)" % (sym, name))
# TODO: check that sym is actually used as template
# TODO: number of template arguments matches requirement

return Grammar(rules, term_defs, temp_defs, ignore_names)





Loading…
Cancel
Save