|
|
@@ -73,6 +73,8 @@ TERMINALS = { |
|
|
|
'_RPAR': r'\)', |
|
|
|
'_LBRA': r'\[', |
|
|
|
'_RBRA': r'\]', |
|
|
|
'_LBRACE': r'\{', |
|
|
|
'_RBRACE': r'\}', |
|
|
|
'OP': '[+*]|[?](?![a-z])', |
|
|
|
'_COLON': ':', |
|
|
|
'_COMMA': ',', |
|
|
@@ -99,8 +101,12 @@ RULES = { |
|
|
|
'_list': ['_item', '_list _item'], |
|
|
|
'_item': ['rule', 'term', 'statement', '_NL'], |
|
|
|
|
|
|
|
'rule': ['RULE _COLON expansions _NL', |
|
|
|
'RULE _DOT NUMBER _COLON expansions _NL'], |
|
|
|
'rule': ['RULE template_params _COLON expansions _NL', |
|
|
|
'RULE template_params _DOT NUMBER _COLON expansions _NL'], |
|
|
|
'template_params': ['_LBRACE _template_params _RBRACE', |
|
|
|
''], |
|
|
|
'_template_params': ['RULE', |
|
|
|
'_template_params _COMMA RULE'], |
|
|
|
'expansions': ['alias', |
|
|
|
'expansions _OR alias', |
|
|
|
'expansions _NL _OR alias'], |
|
|
@@ -123,7 +129,8 @@ RULES = { |
|
|
|
'value': ['terminal', |
|
|
|
'nonterminal', |
|
|
|
'literal', |
|
|
|
'range'], |
|
|
|
'range', |
|
|
|
'template_usage'], |
|
|
|
|
|
|
|
'terminal': ['TERMINAL'], |
|
|
|
'nonterminal': ['RULE'], |
|
|
@@ -132,9 +139,13 @@ RULES = { |
|
|
|
|
|
|
|
'maybe': ['_LBRA expansions _RBRA'], |
|
|
|
'range': ['STRING _DOTDOT STRING'], |
|
|
|
|
|
|
|
'template_usage': ['RULE _LBRACE _template_args _RBRACE'], |
|
|
|
'_template_args': ['value', |
|
|
|
'_template_args _COMMA value'], |
|
|
|
|
|
|
|
'term': ['TERMINAL _COLON expansions _NL', |
|
|
|
'TERMINAL _DOT NUMBER _COLON expansions _NL'], |
|
|
|
'TERMINAL _DOT NUMBER _COLON expansions _NL'], |
|
|
|
'statement': ['ignore', 'import', 'declare'], |
|
|
|
'ignore': ['_IGNORE expansions _NL'], |
|
|
|
'declare': ['_DECLARE _declare_args _NL'], |
|
|
@@ -340,6 +351,39 @@ class PrepareAnonTerminals(Transformer_InPlace): |
|
|
|
|
|
|
|
return Terminal(term_name, filter_out=isinstance(p, PatternStr)) |
|
|
|
|
|
|
|
class _ReplaceSymbols(Transformer_InPlace): |
|
|
|
" Helper for ApplyTemplates " |
|
|
|
|
|
|
|
def __init__(self): |
|
|
|
self.names = {} |
|
|
|
|
|
|
|
def value(self, c): |
|
|
|
if len(c) == 1 and isinstance(c[0], Token) and c[0].type == 'RULE' and c[0].value in self.names: |
|
|
|
return self.names[c[0].value] |
|
|
|
return self.__default__('value', c, None) |
|
|
|
|
|
|
|
class ApplyTemplates(Transformer_InPlace): |
|
|
|
" Apply the templates, creating new rules that represent the used templates " |
|
|
|
|
|
|
|
def __init__(self, rule_defs): |
|
|
|
self.rule_defs = rule_defs |
|
|
|
self.replacer = _ReplaceSymbols() |
|
|
|
self.created_templates = set() |
|
|
|
|
|
|
|
def template_usage(self, c): |
|
|
|
name = c[0] |
|
|
|
args = c[1:] |
|
|
|
result_name = "%s{%s}" % (name, ",".join(a.name for a in args)) |
|
|
|
if result_name not in self.created_templates: |
|
|
|
self.created_templates.add(result_name) |
|
|
|
(_n, params, tree, options) ,= (t for t in self.rule_defs if t[0] == name) |
|
|
|
assert len(params) == len(args), args |
|
|
|
result_tree = deepcopy(tree) |
|
|
|
self.replacer.names = dict(zip(params, args)) |
|
|
|
self.replacer.transform(result_tree) |
|
|
|
self.rule_defs.append((result_name, [], result_tree, deepcopy(options))) |
|
|
|
return NonTerminal(result_name) |
|
|
|
|
|
|
|
|
|
|
|
def _rfind(s, choices): |
|
|
|
return max(s.rfind(c) for c in choices) |
|
|
@@ -465,19 +509,28 @@ class Grammar: |
|
|
|
|
|
|
|
transformer = PrepareLiterals() * TerminalTreeToPattern() |
|
|
|
terminals = [TerminalDef(name, transformer.transform( term_tree ), priority) |
|
|
|
for name, (term_tree, priority) in term_defs if term_tree] |
|
|
|
for name, (term_tree, priority) in term_defs if term_tree] |
|
|
|
|
|
|
|
# ================= |
|
|
|
# Compile Rules |
|
|
|
# ================= |
|
|
|
|
|
|
|
# 1. Pre-process terminals |
|
|
|
transformer = PrepareLiterals() * PrepareSymbols() * PrepareAnonTerminals(terminals) # Adds to terminals |
|
|
|
transformer = PrepareLiterals() * PrepareSymbols() * PrepareAnonTerminals(terminals) # Adds to terminals |
|
|
|
|
|
|
|
# 2. Inline Templates |
|
|
|
|
|
|
|
# 2. Convert EBNF to BNF (and apply step 1) |
|
|
|
transformer *= ApplyTemplates(rule_defs) |
|
|
|
|
|
|
|
# 3. Convert EBNF to BNF (and apply step 1 & 2) |
|
|
|
ebnf_to_bnf = EBNF_to_BNF() |
|
|
|
rules = [] |
|
|
|
for name, rule_tree, options in rule_defs: |
|
|
|
i = 0 |
|
|
|
while i < len(rule_defs): # We have to do it like this because rule_defs might grow due to templates |
|
|
|
name, params, rule_tree, options = rule_defs[i] |
|
|
|
i += 1 |
|
|
|
if len(params) != 0: # Dont transform templates |
|
|
|
continue |
|
|
|
ebnf_to_bnf.rule_options = RuleOptions(keep_all_tokens=True) if options.keep_all_tokens else None |
|
|
|
ebnf_to_bnf.prefix = name |
|
|
|
tree = transformer.transform(rule_tree) |
|
|
@@ -487,7 +540,7 @@ class Grammar: |
|
|
|
|
|
|
|
assert len(rules) == len({name for name, _t, _o in rules}), "Whoops, name collision" |
|
|
|
|
|
|
|
# 3. Compile tree to Rule objects |
|
|
|
# 4. Compile tree to Rule objects |
|
|
|
rule_tree_to_text = RuleTreeToText() |
|
|
|
|
|
|
|
simplify_rule = SimplifyRule_Visitor() |
|
|
@@ -575,7 +628,7 @@ def import_from_grammar_into_namespace(grammar, namespace, aliases): |
|
|
|
""" |
|
|
|
|
|
|
|
imported_terms = dict(grammar.term_defs) |
|
|
|
imported_rules = {n:(n,deepcopy(t),o) for n,t,o in grammar.rule_defs} |
|
|
|
imported_rules = {n:(n,p,deepcopy(t),o) for n,p,t,o in grammar.rule_defs} |
|
|
|
|
|
|
|
term_defs = [] |
|
|
|
rule_defs = [] |
|
|
@@ -584,14 +637,19 @@ def import_from_grammar_into_namespace(grammar, namespace, aliases): |
|
|
|
if symbol.type != 'RULE': |
|
|
|
return [] |
|
|
|
try: |
|
|
|
_, tree, _ = imported_rules[symbol] |
|
|
|
_, params, tree,_ = imported_rules[symbol] |
|
|
|
except KeyError: |
|
|
|
raise GrammarError("Missing symbol '%s' in grammar %s" % (symbol, namespace)) |
|
|
|
return _find_used_symbols(tree) - set(params) |
|
|
|
|
|
|
|
return _find_used_symbols(tree) |
|
|
|
|
|
|
|
|
|
|
|
def get_namespace_name(name): |
|
|
|
def get_namespace_name(name, params): |
|
|
|
if params is not None: |
|
|
|
try: |
|
|
|
return params[name] |
|
|
|
except KeyError: |
|
|
|
pass |
|
|
|
try: |
|
|
|
return aliases[name].value |
|
|
|
except KeyError: |
|
|
@@ -602,15 +660,18 @@ def import_from_grammar_into_namespace(grammar, namespace, aliases): |
|
|
|
to_import = list(bfs(aliases, rule_dependencies)) |
|
|
|
for symbol in to_import: |
|
|
|
if symbol.type == 'TERMINAL': |
|
|
|
term_defs.append([get_namespace_name(symbol), imported_terms[symbol]]) |
|
|
|
term_defs.append([get_namespace_name(symbol, None), imported_terms[symbol]]) |
|
|
|
else: |
|
|
|
assert symbol.type == 'RULE' |
|
|
|
rule = imported_rules[symbol] |
|
|
|
for t in rule[1].iter_subtrees(): |
|
|
|
_, params, tree, options = imported_rules[symbol] |
|
|
|
params_map = {p: ('%s__%s' if p[0]!='_' else '_%s__%s' ) % (namespace, p) for p in params} |
|
|
|
for t in tree.iter_subtrees(): |
|
|
|
for i, c in enumerate(t.children): |
|
|
|
if isinstance(c, Token) and c.type in ('RULE', 'TERMINAL'): |
|
|
|
t.children[i] = Token(c.type, get_namespace_name(c)) |
|
|
|
rule_defs.append((get_namespace_name(symbol), rule[1], rule[2])) |
|
|
|
t.children[i] = Token(c.type, get_namespace_name(c, params_map)) |
|
|
|
params = [params_map[p] for p in params] # We can not rely on ordered dictionaries |
|
|
|
rule_defs.append((get_namespace_name(symbol, params_map), params, tree, options)) |
|
|
|
|
|
|
|
|
|
|
|
return term_defs, rule_defs |
|
|
|
|
|
|
@@ -648,20 +709,22 @@ def resolve_term_references(term_defs): |
|
|
|
raise GrammarError("Recursion in terminal '%s' (recursion is only allowed in rules, not terminals)" % name) |
|
|
|
|
|
|
|
|
|
|
|
def options_from_rule(name, *x): |
|
|
|
def options_from_rule(name, params, *x): |
|
|
|
if len(x) > 1: |
|
|
|
priority, expansions = x |
|
|
|
priority = int(priority) |
|
|
|
else: |
|
|
|
expansions ,= x |
|
|
|
priority = None |
|
|
|
params = [t.value for t in params.children] if params is not None else [] # For the grammar parser |
|
|
|
|
|
|
|
keep_all_tokens = name.startswith('!') |
|
|
|
name = name.lstrip('!') |
|
|
|
expand1 = name.startswith('?') |
|
|
|
name = name.lstrip('?') |
|
|
|
|
|
|
|
return name, expansions, RuleOptions(keep_all_tokens, expand1, priority=priority) |
|
|
|
return name, params, expansions, RuleOptions(keep_all_tokens, expand1, priority=priority, |
|
|
|
template_source=(name if params else None)) |
|
|
|
|
|
|
|
|
|
|
|
def symbols_from_strcase(expansion): |
|
|
@@ -684,8 +747,8 @@ class GrammarLoader: |
|
|
|
def __init__(self): |
|
|
|
terminals = [TerminalDef(name, PatternRE(value)) for name, value in TERMINALS.items()] |
|
|
|
|
|
|
|
rules = [options_from_rule(name, x) for name, x in RULES.items()] |
|
|
|
rules = [Rule(NonTerminal(r), symbols_from_strcase(x.split()), i, None, o) for r, xs, o in rules for i, x in enumerate(xs)] |
|
|
|
rules = [options_from_rule(name, None, x) for name, x in RULES.items()] |
|
|
|
rules = [Rule(NonTerminal(r), symbols_from_strcase(x.split()), i, None, o) for r, _p, xs, o in rules for i, x in enumerate(xs)] |
|
|
|
callback = ParseTreeBuilder(rules, ST).create_callback() |
|
|
|
lexer_conf = LexerConf(terminals, ['WS', 'COMMENT']) |
|
|
|
|
|
|
@@ -832,23 +895,37 @@ class GrammarLoader: |
|
|
|
|
|
|
|
rules = rule_defs |
|
|
|
|
|
|
|
rule_names = set() |
|
|
|
for name, _x, _o in rules: |
|
|
|
rule_names = {} |
|
|
|
for name, params, _x, _o in rules: |
|
|
|
if name.startswith('__'): |
|
|
|
raise GrammarError('Names starting with double-underscore are reserved (Error at %s)' % name) |
|
|
|
if name in rule_names: |
|
|
|
raise GrammarError("Rule '%s' defined more than once" % name) |
|
|
|
rule_names.add(name) |
|
|
|
|
|
|
|
for name, expansions, _o in rules: |
|
|
|
rule_names[name] = len(params) |
|
|
|
|
|
|
|
for name, params , expansions, _o in rules: |
|
|
|
for i, p in enumerate(params): |
|
|
|
if p in rule_names: |
|
|
|
raise GrammarError("Template Parameter conflicts with rule %s (in template %s)" % (p, name)) |
|
|
|
if p in params[:i]: |
|
|
|
raise GrammarError("Duplicate Template Parameter %s (in template %s)" % (p, name)) |
|
|
|
for temp in expansions.find_data('template_usage'): |
|
|
|
sym = temp.children[0] |
|
|
|
args = temp.children[1:] |
|
|
|
if sym not in rule_names: |
|
|
|
raise GrammarError("Template '%s' used but not defined (in rule %s)" % (sym, name)) |
|
|
|
if len(args) != rule_names[sym]: |
|
|
|
raise GrammarError("Wrong number of template arguments used for %s " |
|
|
|
"(expected %s, got %s) (in rule %s)"%(sym, rule_names[sym], len(args), name)) |
|
|
|
for sym in _find_used_symbols(expansions): |
|
|
|
if sym.type == 'TERMINAL': |
|
|
|
if sym not in terminal_names: |
|
|
|
raise GrammarError("Token '%s' used but not defined (in rule %s)" % (sym, name)) |
|
|
|
else: |
|
|
|
if sym not in rule_names: |
|
|
|
if sym not in rule_names and sym not in params: |
|
|
|
raise GrammarError("Rule '%s' used but not defined (in rule %s)" % (sym, name)) |
|
|
|
|
|
|
|
|
|
|
|
return Grammar(rules, term_defs, ignore_names) |
|
|
|
|
|
|
|
|
|
|
|