@@ -324,7 +324,7 @@ class TokenTreeToPattern(Transformer):
     def expansion(self, items):
         if len(items) == 1:
             return items[0]
-        if len(set(i.flags for i in items)) > 1:
+        if len({i.flags for i in items}) > 1:
             raise GrammarError("Lark doesn't support joining tokens with conflicting flags!")
         return PatternRE(''.join(i.to_regexp() for i in items), items[0].flags)
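
The change above swaps a `set()` call over a generator for a set comprehension; behaviour is identical, the comprehension is simply the more idiomatic spelling. The check itself exists because token patterns compiled with different regexp flags (say, one case-insensitive and one not) cannot be concatenated into a single regexp. A minimal sketch of the same invariant outside Lark, with `FakePattern` as a hypothetical stand-in for Lark's pattern objects:

# Illustration only: FakePattern is a stand-in, not Lark's Pattern class.
class FakePattern:
    def __init__(self, regexp, flags=()):
        self.regexp = regexp
        self.flags = tuple(flags)

    def to_regexp(self):
        return self.regexp

def join_patterns(items):
    # Same rule as the hunk above: concatenation is only sound when
    # every part carries identical flags.
    if len({p.flags for p in items}) > 1:
        raise ValueError("can't join patterns with conflicting flags")
    return ''.join(p.to_regexp() for p in items)

print(join_patterns([FakePattern('foo'), FakePattern('bar')]))  # 'foobar'
join_patterns([FakePattern('foo', 'i'), FakePattern('bar')])    # raises ValueError
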
@@ -348,60 +348,64 @@ def _interleave(l, item):
     elif is_terminal(e):
         yield item


+def _choice_of_rules(rules):
+    return T('expansions', [T('expansion', [Token('RULE', name)]) for name in rules])
+
+
 class Grammar:
     def __init__(self, rule_defs, token_defs, extra):
         self.token_defs = token_defs
         self.rule_defs = rule_defs
         self.extra = extra

-    def compile(self, lexer=False, start=None):
-        if not lexer:
-            rule_defs = deepcopy(self.rule_defs)
-
-            # XXX VERY HACKY!! There must be a better way..
-            ignore_tokens = [('_'+name, t) for name, t in self.token_defs if name in self.extra['ignore']]
-            if ignore_tokens:
-                self.token_defs = [('_'+name if name in self.extra['ignore'] else name,t) for name,t in self.token_defs]
-                ignore_names = [t[0] for t in ignore_tokens]
-                expr = Token('RULE', '__ignore')
-                for r, tree, _o in rule_defs:
-                    for exp in tree.find_data('expansion'):
-                        exp.children = list(_interleave(exp.children, expr))
-                        if r == start:
-                            exp.children = [expr] + exp.children
-                    for exp in tree.find_data('expr'):
-                        exp.children[0] = T('expansion', list(_interleave(exp.children[:1], expr)))
-
-                x = [T('expansion', [Token('RULE', x)]) for x in ignore_names]
-                _ignore_tree = T('expr', [T('expansions', x), Token('OP', '?')])
-                rule_defs.append(('__ignore', _ignore_tree, None))
-            # End of "ignore" section
-
-            rule_defs += [(name, tree, RuleOptions(keep_all_tokens=True)) for name, tree in self.token_defs]
-            token_defs = []
-
-            tokens_to_convert = {name: '__token_'+name for name, tree, _ in rule_defs if is_terminal(name)}
-            new_rule_defs = []
-            for name, tree, options in rule_defs:
-                if name in tokens_to_convert:
-                    if name.startswith('_'):
-                        options = RuleOptions.new_from(options, filter_out=True)
-                    else:
-                        options = RuleOptions.new_from(options, create_token=name)
-                    name = tokens_to_convert[name]
-                    inner = Token('RULE', name + '_inner')
-                    new_rule_defs.append((name, T('expansions', [T('expansion', [inner])]), None))
-                    name = inner
+    def _prepare_scanless_grammar(self, start):
+        # XXX Pretty hacky! There should be a better way to write this method..
+
+        rule_defs = deepcopy(self.rule_defs)
+        term_defs = self.token_defs
+
+        # Implement the "%ignore" feature without a lexer..
+        terms_to_ignore = {name:'__'+name for name in self.extra['ignore']}
+        if terms_to_ignore:
+            assert set(terms_to_ignore) <= {name for name, t in term_defs}
+            term_defs = [(terms_to_ignore.get(name,name),t) for name,t in term_defs]
+            expr = Token('RULE', '__ignore')
+            for r, tree, _o in rule_defs:
+                for exp in tree.find_data('expansion'):
+                    exp.children = list(_interleave(exp.children, expr))
+                    if r == start:
+                        exp.children = [expr] + exp.children
+                for exp in tree.find_data('expr'):
+                    exp.children[0] = T('expansion', list(_interleave(exp.children[:1], expr)))
+
+            _ignore_tree = T('expr', [_choice_of_rules(terms_to_ignore.values()), Token('OP', '?')])
+            rule_defs.append(('__ignore', _ignore_tree, None))
+
+        # Convert all tokens to rules
+        new_terminal_names = {name: '__token_'+name for name, tree in term_defs}
+
+        for name, tree, options in rule_defs:
+            for exp in chain( tree.find_data('expansion'), tree.find_data('expr') ):
+                for i, sym in enumerate(exp.children):
+                    if sym in new_terminal_names:
+                        exp.children[i] = Token(sym.type, new_terminal_names[sym])
+
+        for name, tree in term_defs:
+            if name.startswith('_'):
+                options = RuleOptions(filter_out=True)
+            else:
+                options = RuleOptions(keep_all_tokens=True, create_token=name)

-                else:
-                    for exp in chain( tree.find_data('expansion'), tree.find_data('expr') ):
-                        for i, sym in enumerate(exp.children):
-                            if sym in tokens_to_convert:
-                                exp.children[i] = Token(sym.type, tokens_to_convert[sym])
+            name = new_terminal_names[name]
+            inner_name = name + '_inner'
+            rule_defs.append((name, _choice_of_rules([inner_name]), None))
+            rule_defs.append((inner_name, tree, options))

-                new_rule_defs.append((name, tree, options))
+        return [], rule_defs

-            rule_defs = new_rule_defs
+    def compile(self, lexer=False, start=None):
+        if not lexer:
+            token_defs, rule_defs = self._prepare_scanless_grammar(start)
         else:
             token_defs = list(self.token_defs)
             rule_defs = self.rule_defs
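
The extracted `_prepare_scanless_grammar` keeps the old behaviour: with no lexer to skip `%ignore`'d terminals, it splices an optional `__ignore` rule into every expansion, after each terminal occurrence (and, for the start rule, before the first symbol as well). A rough sketch of that splicing on plain lists — `interleave` below is a simplified stand-in for the `_interleave` helper shown in context, with `str.isupper` standing in for `is_terminal`:

def interleave(symbols, item):
    # After every terminal (upper-case name in this sketch), let the
    # ignore rule match.
    for sym in symbols:
        yield sym
        if sym.isupper():
            yield item

# With terminals NAME, EQUAL and VALUE, a rule  pair: NAME EQUAL VALUE
# effectively becomes  pair: NAME __ignore EQUAL __ignore VALUE __ignore
print(list(interleave(['NAME', 'EQUAL', 'VALUE'], '__ignore')))

The `__ignore` rule itself is built by the new `_choice_of_rules` helper wrapped in `Token('OP', '?')`, i.e. an optional choice over the renamed ignored terminals.
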
@@ -473,14 +477,6 @@ class RuleOptions:

         self.filter_out = filter_out        # remove this rule from the tree
                                             # used for "token"-rules in scanless

-    @classmethod
-    def new_from(cls, options, **kw):
-        return cls(
-            keep_all_tokens=options and options.keep_all_tokens,
-            expand1=options and options.expand1,
-            **kw)
-
     @classmethod
     def from_rule(cls, name, expansions):
         keep_all_tokens = name.startswith('!')
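
`RuleOptions.new_from` loses its last callers in this change: the old conversion loop copied `keep_all_tokens` and `expand1` from the existing options, while the new token-to-rule loop always builds a fresh `RuleOptions`, so the helper is dead code. For orientation, a sketch of the class shape left behind, inferred from the visible context and the constructor keywords used in the hunks above; the argument defaults are assumptions:

class RuleOptions:
    # Sketch only, not the verbatim class: fields inferred from the
    # constructor keywords used elsewhere in this diff.
    def __init__(self, keep_all_tokens=False, expand1=False,
                 create_token=None, filter_out=False):
        self.keep_all_tokens = keep_all_tokens
        self.expand1 = expand1
        self.create_token = create_token  # used for "token"-rules in scanless
        self.filter_out = filter_out      # remove this rule from the tree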