@@ -40,6 +40,9 @@ class Token(Str):
    def __repr__(self):
        return 'Token(%s, %r)' % (self.type, self.value)

    def __deepcopy__(self, memo):
        return Token(self.type, self.value, self.pos_in_stream, self.line, self.column)

class Regex:
    def __init__(self, pattern, flags=()):
        self.pattern = pattern
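The __deepcopy__ override matters because Grammar.compile (below) now deep-copies rule definitions whose trees contain Token instances. A minimal standalone sketch of the failure it avoids, using a hypothetical str subclass whose constructor takes extra required arguments (this is not lark's actual Token class):

from copy import deepcopy

# Rough sketch: a str subclass whose __new__ takes extra required arguments
# cannot be rebuilt by the default copy protocol, which passes only the
# string value back to __new__.
class MyToken(str):
    def __new__(cls, type_, value):
        inst = str.__new__(cls, value)
        inst.type = type_
        inst.value = value
        return inst

    def __deepcopy__(self, memo):
        # Rebuild explicitly with every constructor argument.
        return MyToken(self.type, self.value)

t = deepcopy(MyToken('RULE', 'start'))
assert t.type == 'RULE' and t == 'start'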
@@ -2,6 +2,7 @@ import os.path
from itertools import chain
import re
from ast import literal_eval
from copy import deepcopy

from .lexer import Token, UnexpectedInput
@@ -348,13 +349,15 @@ class Grammar:
    def compile(self, lexer=False, start=None):
        if not lexer:
            rule_defs = deepcopy(self.rule_defs)

            # XXX VERY HACKY!! There must be a better way..
            ignore_tokens = [('_'+name, t) for name, t in self.token_defs if name in self.extra['ignore']]
            if ignore_tokens:
                self.token_defs = [('_'+name if name in self.extra['ignore'] else name,t) for name,t in self.token_defs]
                ignore_names = [t[0] for t in ignore_tokens]
                expr = Token('RULE', '__ignore')
                for r, tree, _o in self.rule_defs:
                for r, tree, _o in rule_defs:
                    for exp in tree.find_data('expansion'):
                        exp.children = list(interleave(exp.children, expr))
                        if r == start:   # TODO use GrammarRule or similar (RuleOptions?)
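In scanless mode the ignored terminals cannot be thrown away by a lexer, so an optional __ignore rule is woven between the symbols of every expansion, letting whitespace and comments appear anywhere. A rough standalone sketch of an interleaving helper with that effect (an illustration only, not necessarily lark's exact interleave):

def interleave(items, sep):
    # yield sep around and between the items: a b -> sep a sep b sep
    for item in items:
        yield sep
        yield item
    yield sep

assert list(interleave(['"("', 'expr', '")"'], '__ignore')) == \
       ['__ignore', '"("', '__ignore', 'expr', '__ignore', '")"', '__ignore']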
@@ -362,14 +365,34 @@ class Grammar:
                x = [T('expansion', [Token('RULE', x)]) for x in ignore_names]
                _ignore_tree = T('expr', [T('expansions', x), Token('OP', '?')])
                self.rule_defs.append(('__ignore', _ignore_tree, None))
                rule_defs.append(('__ignore', _ignore_tree, None))
            # End of "ignore" section

            for name, tree in self.token_defs:
                self.rule_defs.append((name, tree, RuleOptions(keep_all_tokens=True)))
                rule_defs.append((name, tree, RuleOptions(keep_all_tokens=True)))

            token_defs = []

            tokens_to_convert = {name: '__token_'+name for name, tree, _ in rule_defs if is_terminal(name)}
            new_rule_defs = []
            for name, tree, options in rule_defs:
                if name in tokens_to_convert:
                    if name.startswith('_'):
                        options = RuleOptions.new_from(options, filter_out=True)
                    else:
                        options = RuleOptions.new_from(options, join_children=True)

                name = tokens_to_convert.get(name, name)

                for exp in chain( tree.find_data('expansion'), tree.find_data('expr') ):
                    for i, sym in enumerate(exp.children):
                        if sym in tokens_to_convert:
                            exp.children[i] = Token(sym.type, tokens_to_convert[sym])

                new_rule_defs.append((name, tree, options))

            rule_defs = new_rule_defs
        else:
            token_defs = list(self.token_defs)
            rule_defs = self.rule_defs

        # =================
        # Compile Tokens
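For scanless parsing, every terminal definition is thus turned into an ordinary rule named __token_<NAME>: underscore-prefixed terminals get filter_out (dropped from the tree later), all others get join_children (their matched characters are glued back into one string). A toy illustration of the renaming step, approximating is_terminal by isupper(), which is an assumption made for the sketch:

rule_defs = [('start', ..., None), ('NUMBER', ..., None), ('_WS', ..., None)]
tokens_to_convert = {name: '__token_' + name
                     for name, _tree, _opts in rule_defs
                     if name.isupper()}
assert tokens_to_convert == {'NUMBER': '__token_NUMBER', '_WS': '__token__WS'}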
@@ -410,7 +433,7 @@ class Grammar:
        rule_tree_to_text = RuleTreeToText()
        rules = {}

        for name, rule_tree, options in self.rule_defs:
        for name, rule_tree, options in rule_defs:
            assert name not in rules, name
            rule_tree = PrepareLiterals().transform(rule_tree)
            if not lexer:
@@ -431,9 +454,20 @@ class Grammar:
class RuleOptions:
    def __init__(self, keep_all_tokens=False, expand1=False):
    def __init__(self, keep_all_tokens=False, expand1=False, join_children=False, filter_out=False):
        self.keep_all_tokens = keep_all_tokens
        self.expand1 = expand1
        self.join_children = join_children  # used for scanless postprocessing
        self.filter_out = filter_out        # remove this rule from the tree
                                            # used for "token"-rules in scanless

    @classmethod
    def new_from(cls, options, **kw):
        return cls(
            keep_all_tokens=options and options.keep_all_tokens,
            expand1=options and options.expand1,
            **kw)

def _extract_options_for_rule(name, expansions):
    keep_all_tokens = name.startswith('!')
@@ -12,11 +12,19 @@ def create_expand1_tree_builder_function(tree_builder):
        return tree_builder(children)
    return expand1

def create_rule_handler(expansion, usermethod, keep_all_tokens):
def create_join_children(tree_builder):
    def join_children(children):
        children = [''.join(children)]
        return tree_builder(children)
    return join_children

def create_rule_handler(expansion, usermethod, keep_all_tokens, filter_out):
    # if not keep_all_tokens:
    to_include = [(i, not is_terminal(sym) and sym.startswith('_'))
                  for i, sym in enumerate(expansion)
                  if keep_all_tokens or not is_terminal(sym) or not sym.startswith('_')]
                  if keep_all_tokens
                     or not ((is_terminal(sym) and sym.startswith('_')) or sym in filter_out)
                  ]

    if len(to_include) < len(expansion) or any(to_expand for i, to_expand in to_include):
        def _build_ast(match):
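A quick usage sketch of the join_children wrapper added above: in scanless mode a "token rule" matches one character per child, and the wrapper glues them back into a single string before the normal tree callback runs. The lambda below stands in for a real tree builder:

def create_join_children(tree_builder):          # same behaviour as the helper above
    def join_children(children):
        return tree_builder([''.join(children)])
    return join_children

build = create_join_children(lambda children: ('NUMBER', children))
assert build(['1', '2', '3']) == ('NUMBER', ['123'])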
@@ -49,9 +57,17 @@ class ParseTreeBuilder:
    def create_tree_builder(self, rules, transformer):
        callback = Callback()
        new_rules = []

        filter_out = set()
        for origin, (expansions, options) in rules.items():
            if options and options.filter_out:
                assert origin.startswith('_')   # Just to make sure
                filter_out.add(origin)

        for origin, (expansions, options) in rules.items():
            keep_all_tokens = options.keep_all_tokens if options else False
            expand1 = options.expand1 if options else False
            join_children = options.join_children if options else False

            _origin = origin
@@ -59,8 +75,6 @@ class ParseTreeBuilder:
                if alias and origin.startswith('_'):
                    raise Exception("Rule %s is marked for expansion (it starts with an underscore) and isn't allowed to have aliases (alias=%s)" % (origin, alias))

                _alias = 'autoalias_%s_%s' % (_origin, '_'.join(expansion))

                try:
                    f = transformer._get_func(alias or _origin)
                except AttributeError:
@@ -71,12 +85,17 @@ class ParseTreeBuilder:
                if expand1:
                    f = create_expand1_tree_builder_function(f)

                alias_handler = create_rule_handler(expansion, f, keep_all_tokens)
                if join_children:
                    f = create_join_children(f)

                alias_handler = create_rule_handler(expansion, f, keep_all_tokens, filter_out)

                if hasattr(callback, _alias):
                callback_name = 'autoalias_%s_%s' % (_origin, '_'.join(expansion))
                if hasattr(callback, callback_name):
                    raise GrammarError("Rule expansion '%s' already exists in rule %s" % (' '.join(expansion), origin))
                setattr(callback, _alias, alias_handler)
                setattr(callback, callback_name, alias_handler)

                new_rules.append(( _origin, expansion, _alias ))
                new_rules.append(( _origin, expansion, callback_name ))

        return new_rules, callback
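Putting the pieces together: rules marked filter_out (the underscore-prefixed token-rules created for scanless mode) are collected into a set, and create_rule_handler then drops any symbol in that set from the matched children before the user callback sees them. A rough standalone illustration of that filtering step:

filter_out = {'__token__WS'}
expansion = ['atom', '__token__WS', 'atom']
kept = [sym for sym in expansion if sym not in filter_out]
assert kept == ['atom', 'atom']   # the ignored-whitespace rule never reaches the callback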
@@ -149,16 +149,9 @@ class Nearley_NoLex:

class Earley_NoLex:
    def __init__(self, lexer_conf, parser_conf):
        self.tokens_to_convert = {name: '__token_'+name for name, tree, _ in parser_conf.rules if is_terminal(name)}
        rules = []
        for name, exp, alias in parser_conf.rules:
            name = self.tokens_to_convert.get(name, name)
            exp = [self.tokens_to_convert.get(x, x) for x in exp]
            rules.append((name, exp, alias))

        self.token_by_name = {t.name:t for t in lexer_conf.tokens}

        rules = [(n, list(self._prepare_expansion(x)), a) for n,x,a in rules]
        rules = [(n, list(self._prepare_expansion(x)), a) for n,x,a in parser_conf.rules]

        self.parser = earley.Parser(ParserConf(rules, parser_conf.callback, parser_conf.start))

@@ -177,16 +170,7 @@ class Earley_NoLex:
        new_text = tokenize_text(text)
        res = self.parser.parse(new_text)
        assert len(res) ==1 , 'Ambiguious Parse! Not handled yet'

        res = res[0]
        class RestoreTokens(Transformer):
            pass

        for t in self.tokens_to_convert:
            setattr(RestoreTokens, t, ''.join)

        res = RestoreTokens().transform(res)
        return res
        return res[0]

def get_frontend(parser, lexer):
@@ -40,7 +40,7 @@ class Item(object):
    def __repr__(self):
        before = map(str, self.rule.expansion[:self.ptr])
        after = map(str, self.rule.expansion[self.ptr:])
        return '<(%d) %s : %s * %s>' % (self.start, self.rule.origin, ' '.join(before), ' '.join(after))
        return '<(%d) %s : %s * %s>' % (id(self.start), self.rule.origin, ' '.join(before), ' '.join(after))

class NewsList(list):
@@ -76,9 +76,20 @@ class Column:
        for item in items:
            if item.is_complete:
                # if item in added:   # XXX This causes a bug with empty rules
                #     continue        # And might be unnecessary
                # added.add(item)
                # (We must allow repetition of empty rules)
                if item.rule.expansion:
                    # This is an important test to avoid infinite loops.
                    # For example, for the rule:
                    #    a: a | "b"
                    # If we can detect these cases statically, we can remove
                    # this test and gain a small optimization.
                    #
                    if item in added:
                        continue
                    added.add(item)
                self.to_reduce.append(item)
            else:
                if is_terminal(item.expect):
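A toy model of the guard above: completing a self-recursive item such as a -> a . in the same column schedules another identical completion, so without a "seen" set the work loop would never terminate; empty-rule completions are deliberately left out of the set so they may repeat.

seen = set()
work = [('a', ('a',), 0)]            # (origin, expansion, start) of a completed item
steps = 0
while work:
    item = work.pop()
    if item in seen:                 # the 'if item in added' check above
        continue
    seen.add(item)
    work.append(item)                # completing a -> a . re-derives itself
    steps += 1
assert steps == 1                    # without the check this loop never ends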
@@ -23,12 +23,17 @@ class Reconstructor:
        tokens = {t.name:t for t in parser.lexer_conf.tokens}
        token_res = {t.name:re.compile(t.pattern.to_regexp()) for t in parser.lexer_conf.tokens}

        class MatchData:
        class MatchData(object):
            def __init__(self, data):
                self.data = data

            def __repr__(self):
                return '%s(%r)' % (type(self).__name__, self.data)

        class MatchTerminal(MatchData):
            def __call__(self, other):
                if isinstance(other, Tree):
                    return False
                return token_res[self.data].match(other) is not None

        class MatchTree(MatchData):
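A standalone sketch of the MatchTerminal idea: a small callable that answers "could this leaf string have been produced by this terminal?", and that never matches an inner tree node (here any non-string object stands in for Tree):

import re

token_res = {'NUMBER': re.compile(r'[0-9]+')}

def match_terminal(name, other):
    if not isinstance(other, str):   # inner nodes can never match a terminal
        return False
    return token_res[name].match(other) is not None

assert match_terminal('NUMBER', '123') is True
assert match_terminal('NUMBER', 'abc') is False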
@@ -66,8 +71,11 @@ class Reconstructor:
                return to_write

        # Recreate the rules to assume a standard lexer
        _tokens, rules, _grammar_extra = parser.grammar.compile(lexer='standard', start='whatever')

        d = defaultdict(list)
        for name, (expansions, _o) in parser.rules.items():
        for name, (expansions, _o) in rules.items():
            for expansion, alias in expansions:
                if alias:
                    d[alias].append(expansion)