@@ -40,6 +40,9 @@ class Token(Str):
    def __repr__(self):
        return 'Token(%s, %r)' % (self.type, self.value)

    def __deepcopy__(self, memo):
        return Token(self.type, self.value, self.pos_in_stream, self.line, self.column)

class Regex:
    def __init__(self, pattern, flags=()):
        self.pattern = pattern
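For context, a minimal standalone sketch of why a str-subclass token wants an explicit `__deepcopy__`: rebuilding the token by hand keeps the positional metadata intact across a deep copy. `DemoToken` below is a hypothetical stand-in, not lark's real `Token` class.

```python
from copy import deepcopy

class DemoToken(str):
    # Hypothetical stand-in for a Token-like str subclass carrying metadata.
    def __new__(cls, type_, value, pos=None, line=None, column=None):
        inst = super(DemoToken, cls).__new__(cls, value)
        inst.type = type_
        inst.value = value
        inst.pos_in_stream = pos
        inst.line = line
        inst.column = column
        return inst

    def __deepcopy__(self, memo):
        # Rebuild explicitly so the metadata survives copying, mirroring the patch above.
        return DemoToken(self.type, self.value, self.pos_in_stream, self.line, self.column)

tok = DemoToken('RULE', '__ignore', pos=0, line=1, column=1)
copied = deepcopy(tok)
assert copied == tok and copied.type == 'RULE' and copied.line == 1
```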
@@ -2,6 +2,7 @@ import os.path
from itertools import chain
import re
from ast import literal_eval
from copy import deepcopy

from .lexer import Token, UnexpectedInput
@@ -348,13 +349,15 @@ class Grammar:
    def compile(self, lexer=False, start=None):
        if not lexer:
            rule_defs = deepcopy(self.rule_defs)

            # XXX VERY HACKY!! There must be a better way..
            ignore_tokens = [('_'+name, t) for name, t in self.token_defs if name in self.extra['ignore']]
            if ignore_tokens:
                self.token_defs = [('_'+name if name in self.extra['ignore'] else name,t) for name,t in self.token_defs]
                ignore_names = [t[0] for t in ignore_tokens]
                expr = Token('RULE', '__ignore')
                for r, tree, _o in self.rule_defs:
                for r, tree, _o in rule_defs:
                    for exp in tree.find_data('expansion'):
                        exp.children = list(interleave(exp.children, expr))
                        if r == start:  # TODO use GrammarRule or similar (RuleOptions?)
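To illustrate the interleaving step above, here is a hedged sketch of the idea: an `__ignore` placeholder is woven in after each symbol of every expansion, so ignored terminals (typically whitespace) may occur anywhere in scanless mode, and the start rule additionally gets a leading placeholder via the `if r == start` branch. This `interleave` is a simplified stand-in; lark's real helper is more selective about which children it separates.

```python
# Simplified stand-in for the interleave() helper used above; illustrative only.
def interleave(children, filler):
    for child in children:
        yield child
        yield filler

expansion = ['NAME', 'EQUAL', 'NUMBER']
print(list(interleave(expansion, '__ignore')))
# ['NAME', '__ignore', 'EQUAL', '__ignore', 'NUMBER', '__ignore']
```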
@@ -362,14 +365,34 @@ class Grammar:
                x = [T('expansion', [Token('RULE', x)]) for x in ignore_names]
                _ignore_tree = T('expr', [T('expansions', x), Token('OP', '?')])
                self.rule_defs.append(('__ignore', _ignore_tree, None))
                rule_defs.append(('__ignore', _ignore_tree, None))
            # End of "ignore" section

            for name, tree in self.token_defs:
                self.rule_defs.append((name, tree, RuleOptions(keep_all_tokens=True)))
                rule_defs.append((name, tree, RuleOptions(keep_all_tokens=True)))

            token_defs = []

            tokens_to_convert = {name: '__token_'+name for name, tree, _ in rule_defs if is_terminal(name)}
            new_rule_defs = []
            for name, tree, options in rule_defs:
                if name in tokens_to_convert:
                    if name.startswith('_'):
                        options = RuleOptions.new_from(options, filter_out=True)
                    else:
                        options = RuleOptions.new_from(options, join_children=True)

                name = tokens_to_convert.get(name, name)

                for exp in chain( tree.find_data('expansion'), tree.find_data('expr') ):
                    for i, sym in enumerate(exp.children):
                        if sym in tokens_to_convert:
                            exp.children[i] = Token(sym.type, tokens_to_convert[sym])

                new_rule_defs.append((name, tree, options))

            rule_defs = new_rule_defs
        else:
            token_defs = list(self.token_defs)
            rule_defs = self.rule_defs

        # =================
        # Compile Tokens
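A hedged sketch of the renaming step above: terminal definitions that were pulled into the rule list receive synthetic `__token_` names, and every reference to them in other rules is rewritten to match. The `is_terminal` predicate here is a hypothetical stand-in for lark's real one.

```python
# Hypothetical stand-in for lark's is_terminal(); the real predicate may differ.
def is_terminal(name):
    return name.lstrip('_').isupper()

rule_defs = [('start', ..., None), ('NUMBER', ..., None), ('_WS', ..., None)]
tokens_to_convert = {name: '__token_' + name
                     for name, tree, _ in rule_defs if is_terminal(name)}
print(tokens_to_convert)
# {'NUMBER': '__token_NUMBER', '_WS': '__token__WS'}
```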
@@ -410,7 +433,7 @@ class Grammar:
        rule_tree_to_text = RuleTreeToText()

        rules = {}
        for name, rule_tree, options in self.rule_defs:
        for name, rule_tree, options in rule_defs:
            assert name not in rules, name
            rule_tree = PrepareLiterals().transform(rule_tree)
            if not lexer:
@@ -431,9 +454,20 @@
class RuleOptions:
    def __init__(self, keep_all_tokens=False, expand1=False):
    def __init__(self, keep_all_tokens=False, expand1=False, join_children=False, filter_out=False):
        self.keep_all_tokens = keep_all_tokens
        self.expand1 = expand1
        self.join_children = join_children  # used for scanless postprocessing
        self.filter_out = filter_out        # remove this rule from the tree
                                            # used for "token"-rules in scanless

    @classmethod
    def new_from(cls, options, **kw):
        return cls(
            keep_all_tokens=options and options.keep_all_tokens,
            expand1=options and options.expand1,
            **kw)
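A brief usage sketch of `new_from`: it copies the flags of an existing options object (or of `None`) while switching on one of the new scanless flags. The class is restated here verbatim from the hunk above so the snippet runs standalone; the values are illustrative.

```python
# RuleOptions copied from the hunk above so this snippet is self-contained.
class RuleOptions:
    def __init__(self, keep_all_tokens=False, expand1=False, join_children=False, filter_out=False):
        self.keep_all_tokens = keep_all_tokens
        self.expand1 = expand1
        self.join_children = join_children
        self.filter_out = filter_out

    @classmethod
    def new_from(cls, options, **kw):
        return cls(keep_all_tokens=options and options.keep_all_tokens,
                   expand1=options and options.expand1,
                   **kw)

opts = RuleOptions(keep_all_tokens=True)
converted = RuleOptions.new_from(opts, join_children=True)
assert converted.keep_all_tokens and converted.join_children

# `options and options.x` also tolerates options=None:
filtered = RuleOptions.new_from(None, filter_out=True)
assert not filtered.keep_all_tokens and filtered.filter_out
```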
def _extract_options_for_rule(name, expansions):
    keep_all_tokens = name.startswith('!')
@@ -12,11 +12,19 @@ def create_expand1_tree_builder_function(tree_builder):
            return tree_builder(children)
    return expand1

def create_rule_handler(expansion, usermethod, keep_all_tokens):
def create_join_children(tree_builder):
    def join_children(children):
        children = [''.join(children)]
        return tree_builder(children)
    return join_children
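For illustration, the new `create_join_children` wrapper in action with a trivial stand-in tree builder: in scanless mode a converted token-rule matches fragment by fragment, and this wrapper glues the fragments back into a single string before the node is built.

```python
def create_join_children(tree_builder):
    def join_children(children):
        children = [''.join(children)]
        return tree_builder(children)
    return join_children

# Trivial stand-in builder that just returns its children list.
build = create_join_children(lambda children: children)
assert build(['f', 'o', 'o']) == ['foo']
```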
def create_rule_handler(expansion, usermethod, keep_all_tokens, filter_out):
    # if not keep_all_tokens:
    to_include = [(i, not is_terminal(sym) and sym.startswith('_'))
                  for i, sym in enumerate(expansion)
                  if keep_all_tokens or not is_terminal(sym) or not sym.startswith('_')]
                  if keep_all_tokens
                     or not ((is_terminal(sym) and sym.startswith('_')) or sym in filter_out)
                  ]

    if len(to_include) < len(expansion) or any(to_expand for i, to_expand in to_include):
        def _build_ast(match):
@@ -49,9 +57,17 @@ class ParseTreeBuilder:
    def create_tree_builder(self, rules, transformer):
        callback = Callback()
        new_rules = []

        filter_out = set()
        for origin, (expansions, options) in rules.items():
            if options and options.filter_out:
                assert origin.startswith('_')   # Just to make sure
                filter_out.add(origin)

        for origin, (expansions, options) in rules.items():
            keep_all_tokens = options.keep_all_tokens if options else False
            expand1 = options.expand1 if options else False
            join_children = options.join_children if options else False

            _origin = origin
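A small sketch of why `filter_out` is collected in a separate pass before the main loop: every handler built later consults the complete set, so it must be known up front. The rule names and the `Opts` object below are made up for the example, not lark's real structures.

```python
# Made-up rules mapping: origin -> (expansions, options).
class Opts:
    def __init__(self, filter_out=False):
        self.filter_out = filter_out

rules = {
    '_sep': ([], Opts(filter_out=True)),
    'pair': ([], Opts()),
    'item': ([], None),
}

filter_out = set()
for origin, (expansions, options) in rules.items():
    if options and options.filter_out:
        filter_out.add(origin)

assert filter_out == {'_sep'}
```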
@@ -59,8 +75,6 @@ class ParseTreeBuilder:
                if alias and origin.startswith('_'):
                    raise Exception("Rule %s is marked for expansion (it starts with an underscore) and isn't allowed to have aliases (alias=%s)" % (origin, alias))

                _alias = 'autoalias_%s_%s' % (_origin, '_'.join(expansion))
                try:
                    f = transformer._get_func(alias or _origin)
                except AttributeError:
@@ -71,12 +85,17 @@
                if expand1:
                    f = create_expand1_tree_builder_function(f)

                alias_handler = create_rule_handler(expansion, f, keep_all_tokens)
                if join_children:
                    f = create_join_children(f)

                alias_handler = create_rule_handler(expansion, f, keep_all_tokens, filter_out)

                if hasattr(callback, _alias):
                callback_name = 'autoalias_%s_%s' % (_origin, '_'.join(expansion))
                if hasattr(callback, callback_name):
                    raise GrammarError("Rule expansion '%s' already exists in rule %s" % (' '.join(expansion), origin))
                setattr(callback, _alias, alias_handler)
                setattr(callback, callback_name, alias_handler)

                new_rules.append(( _origin, expansion, _alias ))
                new_rules.append(( _origin, expansion, callback_name ))

        return new_rules, callback
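A sketch of the callback-registration pattern used above: each (origin, expansion) pair gets a uniquely named handler attached to a plain callback object, and the rule list refers to the handler by that generated name. The `Callback` holder and the handler here are stand-ins for the example.

```python
# Hypothetical stand-in for the Callback holder object.
class Callback:
    pass

callback = Callback()
origin, expansion = 'pair', ['NAME', 'EQUAL', 'NUMBER']

callback_name = 'autoalias_%s_%s' % (origin, '_'.join(expansion))
setattr(callback, callback_name, lambda children: (origin, children))

# The rule list then refers to the handler by its generated name:
handler = getattr(callback, callback_name)
assert handler(['x', '=', '1']) == ('pair', ['x', '=', '1'])
```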
@@ -149,16 +149,9 @@ class Nearley_NoLex:
class Earley_NoLex:
    def __init__(self, lexer_conf, parser_conf):
        self.tokens_to_convert = {name: '__token_'+name for name, tree, _ in parser_conf.rules if is_terminal(name)}
        rules = []
        for name, exp, alias in parser_conf.rules:
            name = self.tokens_to_convert.get(name, name)
            exp = [self.tokens_to_convert.get(x, x) for x in exp]
            rules.append((name, exp, alias))

        self.token_by_name = {t.name:t for t in lexer_conf.tokens}

        rules = [(n, list(self._prepare_expansion(x)), a) for n,x,a in rules]
        rules = [(n, list(self._prepare_expansion(x)), a) for n,x,a in parser_conf.rules]

        self.parser = earley.Parser(ParserConf(rules, parser_conf.callback, parser_conf.start))
@@ -177,16 +170,7 @@ class Earley_NoLex:
        new_text = tokenize_text(text)
        res = self.parser.parse(new_text)
        assert len(res) ==1 , 'Ambiguious Parse! Not handled yet'

        res = res[0]
        class RestoreTokens(Transformer):
            pass

        for t in self.tokens_to_convert:
            setattr(RestoreTokens, t, ''.join)

        res = RestoreTokens().transform(res)
        return res
        return res[0]

def get_frontend(parser, lexer):
@@ -40,7 +40,7 @@ class Item(object):
    def __repr__(self):
        before = map(str, self.rule.expansion[:self.ptr])
        after = map(str, self.rule.expansion[self.ptr:])
        return '<(%d) %s : %s * %s>' % (self.start, self.rule.origin, ' '.join(before), ' '.join(after))
        return '<(%d) %s : %s * %s>' % (id(self.start), self.rule.origin, ' '.join(before), ' '.join(after))

class NewsList(list):
@@ -76,9 +76,20 @@ class Column:
        for item in items:
            if item.is_complete:
                # if item in added:    # XXX This causes a bug with empty rules
                #     continue         # And might be unnecessary
                # added.add(item)

                # (We must allow repetition of empty rules)
                if item.rule.expansion:
                    # This is an important test to avoid infinite-loops,
                    # For example for the rule:
                    #   a: a | "b"
                    # If we can detect these cases statically, we can remove
                    # this test and gain a small optimization
                    #
                    if item in added:
                        continue
                    added.add(item)

                self.to_reduce.append(item)
            else:
                if is_terminal(item.expect):
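To make the comment above concrete, a standalone sketch of the dedup guard (plain tuples stand in for Earley items): completed items with a non-empty expansion are reduced at most once per column, which breaks the endless completion cycle that a self-recursive rule like `a: a | "b"` would otherwise trigger, while completed empty rules may still repeat.

```python
added = set()
to_reduce = []

def add_complete(item, expansion):
    if expansion:              # non-empty rule: reduce it only once per column
        if item in added:
            return
        added.add(item)
    to_reduce.append(item)     # empty rules are allowed to repeat

add_complete(('a', 0), ('a',))   # accepted
add_complete(('a', 0), ('a',))   # skipped -- already reduced in this column
add_complete(('e', 0), ())       # completed empty rule, accepted
add_complete(('e', 0), ())       # and accepted again
assert to_reduce == [('a', 0), ('e', 0), ('e', 0)]
```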
@@ -23,12 +23,17 @@ class Reconstructor:
        tokens = {t.name:t for t in parser.lexer_conf.tokens}
        token_res = {t.name:re.compile(t.pattern.to_regexp()) for t in parser.lexer_conf.tokens}

        class MatchData:
        class MatchData(object):
            def __init__(self, data):
                self.data = data

            def __repr__(self):
                return '%s(%r)' % (type(self).__name__, self.data)

        class MatchTerminal(MatchData):
            def __call__(self, other):
                if isinstance(other, Tree):
                    return False
                return token_res[self.data].match(other) is not None

        class MatchTree(MatchData):
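For reference, a self-contained sketch of the matcher idea shown above: a `MatchTerminal` callable rejects Tree nodes and tests candidate strings against the terminal's compiled pattern. `Tree` and `token_res` below are minimal stand-ins, not lark's real objects.

```python
import re

token_res = {'NUMBER': re.compile(r'\d+')}   # stand-in for the name -> pattern map

class Tree:                                   # minimal stand-in for lark's Tree
    pass

class MatchData(object):
    def __init__(self, data):
        self.data = data
    def __repr__(self):
        return '%s(%r)' % (type(self).__name__, self.data)

class MatchTerminal(MatchData):
    def __call__(self, other):
        if isinstance(other, Tree):
            return False
        return token_res[self.data].match(other) is not None

m = MatchTerminal('NUMBER')
assert m('42') and not m('abc') and not m(Tree())
```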
@@ -66,8 +71,11 @@ class Reconstructor:
            return to_write

        # Recreate the rules to assume a standard lexer
        _tokens, rules, _grammar_extra = parser.grammar.compile(lexer='standard', start='whatever')

        d = defaultdict(list)
        for name, (expansions, _o) in parser.rules.items():
        for name, (expansions, _o) in rules.items():
            for expansion, alias in expansions:
                if alias:
                    d[alias].append(expansion)