diff --git a/lark/grammar.py b/lark/grammar.py
index f853182..d257bc4 100644
--- a/lark/grammar.py
+++ b/lark/grammar.py
@@ -26,21 +26,6 @@ class RuleOptions:
         self.filter_out = filter_out    # remove this rule from the tree
                                         # used for "token"-rules in scanless
 
-    @classmethod
-    def from_rule(cls, name, *x):
-        if len(x) > 1:
-            priority, expansions = x
-            priority = int(priority)
-        else:
-            expansions ,= x
-            priority = None
-
-        keep_all_tokens = name.startswith('!')
-        name = name.lstrip('!')
-        expand1 = name.startswith('?')
-        name = name.lstrip('?')
-
-        return name, expansions, cls(keep_all_tokens, expand1, priority=priority)
 
     def __repr__(self):
         return 'RuleOptions(%r, %r, %r, %r, %r)' % (
diff --git a/lark/lark.py b/lark/lark.py
index 03bd253..a7af772 100644
--- a/lark/lark.py
+++ b/lark/lark.py
@@ -165,14 +165,15 @@ class Lark:
 
     def _build_parser(self):
         self.parser_class = get_frontend(self.options.parser, self.options.lexer)
+        self.parse_tree_builder = ParseTreeBuilder(self.rules, self.options.tree_class, self.options.propagate_positions, self.options.keep_all_tokens)
 
-        rules, callback = self.parse_tree_builder.apply(self.options.transformer)
+        callback = self.parse_tree_builder.apply(self.options.transformer)
         if self.profiler:
             for f in dir(callback):
                 if not (f.startswith('__') and f.endswith('__')):
                     setattr(callback, f, self.profiler.make_wrapper('transformer', getattr(callback, f)))
 
-        parser_conf = ParserConf(rules, callback, self.options.start)
+        parser_conf = ParserConf(self.rules, callback, self.options.start)
 
         return self.parser_class(self.lexer_conf, parser_conf, options=self.options)
diff --git a/lark/load_grammar.py b/lark/load_grammar.py
index ce4ec5a..b38a67c 100644
--- a/lark/load_grammar.py
+++ b/lark/load_grammar.py
@@ -12,7 +12,7 @@ from .parse_tree_builder import ParseTreeBuilder
 from .parser_frontends import LALR
 from .parsers.lalr_parser import UnexpectedToken
 from .common import is_terminal, GrammarError, LexerConf, ParserConf, PatternStr, PatternRE, TokenDef
-from .grammar import RuleOptions
+from .grammar import RuleOptions, Rule
 from .tree import Tree as T, Transformer, InlineTransformer, Visitor
@@ -485,13 +485,21 @@ class Grammar:
 
         dict_update_safe(rules, ebnf_to_bnf.new_rules)
 
-        for tree, _o in rules.values():
+        rule_tree_to_text = RuleTreeToText()
+
+        new_rules = []
+        for origin, (tree, options) in rules.items():
             simplify_rule.visit(tree)
+            expansions = rule_tree_to_text.transform(tree)
 
-        rule_tree_to_text = RuleTreeToText()
-        rules = {origin: (rule_tree_to_text.transform(tree), options) for origin, (tree, options) in rules.items()}
+            for expansion, alias in expansions:
+                if alias and origin.startswith('_'):
+                    raise Exception("Rule %s is marked for expansion (it starts with an underscore) and isn't allowed to have aliases (alias=%s)" % (origin, alias))
 
-        return tokens, rules, self.ignore
+                rule = Rule(origin, expansion, alias, options)
+                new_rules.append(rule)
+
+        return tokens, new_rules, self.ignore
@@ -528,14 +536,28 @@ def resolve_token_references(token_defs):
 
         if not changed:
             break
 
+def options_from_rule(name, *x):
+    if len(x) > 1:
+        priority, expansions = x
+        priority = int(priority)
+    else:
+        expansions ,= x
+        priority = None
+
+    keep_all_tokens = name.startswith('!')
+    name = name.lstrip('!')
+    expand1 = name.startswith('?')
+    name = name.lstrip('?')
+
+    return name, expansions, RuleOptions(keep_all_tokens, expand1, priority=priority)
 
 class GrammarLoader:
     def __init__(self):
         tokens = [TokenDef(name, PatternRE(value)) for name, value in TOKENS.items()]
 
-        rules = [RuleOptions.from_rule(name, x) for name, x in RULES.items()]
-        d = {r: ([(x.split(), None) for x in xs], o) for r, xs, o in rules}
-        rules, callback = ParseTreeBuilder(d, T).apply()
+        rules = [options_from_rule(name, x) for name, x in RULES.items()]
+        rules = [Rule(r, x.split(), None, o) for r, xs, o in rules for x in xs]
+        callback = ParseTreeBuilder(rules, T).apply()
         lexer_conf = LexerConf(tokens, ['WS', 'COMMENT'])
         parser_conf = ParserConf(rules, callback, 'start')
@@ -625,7 +647,7 @@ class GrammarLoader:
         # Resolve token references
         resolve_token_references(token_defs)
 
-        rules = [RuleOptions.from_rule(*x) for x in rule_defs]
+        rules = [options_from_rule(*x) for x in rule_defs]
 
         rule_names = set()
         for name, _x, _o in rules:
diff --git a/lark/parse_tree_builder.py b/lark/parse_tree_builder.py
index e26d287..4513583 100644
--- a/lark/parse_tree_builder.py
+++ b/lark/parse_tree_builder.py
@@ -109,49 +109,46 @@ class ParseTreeBuilder:
 
     def _init_builders(self, rules):
         filter_out = set()
-        for origin, (expansions, options) in rules.items():
-            if options and options.filter_out:
-                assert origin.startswith('_')   # Just to make sure
-                filter_out.add(origin)
+        for rule in rules:
+            if rule.options and rule.options.filter_out:
+                assert rule.origin.startswith('_')   # Just to make sure
+                filter_out.add(rule.origin)
 
-        for origin, (expansions, options) in rules.items():
+        for rule in rules:
+            options = rule.options
             keep_all_tokens = self.always_keep_all_tokens or (options.keep_all_tokens if options else False)
             expand1 = options.expand1 if options else False
             create_token = options.create_token if options else False
 
-            for expansion, alias in expansions:
-                if alias and origin.startswith('_'):
-                    raise Exception("Rule %s is marked for expansion (it starts with an underscore) and isn't allowed to have aliases (alias=%s)" % (origin, alias))
+            wrapper_chain = filter(None, [
+                (expand1 and not rule.alias) and Expand1,
+                create_token and Factory(TokenWrapper, create_token),
+                create_rule_handler(rule.expansion, keep_all_tokens, filter_out),
+                self.propagate_positions and PropagatePositions,
+            ])
 
-                wrapper_chain = filter(None, [
-                    (expand1 and not alias) and Expand1,
-                    create_token and Factory(TokenWrapper, create_token),
-                    create_rule_handler(expansion, keep_all_tokens, filter_out),
-                    self.propagate_positions and PropagatePositions,
-                ])
-
-                yield origin, expansion, options, alias or origin, wrapper_chain
+            yield rule, wrapper_chain
 
 
     def apply(self, transformer=None):
         callback = Callback()
 
-        new_rules = []
-        for origin, expansion, options, alias, wrapper_chain in self.rule_builders:
-            callback_name = '_callback_%s_%s' % (origin, '_'.join(expansion))
+        for rule, wrapper_chain in self.rule_builders:
+            internal_callback_name = '_callback_%s_%s' % (rule.origin, '_'.join(rule.expansion))
+            user_callback_name = rule.alias or rule.origin
             try:
-                f = transformer._get_func(alias)
+                f = transformer._get_func(user_callback_name)
             except AttributeError:
-                f = NodeBuilder(self.tree_class, alias)
+                f = NodeBuilder(self.tree_class, user_callback_name)
+
+            rule.alias = internal_callback_name
 
             for w in wrapper_chain:
                 f = w(f)
 
-            if hasattr(callback, callback_name):
-                raise GrammarError("Rule expansion '%s' already exists in rule %s" % (' '.join(expansion), origin))
-            setattr(callback, callback_name, f)
-
-            new_rules.append( Rule( origin, expansion, callback_name, options ))
+            if hasattr(callback, internal_callback_name):
+                raise GrammarError("Rule '%s' already exists" % (rule,))
+            setattr(callback, internal_callback_name, f)
 
-        return new_rules, callback
+        return callback