Browse Source

More refactoring, untangling grammar compilation and parse-tree creation

Erez Shinan 7 years ago
4 changed files with 58 additions and 53 deletions
  1. +0
  2. +3
  3. +31
  4. +24

+ 0
- 15
lark/grammar.py View File

@@ -26,21 +26,6 @@ class RuleOptions:

self.filter_out = filter_out # remove this rule from the tree
# used for "token"-rules in scanless
def from_rule(cls, name, *x):
if len(x) > 1:
priority, expansions = x
priority = int(priority)
expansions ,= x
priority = None

keep_all_tokens = name.startswith('!')
name = name.lstrip('!')
expand1 = name.startswith('?')
name = name.lstrip('?')

return name, expansions, cls(keep_all_tokens, expand1, priority=priority)

def __repr__(self):
return 'RuleOptions(%r, %r, %r, %r, %r)' % (

+ 3
- 2
lark/lark.py View File

@@ -165,14 +165,15 @@ class Lark:

def _build_parser(self):
self.parser_class = get_frontend(self.options.parser, self.options.lexer)

self.parse_tree_builder = ParseTreeBuilder(self.rules, self.options.tree_class, self.options.propagate_positions, self.options.keep_all_tokens)
rules, callback = self.parse_tree_builder.apply(self.options.transformer)
callback = self.parse_tree_builder.apply(self.options.transformer)
if self.profiler:
for f in dir(callback):
if not (f.startswith('__') and f.endswith('__')):
setattr(callback, f, self.profiler.make_wrapper('transformer', getattr(callback, f)))

parser_conf = ParserConf(rules, callback, self.options.start)
parser_conf = ParserConf(self.rules, callback, self.options.start)

return self.parser_class(self.lexer_conf, parser_conf, options=self.options)

+ 31
- 9
lark/load_grammar.py View File

@@ -12,7 +12,7 @@ from .parse_tree_builder import ParseTreeBuilder
from .parser_frontends import LALR
from .parsers.lalr_parser import UnexpectedToken
from .common import is_terminal, GrammarError, LexerConf, ParserConf, PatternStr, PatternRE, TokenDef
from .grammar import RuleOptions
from .grammar import RuleOptions, Rule

from .tree import Tree as T, Transformer, InlineTransformer, Visitor

@@ -485,13 +485,21 @@ class Grammar:

dict_update_safe(rules, ebnf_to_bnf.new_rules)

for tree, _o in rules.values():
rule_tree_to_text = RuleTreeToText()

new_rules = []
for origin, (tree, options) in rules.items():
expansions = rule_tree_to_text.transform(tree)

rule_tree_to_text = RuleTreeToText()
rules = {origin: (rule_tree_to_text.transform(tree), options) for origin, (tree, options) in rules.items()}
for expansion, alias in expansions:
if alias and origin.startswith('_'):
raise Exception("Rule %s is marked for expansion (it starts with an underscore) and isn't allowed to have aliases (alias=%s)" % (origin, alias))

return tokens, rules, self.ignore
rule = Rule(origin, expansion, alias, options)

return tokens, new_rules, self.ignore

@@ -528,14 +536,28 @@ def resolve_token_references(token_defs):
if not changed:

def options_from_rule(name, *x):
    """Split a raw rule definition into its name, expansions and options.

    Parameters:
        name: The rule name as written, optionally prefixed with ``!``
            and/or ``?`` modifier characters.
        *x: Either ``(expansions,)`` or ``(priority, expansions)``.

    Returns:
        A ``(name, expansions, RuleOptions)`` tuple, where ``name`` has had
        its modifier prefix removed and the ``RuleOptions`` carries the
        ``keep_all_tokens``, ``expand1`` and ``priority`` settings.

    Raises:
        ValueError: If ``x`` does not contain exactly one or two items, or
            if the priority cannot be converted by ``int()``.
    """
    if len(x) > 1:
        priority, expansions = x
        priority = int(priority)
    else:
        # Only the expansions were given; the rule has no explicit priority.
        (expansions,) = x
        priority = None

    # NOTE: '!' is examined (and stripped) before '?', so a name written as
    # '!?rule' sets both flags, while '?!rule' only sets expand1 and keeps
    # the '!' in the returned name.
    keep_all_tokens = name.startswith('!')
    name = name.lstrip('!')
    expand1 = name.startswith('?')
    name = name.lstrip('?')

    return name, expansions, RuleOptions(keep_all_tokens, expand1, priority=priority)

class GrammarLoader:
def __init__(self):
tokens = [TokenDef(name, PatternRE(value)) for name, value in TOKENS.items()]

rules = [RuleOptions.from_rule(name, x) for name, x in RULES.items()]
d = {r: ([(x.split(), None) for x in xs], o) for r, xs, o in rules}
rules, callback = ParseTreeBuilder(d, T).apply()
rules = [options_from_rule(name, x) for name, x in RULES.items()]
rules = [Rule(r, x.split(), None, o) for r, xs, o in rules for x in xs]
callback = ParseTreeBuilder(rules, T).apply()
lexer_conf = LexerConf(tokens, ['WS', 'COMMENT'])

parser_conf = ParserConf(rules, callback, 'start')
@@ -625,7 +647,7 @@ class GrammarLoader:
# Resolve token references

rules = [RuleOptions.from_rule(*x) for x in rule_defs]
rules = [options_from_rule(*x) for x in rule_defs]

rule_names = set()
for name, _x, _o in rules:

+ 24
- 27
lark/parse_tree_builder.py View File

@@ -109,49 +109,46 @@ class ParseTreeBuilder:

def _init_builders(self, rules):
filter_out = set()
for origin, (expansions, options) in rules.items():
if options and options.filter_out:
assert origin.startswith('_') # Just to make sure
for rule in rules:
if rule.options and rule.options.filter_out:
assert rule.origin.startswith('_') # Just to make sure

for origin, (expansions, options) in rules.items():
for rule in rules:
options = rule.options
keep_all_tokens = self.always_keep_all_tokens or (options.keep_all_tokens if options else False)
expand1 = options.expand1 if options else False
create_token = options.create_token if options else False

for expansion, alias in expansions:
if alias and origin.startswith('_'):
raise Exception("Rule %s is marked for expansion (it starts with an underscore) and isn't allowed to have aliases (alias=%s)" % (origin, alias))
wrapper_chain = filter(None, [
(expand1 and not rule.alias) and Expand1,
create_token and Factory(TokenWrapper, create_token),
create_rule_handler(rule.expansion, keep_all_tokens, filter_out),
self.propagate_positions and PropagatePositions,

wrapper_chain = filter(None, [
(expand1 and not alias) and Expand1,
create_token and Factory(TokenWrapper, create_token),
create_rule_handler(expansion, keep_all_tokens, filter_out),
self.propagate_positions and PropagatePositions,

yield origin, expansion, options, alias or origin, wrapper_chain
yield rule, wrapper_chain

def apply(self, transformer=None):
callback = Callback()

new_rules = []
for origin, expansion, options, alias, wrapper_chain in self.rule_builders:
callback_name = '_callback_%s_%s' % (origin, '_'.join(expansion))
for rule, wrapper_chain in self.rule_builders:
internal_callback_name = '_callback_%s_%s' % (rule.origin, '_'.join(rule.expansion))

user_callback_name = rule.alias or rule.origin
f = transformer._get_func(alias)
f = transformer._get_func(user_callback_name)
except AttributeError:
f = NodeBuilder(self.tree_class, alias)
f = NodeBuilder(self.tree_class, user_callback_name)

rule.alias = internal_callback_name

for w in wrapper_chain:
f = w(f)

if hasattr(callback, callback_name):
raise GrammarError("Rule expansion '%s' already exists in rule %s" % (' '.join(expansion), origin))
setattr(callback, callback_name, f)

new_rules.append( Rule( origin, expansion, callback_name, options ))
if hasattr(callback, internal_callback_name):
raise GrammarError("Rule '%s' already exists" % (rule,))
setattr(callback, internal_callback_name, f)

return new_rules, callback
return callback
