Browse Source

Improvements based on the PR of @drslump (https://github.com/erezsh/lark/pull/125)

Mostly improvements to memory consumption.
tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.6.0
Erez Shinan 7 years ago
parent
commit
04c0b04add
7 changed files with 31 additions and 27 deletions
  1. +19
    -19
      lark/load_grammar.py
  2. +2
    -2
      lark/parser_frontends.py
  3. +3
    -3
      lark/parsers/earley.py
  4. +2
    -1
      lark/parsers/grammar_analysis.py
  5. +2
    -1
      lark/parsers/lalr_parser.py
  6. +1
    -1
      lark/tools/standalone.py
  7. +2
    -0
      lark/tree.py

+ 19
- 19
lark/load_grammar.py View File

@@ -14,7 +14,7 @@ from .parsers.lalr_parser import UnexpectedToken
from .common import is_terminal, GrammarError, LexerConf, ParserConf, PatternStr, PatternRE, TokenDef
from .grammar import RuleOptions, Rule

- from .tree import Tree as T, Transformer, InlineTransformer, Visitor
+ from .tree import Tree, Transformer, InlineTransformer, Visitor, SlottedTree as ST

__path__ = os.path.dirname(__file__)
IMPORT_PATHS = [os.path.join(__path__, 'grammars')]
@@ -145,14 +145,14 @@ class EBNF_to_BNF(InlineTransformer):
new_name = '__%s_%s_%d' % (self.prefix, type_, self.i)
self.i += 1
t = Token('RULE', new_name, -1)
- tree = T('expansions', [T('expansion', [expr]), T('expansion', [t, expr])])
+ tree = ST('expansions', [ST('expansion', [expr]), ST('expansion', [t, expr])])
self.new_rules.append((new_name, tree, self.rule_options))
self.rules_by_expr[expr] = t
return t

def expr(self, rule, op, *args):
if op.value == '?':
- return T('expansions', [rule, T('expansion', [])])
+ return ST('expansions', [rule, ST('expansion', [])])
elif op.value == '+':
# a : b c+ d
# -->
@@ -165,7 +165,7 @@ class EBNF_to_BNF(InlineTransformer):
# a : b _c? d
# _c : _c c | c;
new_name = self._add_recurse_rule('star', rule)
- return T('expansions', [new_name, T('expansion', [])])
+ return ST('expansions', [new_name, ST('expansion', [])])
elif op.value == '~':
if len(args) == 1:
mn = mx = int(args[0])
@@ -173,7 +173,7 @@ class EBNF_to_BNF(InlineTransformer):
mn, mx = map(int, args)
if mx < mn:
raise GrammarError("Bad Range for %s (%d..%d isn't allowed)" % (rule, mn, mx))
- return T('expansions', [T('expansion', [rule] * n) for n in range(mn, mx+1)])
+ return ST('expansions', [ST('expansion', [rule] * n) for n in range(mn, mx+1)])
assert False, op




@@ -183,7 +183,7 @@ class SimplifyRule_Visitor(Visitor):
def _flatten(tree):
while True:
to_expand = [i for i, child in enumerate(tree.children)
- if isinstance(child, T) and child.data == tree.data]
+ if isinstance(child, Tree) and child.data == tree.data]
if not to_expand:
break
tree.expand_kids_by_index(*to_expand)
@@ -203,9 +203,9 @@ class SimplifyRule_Visitor(Visitor):
self._flatten(tree)

for i, child in enumerate(tree.children):
- if isinstance(child, T) and child.data == 'expansions':
+ if isinstance(child, Tree) and child.data == 'expansions':
tree.data = 'expansions'
- tree.children = [self.visit(T('expansion', [option if i==j else other
+ tree.children = [self.visit(ST('expansion', [option if i==j else other
for j, other in enumerate(tree.children)]))
for option in set(child.children)]
break
@@ -217,7 +217,7 @@ class SimplifyRule_Visitor(Visitor):
if rule.data == 'expansions':
aliases = []
for child in tree.children[0].children:
- aliases.append(T('alias', [child, alias_name]))
+ aliases.append(ST('alias', [child, alias_name]))
tree.data = 'expansions'
tree.children = aliases


@@ -239,7 +239,7 @@ class RuleTreeToText(Transformer):

class CanonizeTree(InlineTransformer):
def maybe(self, expr):
- return T('expr', [expr, Token('OP', '?', -1)])
+ return ST('expr', [expr, Token('OP', '?', -1)])

def tokenmods(self, *args):
if len(args) == 1:
@@ -353,7 +353,7 @@ def _literal_to_pattern(literal):

class PrepareLiterals(InlineTransformer):
def literal(self, literal):
- return T('pattern', [_literal_to_pattern(literal)])
+ return ST('pattern', [_literal_to_pattern(literal)])

def range(self, start, end):
assert start.type == end.type == 'STRING'
@@ -361,13 +361,13 @@ class PrepareLiterals(InlineTransformer):
end = end.value[1:-1]
assert len(start) == len(end) == 1, (start, end, len(start), len(end))
regexp = '[%s-%s]' % (start, end)
- return T('pattern', [PatternRE(regexp)])
+ return ST('pattern', [PatternRE(regexp)])

class SplitLiterals(InlineTransformer):
def pattern(self, p):
if isinstance(p, PatternStr) and len(p.value)>1:
- return T('expansion', [T('pattern', [PatternStr(ch, flags=p.flags)]) for ch in p.value])
- return T('pattern', [p])
+ return ST('expansion', [ST('pattern', [PatternStr(ch, flags=p.flags)]) for ch in p.value])
+ return ST('pattern', [p])

class TokenTreeToPattern(Transformer):
def pattern(self, ps):
@@ -408,14 +408,14 @@ class TokenTreeToPattern(Transformer):
def _interleave(l, item):
for e in l:
yield e
- if isinstance(e, T):
+ if isinstance(e, Tree):
if e.data in ('literal', 'range'):
yield item
elif is_terminal(e):
yield item

def _choice_of_rules(rules):
- return T('expansions', [T('expansion', [Token('RULE', name)]) for name in rules])
+ return ST('expansions', [ST('expansion', [Token('RULE', name)]) for name in rules])

class Grammar:
def __init__(self, rule_defs, token_defs, ignore):
@@ -442,9 +442,9 @@ class Grammar:
if r == start:
exp.children = [expr] + exp.children
for exp in tree.find_data('expr'):
- exp.children[0] = T('expansion', list(_interleave(exp.children[:1], expr)))
+ exp.children[0] = ST('expansion', list(_interleave(exp.children[:1], expr)))

- _ignore_tree = T('expr', [_choice_of_rules(terms_to_ignore.values()), Token('OP', '?')])
+ _ignore_tree = ST('expr', [_choice_of_rules(terms_to_ignore.values()), Token('OP', '?')])
rule_defs.append(('__ignore', _ignore_tree, None))

# Convert all tokens to rules
@@ -584,7 +584,7 @@ class GrammarLoader:

rules = [options_from_rule(name, x) for name, x in RULES.items()]
rules = [Rule(r, x.split(), None, o) for r, xs, o in rules for x in xs]
- callback = ParseTreeBuilder(rules, T).create_callback()
+ callback = ParseTreeBuilder(rules, ST).create_callback()
lexer_conf = LexerConf(tokens, ['WS', 'COMMENT'])

parser_conf = ParserConf(rules, callback, 'start')


+ 2
- 2
lark/parser_frontends.py View File

@@ -15,9 +15,9 @@ class WithLexer:

def init_contextual_lexer(self, lexer_conf, parser_conf):
self.lexer_conf = lexer_conf
- d = {idx:t.keys() for idx, t in self.parser.analysis.parse_table.states.items()}
+ states = {idx:list(t.keys()) for idx, t in self.parser._parse_table.states.items()}
always_accept = lexer_conf.postlex.always_accept if lexer_conf.postlex else ()
- self.lexer = ContextualLexer(lexer_conf.tokens, d, ignore=lexer_conf.ignore, always_accept=always_accept, user_callbacks=lexer_conf.callbacks)
+ self.lexer = ContextualLexer(lexer_conf.tokens, states, ignore=lexer_conf.ignore, always_accept=always_accept, user_callbacks=lexer_conf.callbacks)

def lex(self, text):
stream = self.lexer.lex(text)


+ 3
- 3
lark/parsers/earley.py View File

@@ -145,16 +145,16 @@ class Column:

class Parser:
def __init__(self, parser_conf, term_matcher, resolve_ambiguity=None):
- self.analysis = GrammarAnalyzer(parser_conf)
+ analysis = GrammarAnalyzer(parser_conf)
self.parser_conf = parser_conf
self.resolve_ambiguity = resolve_ambiguity

- self.FIRST = self.analysis.FIRST
+ self.FIRST = analysis.FIRST
self.postprocess = {}
self.predictions = {}
for rule in parser_conf.rules:
self.postprocess[rule] = rule.alias if callable(rule.alias) else getattr(parser_conf.callback, rule.alias)
- self.predictions[rule.origin] = [x.rule for x in self.analysis.expand_rule(rule.origin)]
+ self.predictions[rule.origin] = [x.rule for x in analysis.expand_rule(rule.origin)]

self.term_matcher = term_matcher




+ 2
- 1
lark/parsers/grammar_analysis.py View File

@@ -136,7 +136,8 @@ class GrammarAnalyzer(object):
if not is_terminal(new_r):
yield new_r

- _ = list(bfs([rule], _expand_rule))
+ for _ in bfs([rule], _expand_rule):
+ pass

return fzset(init_ptrs)




+ 2
- 1
lark/parsers/lalr_parser.py View File

@@ -11,11 +11,12 @@ class Parser:
def __init__(self, parser_conf):
assert all(r.options is None or r.options.priority is None
for r in parser_conf.rules), "LALR doesn't yet support prioritization"
- self.analysis = analysis = LALR_Analyzer(parser_conf)
+ analysis = LALR_Analyzer(parser_conf)
analysis.compute_lookahead()
callbacks = {rule: getattr(parser_conf.callback, rule.alias or rule.origin, None)
for rule in parser_conf.rules}

+ self._parse_table = analysis.parse_table
self.parser_conf = parser_conf
self.parser = _Parser(analysis.parse_table, callbacks)
self.parse = self.parser.parse


+ 1
- 1
lark/tools/standalone.py View File

@@ -126,7 +126,7 @@ def _get_token_type(token_type):

class ParserAtoms:
def __init__(self, parser):
- self.parse_table = parser.analysis.parse_table
+ self.parse_table = parser._parse_table

def print_python(self):
print('class ParseTable: pass')


+ 2
- 0
lark/tree.py View File

@@ -99,6 +99,8 @@ class Tree(object):
self.data = data
self.children = children

+ class SlottedTree(Tree):
+ __slots__ = 'data', 'children', 'rule'

###{standalone


Loading…
Cancel
Save