
Improvements based on @drslump's PR (https://github.com/erezsh/lark/pull/125).

Mostly reductions in memory consumption.
Erez Shinan · 6 years ago · commit 04c0b04add
7 changed files with 31 additions and 27 deletions
  1. lark/load_grammar.py (+19 / -19)
  2. lark/parser_frontends.py (+2 / -2)
  3. lark/parsers/earley.py (+3 / -3)
  4. lark/parsers/grammar_analysis.py (+2 / -1)
  5. lark/parsers/lalr_parser.py (+2 / -1)
  6. lark/tools/standalone.py (+1 / -1)
  7. lark/tree.py (+2 / -0)

lark/load_grammar.py (+19 / -19)

@@ -14,7 +14,7 @@ from .parsers.lalr_parser import UnexpectedToken
 from .common import is_terminal, GrammarError, LexerConf, ParserConf, PatternStr, PatternRE, TokenDef
 from .grammar import RuleOptions, Rule
 
-from .tree import Tree as T, Transformer, InlineTransformer, Visitor
+from .tree import Tree, Transformer, InlineTransformer, Visitor, SlottedTree as ST
 
 __path__ = os.path.dirname(__file__)
 IMPORT_PATHS = [os.path.join(__path__, 'grammars')]
@@ -145,14 +145,14 @@ class EBNF_to_BNF(InlineTransformer):
         new_name = '__%s_%s_%d' % (self.prefix, type_, self.i)
         self.i += 1
         t = Token('RULE', new_name, -1)
-        tree = T('expansions', [T('expansion', [expr]), T('expansion', [t, expr])])
+        tree = ST('expansions', [ST('expansion', [expr]), ST('expansion', [t, expr])])
         self.new_rules.append((new_name, tree, self.rule_options))
         self.rules_by_expr[expr] = t
         return t
 
     def expr(self, rule, op, *args):
         if op.value == '?':
-            return T('expansions', [rule, T('expansion', [])])
+            return ST('expansions', [rule, ST('expansion', [])])
         elif op.value == '+':
             # a : b c+ d
             # -->
@@ -165,7 +165,7 @@ class EBNF_to_BNF(InlineTransformer):
             # a : b _c? d
             # _c : _c c | c;
             new_name = self._add_recurse_rule('star', rule)
-            return T('expansions', [new_name, T('expansion', [])])
+            return ST('expansions', [new_name, ST('expansion', [])])
         elif op.value == '~':
             if len(args) == 1:
                 mn = mx = int(args[0])
@@ -173,7 +173,7 @@ class EBNF_to_BNF(InlineTransformer):
                 mn, mx = map(int, args)
                 if mx < mn:
                     raise GrammarError("Bad Range for %s (%d..%d isn't allowed)" % (rule, mn, mx))
-            return T('expansions', [T('expansion', [rule] * n) for n in range(mn, mx+1)])
+            return ST('expansions', [ST('expansion', [rule] * n) for n in range(mn, mx+1)])
         assert False, op


@@ -183,7 +183,7 @@ class SimplifyRule_Visitor(Visitor):
     def _flatten(tree):
         while True:
             to_expand = [i for i, child in enumerate(tree.children)
-                            if isinstance(child, T) and child.data == tree.data]
+                            if isinstance(child, Tree) and child.data == tree.data]
             if not to_expand:
                 break
             tree.expand_kids_by_index(*to_expand)
@@ -203,9 +203,9 @@ class SimplifyRule_Visitor(Visitor):
         self._flatten(tree)
 
         for i, child in enumerate(tree.children):
-            if isinstance(child, T) and child.data == 'expansions':
+            if isinstance(child, Tree) and child.data == 'expansions':
                 tree.data = 'expansions'
-                tree.children = [self.visit(T('expansion', [option if i==j else other
+                tree.children = [self.visit(ST('expansion', [option if i==j else other
                                                             for j, other in enumerate(tree.children)]))
                                  for option in set(child.children)]
                 break
@@ -217,7 +217,7 @@ class SimplifyRule_Visitor(Visitor):
         if rule.data == 'expansions':
             aliases = []
             for child in tree.children[0].children:
-                aliases.append(T('alias', [child, alias_name]))
+                aliases.append(ST('alias', [child, alias_name]))
             tree.data = 'expansions'
             tree.children = aliases

@@ -239,7 +239,7 @@ class RuleTreeToText(Transformer):

 class CanonizeTree(InlineTransformer):
     def maybe(self, expr):
-        return T('expr', [expr, Token('OP', '?', -1)])
+        return ST('expr', [expr, Token('OP', '?', -1)])
 
     def tokenmods(self, *args):
         if len(args) == 1:
@@ -353,7 +353,7 @@ def _literal_to_pattern(literal):

 class PrepareLiterals(InlineTransformer):
     def literal(self, literal):
-        return T('pattern', [_literal_to_pattern(literal)])
+        return ST('pattern', [_literal_to_pattern(literal)])
 
     def range(self, start, end):
         assert start.type == end.type == 'STRING'
@@ -361,13 +361,13 @@ class PrepareLiterals(InlineTransformer):
         end = end.value[1:-1]
         assert len(start) == len(end) == 1, (start, end, len(start), len(end))
         regexp = '[%s-%s]' % (start, end)
-        return T('pattern', [PatternRE(regexp)])
+        return ST('pattern', [PatternRE(regexp)])
 
 class SplitLiterals(InlineTransformer):
     def pattern(self, p):
         if isinstance(p, PatternStr) and len(p.value)>1:
-            return T('expansion', [T('pattern', [PatternStr(ch, flags=p.flags)]) for ch in p.value])
-        return T('pattern', [p])
+            return ST('expansion', [ST('pattern', [PatternStr(ch, flags=p.flags)]) for ch in p.value])
+        return ST('pattern', [p])
 
 class TokenTreeToPattern(Transformer):
     def pattern(self, ps):
@@ -408,14 +408,14 @@ class TokenTreeToPattern(Transformer):
 def _interleave(l, item):
     for e in l:
         yield e
-        if isinstance(e, T):
+        if isinstance(e, Tree):
             if e.data in ('literal', 'range'):
                 yield item
         elif is_terminal(e):
             yield item
 
 def _choice_of_rules(rules):
-    return T('expansions', [T('expansion', [Token('RULE', name)]) for name in rules])
+    return ST('expansions', [ST('expansion', [Token('RULE', name)]) for name in rules])
 
 class Grammar:
     def __init__(self, rule_defs, token_defs, ignore):
@@ -442,9 +442,9 @@ class Grammar:
                     if r == start:
                         exp.children = [expr] + exp.children
                 for exp in tree.find_data('expr'):
-                    exp.children[0] = T('expansion', list(_interleave(exp.children[:1], expr)))
+                    exp.children[0] = ST('expansion', list(_interleave(exp.children[:1], expr)))
 
-            _ignore_tree = T('expr', [_choice_of_rules(terms_to_ignore.values()), Token('OP', '?')])
+            _ignore_tree = ST('expr', [_choice_of_rules(terms_to_ignore.values()), Token('OP', '?')])
             rule_defs.append(('__ignore', _ignore_tree, None))
 
             # Convert all tokens to rules
@@ -584,7 +584,7 @@ class GrammarLoader:

         rules = [options_from_rule(name, x) for name, x in RULES.items()]
         rules = [Rule(r, x.split(), None, o) for r, xs, o in rules for x in xs]
-        callback = ParseTreeBuilder(rules, T).create_callback()
+        callback = ParseTreeBuilder(rules, ST).create_callback()
         lexer_conf = LexerConf(tokens, ['WS', 'COMMENT'])
 
         parser_conf = ParserConf(rules, callback, 'start')
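
Why the T-to-ST swap matters in this file: EBNF-to-BNF expansion allocates many short-lived tree nodes, e.g. the '~' repetition rule above fans out into one alternative per allowed repeat count. A minimal sketch of that fan-out, with plain tuples as hypothetical stand-ins for the ST(...) nodes:

    # Sketch only: tuples stand in for ST('expansions', ...) / ST('expansion', ...)
    def expand_range(rule, mn, mx):
        # rule~mn..mx becomes one 'expansion' alternative per repeat count
        return ('expansions', [('expansion', [rule] * n) for n in range(mn, mx + 1)])

    print(expand_range('c', 2, 4))
    # -> ('expansions', [('expansion', ['c', 'c']),
    #                    ('expansion', ['c', 'c', 'c']),
    #                    ('expansion', ['c', 'c', 'c', 'c'])])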


lark/parser_frontends.py (+2 / -2)

@@ -15,9 +15,9 @@ class WithLexer:

     def init_contextual_lexer(self, lexer_conf, parser_conf):
         self.lexer_conf = lexer_conf
-        d = {idx:t.keys() for idx, t in self.parser.analysis.parse_table.states.items()}
+        states = {idx:list(t.keys()) for idx, t in self.parser._parse_table.states.items()}
         always_accept = lexer_conf.postlex.always_accept if lexer_conf.postlex else ()
-        self.lexer = ContextualLexer(lexer_conf.tokens, d, ignore=lexer_conf.ignore, always_accept=always_accept, user_callbacks=lexer_conf.callbacks)
+        self.lexer = ContextualLexer(lexer_conf.tokens, states, ignore=lexer_conf.ignore, always_accept=always_accept, user_callbacks=lexer_conf.callbacks)
 
     def lex(self, text):
         stream = self.lexer.lex(text)
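
For illustration, a toy version of the state map built above. list(t.keys()) pins each entry down to a plain list instead of repeatedly touching rows of the live parse table (the table data here is invented):

    # Toy parse table: state index -> {token name: action}
    table_states = {
        0: {'NAME': ('shift', 1), 'NUMBER': ('shift', 2)},
        1: {'PLUS': ('shift', 0), '$END': ('reduce', 'expr')},
    }
    states = {idx: list(t.keys()) for idx, t in table_states.items()}
    print(states)  # e.g. {0: ['NAME', 'NUMBER'], 1: ['PLUS', '$END']}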


lark/parsers/earley.py (+3 / -3)

@@ -145,16 +145,16 @@ class Column:

 class Parser:
     def __init__(self, parser_conf, term_matcher, resolve_ambiguity=None):
-        self.analysis = GrammarAnalyzer(parser_conf)
+        analysis = GrammarAnalyzer(parser_conf)
         self.parser_conf = parser_conf
         self.resolve_ambiguity = resolve_ambiguity
 
-        self.FIRST = self.analysis.FIRST
+        self.FIRST = analysis.FIRST
         self.postprocess = {}
         self.predictions = {}
         for rule in parser_conf.rules:
             self.postprocess[rule] = rule.alias if callable(rule.alias) else getattr(parser_conf.callback, rule.alias)
-            self.predictions[rule.origin] = [x.rule for x in self.analysis.expand_rule(rule.origin)]
+            self.predictions[rule.origin] = [x.rule for x in analysis.expand_rule(rule.origin)]
 
         self.term_matcher = term_matcher
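
The point of demoting self.analysis to a local: the GrammarAnalyzer, and everything hanging off it, becomes garbage once __init__ returns, while the parser keeps only FIRST and the expanded predictions. A minimal sketch with a hypothetical stand-in analyzer:

    import gc
    import weakref

    class FakeAnalyzer(object):          # hypothetical stand-in for GrammarAnalyzer
        def __init__(self):
            self.FIRST = {}              # the only piece the parser actually keeps

    class Parser(object):
        def __init__(self):
            analysis = FakeAnalyzer()    # local variable, as in the diff above
            self.FIRST = analysis.FIRST  # keep the data, not its producer
            self._probe = weakref.ref(analysis)

    p = Parser()
    gc.collect()
    print(p._probe())  # None: the analyzer was freed once __init__ returned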



lark/parsers/grammar_analysis.py (+2 / -1)

@@ -136,7 +136,8 @@ class GrammarAnalyzer(object):
                 if not is_terminal(new_r):
                     yield new_r
 
-        _ = list(bfs([rule], _expand_rule))
+        for _ in bfs([rule], _expand_rule):
+            pass
 
         return fzset(init_ptrs)
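
Both forms run the bfs traversal purely for its side effects on init_ptrs, but list(...) also materializes every visited item into a throwaway list; the replacement loop keeps peak memory flat. A self-contained sketch, with bfs_like as a stand-in for bfs([rule], _expand_rule):

    def bfs_like(n):                 # stand-in generator
        for i in range(n):
            yield i

    _ = list(bfs_like(10**6))        # old form: builds a million-item list, then drops it

    for _ in bfs_like(10**6):        # new form: one item alive at a time
        pass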



lark/parsers/lalr_parser.py (+2 / -1)

@@ -11,11 +11,12 @@ class Parser:
     def __init__(self, parser_conf):
         assert all(r.options is None or r.options.priority is None
                    for r in parser_conf.rules), "LALR doesn't yet support prioritization"
-        self.analysis = analysis = LALR_Analyzer(parser_conf)
+        analysis = LALR_Analyzer(parser_conf)
         analysis.compute_lookahead()
         callbacks = {rule: getattr(parser_conf.callback, rule.alias or rule.origin, None)
                      for rule in parser_conf.rules}
 
+        self._parse_table = analysis.parse_table
         self.parser_conf = parser_conf
         self.parser = _Parser(analysis.parse_table, callbacks)
         self.parse = self.parser.parse
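
After this change only the parse table survives construction; the analyzer itself can be collected. The two consumers touched in this commit (WithLexer in parser_frontends.py and ParserAtoms in tools/standalone.py) read it via the new _parse_table attribute. A toy sketch of that access pattern, with invented classes:

    class FakeParseTable(object):
        def __init__(self, states):
            self.states = states

    class FakeParser(object):
        def __init__(self):
            # the analyzer would live and die inside __init__; only its table is kept
            self._parse_table = FakeParseTable({0: {'NAME': ('shift', 1)}})

    parser = FakeParser()
    print(parser._parse_table.states)  # what WithLexer and ParserAtoms now read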


lark/tools/standalone.py (+1 / -1)

@@ -126,7 +126,7 @@ def _get_token_type(token_type):

 class ParserAtoms:
     def __init__(self, parser):
-        self.parse_table = parser.analysis.parse_table
+        self.parse_table = parser._parse_table
 
     def print_python(self):
         print('class ParseTable: pass')


lark/tree.py (+2 / -0)

@@ -99,6 +99,8 @@ class Tree(object):
         self.data = data
         self.children = children
 
+class SlottedTree(Tree):
+    __slots__ = 'data', 'children', 'rule'
 
 
 ###{standalone
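
The memory effect behind SlottedTree: a class with __slots__ stores attributes in fixed slots instead of a per-instance __dict__. A sketch with stand-in classes, fully slotted for clarity (the real SlottedTree inherits from the un-slotted Tree, so its saving comes from attributes landing in slots rather than populating the instance dict):

    import sys

    class PlainNode(object):              # stand-in for Tree
        def __init__(self, data, children):
            self.data = data
            self.children = children

    class SlottedNode(object):            # stand-in for SlottedTree
        __slots__ = ('data', 'children', 'rule')
        def __init__(self, data, children):
            self.data = data
            self.children = children

    p = PlainNode('expansion', [])
    s = SlottedNode('expansion', [])
    print(sys.getsizeof(p) + sys.getsizeof(p.__dict__))  # object plus its attribute dict
    print(sys.getsizeof(s))                              # smaller: no __dict__ at all
    print(hasattr(s, '__dict__'))                        # False for a fully slotted class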

