
LALR optimizations and profiling

Raekye committed 5 years ago (commit 6f412c25b7)
4 changed files with 348 additions and 46 deletions:

  1. lark/grammar.py (+8, -3)
  2. lark/parsers/grammar_analysis.py (+113, -8)
  3. lark/parsers/lalr_analysis.py (+215, -35)
  4. lark/parsers/lalr_parser.py (+12, -0)
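
The changes below share one theme: make the objects used during LALR table construction cheap to hash and compare, by precomputing hashes and by interning objects so there is never more than one instance per logical value (each Rule caches its index-0 RulePtr, each RulePtr caches its advance() result and its (RulePtr, lookahead) pairs), plus timing instrumentation to see where construction time goes. A minimal, self-contained sketch of the interning pattern (hypothetical names, not lark's API):

# Sketch of the interning pattern used throughout this commit (hypothetical names).
# With at most one object per logical value, the default identity-based
# __eq__/__hash__ suffice, so set membership checks stop hashing tuples
# and comparing fields.

class Pair(object):
    # Interned (item, lookahead) pair; hash/eq are deliberately left as object identity.
    __slots__ = ('item', 'la')

    def __init__(self, item, la):
        self.item = item
        self.la = la


class Item(object):
    __slots__ = ('rule', 'index', '_advance', '_pairs')

    def __init__(self, rule, index):
        self.rule = rule
        self.index = index
        self._advance = None   # cached successor: same (rule, index + 1) -> same object
        self._pairs = {}       # lookahead -> interned Pair

    def advance(self):
        if self._advance is None:
            self._advance = Item(self.rule, self.index + 1)
        return self._advance

    def pair(self, la):
        p = self._pairs.get(la)
        if p is None:
            p = Pair(self, la)
            self._pairs[la] = p
        return p

# item.advance() is item.advance()  -> True
# item.pair('A') is item.pair('A')  -> True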

lark/grammar.py (+8, -3)

@@ -3,10 +3,13 @@ from .utils import Serialize
 ###{standalone


 class Symbol(Serialize):
+    __slots__ = ('name', '_hash')
+
     is_term = NotImplemented

     def __init__(self, name):
         self.name = name
+        self._hash = hash(self.name)

     def __eq__(self, other):
         assert isinstance(other, Symbol), other
@@ -16,7 +19,7 @@ class Symbol(Serialize):
         return not (self == other)

     def __hash__(self):
-        return hash(self.name)
+        return self._hash

     def __repr__(self):
         return '%s(%r)' % (type(self).__name__, self.name)
@@ -31,6 +34,7 @@ class Terminal(Symbol):

     def __init__(self, name, filter_out=False):
         self.name = name
+        self._hash = hash(self.name)
         self.filter_out = filter_out

     @property
@@ -69,7 +73,7 @@ class Rule(Serialize):
         expansion : a list of symbols
         order : index of this expansion amongst all rules of the same name
     """
-    __slots__ = ('origin', 'expansion', 'alias', 'options', 'order', '_hash')
+    __slots__ = ('origin', 'expansion', 'alias', 'options', 'order', '_hash', '_rp')

     __serialize_fields__ = 'origin', 'expansion', 'order', 'alias', 'options'
     __serialize_namespace__ = Terminal, NonTerminal, RuleOptions
@@ -81,6 +85,7 @@ class Rule(Serialize):
         self.order = order
         self.options = options
         self._hash = hash((self.origin, tuple(self.expansion)))
+        self._rp = None

     def _deserialize(self):
         self._hash = hash((self.origin, tuple(self.expansion)))
@@ -101,4 +106,4 @@ class Rule(Serialize):



 ###}
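
Symbol.__hash__ is on the hot path of table construction (every set and dict lookup of a terminal or nonterminal goes through it), which is why the hash of the name is now computed once in __init__ and stored in a slot. A rough micro-benchmark of the difference (illustrative only, not part of the patch):

import timeit

class SymUncached(object):
    __slots__ = ('name',)
    def __init__(self, name):
        self.name = name
    def __hash__(self):
        return hash(self.name)        # hashes the string on every call

class SymCached(object):
    __slots__ = ('name', '_hash')
    def __init__(self, name):
        self.name = name
        self._hash = hash(self.name)  # hash the string once, up front
    def __hash__(self):
        return self._hash             # return the precomputed int

a, b = SymUncached('expression'), SymCached('expression')
print(timeit.timeit(lambda: hash(a)))
print(timeit.timeit(lambda: hash(b)))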

lark/parsers/grammar_analysis.py (+113, -8)

@@ -1,18 +1,41 @@
-from collections import Counter
+from collections import Counter, defaultdict

 from ..utils import bfs, fzset, classify
 from ..exceptions import GrammarError
 from ..grammar import Rule, Terminal, NonTerminal
+import time
+
+t_firsts = 0
+t_xy = 0
+t_call = 0
+cache_hits = 0
+cache_misses = 0
+
+# used to be just a tuple (rp, la)
+# but by making it an object,
+# the hash and equality become trivial
+# (slightly faster for sets which are hashtables?)
+class RulePtrLookahead(object):
+    __slots__ = 'rp', 'la'
+
+    def __init__(self, rp, la):
+        self.rp = rp
+        self.la = la


 class RulePtr(object):
-    __slots__ = ('rule', 'index')
+    __slots__ = ('rule', 'index', '_advance', '_lookaheads', '_next_rules_by_origin', '_first')

     def __init__(self, rule, index):
         assert isinstance(rule, Rule)
         assert index <= len(rule.expansion)
         self.rule = rule
         self.index = index
+        #self._hash = hash((self.rule, self.index))
+        #self._hash = None
+        self._advance = None
+        self._lookaheads = {}
+        self._next_rules_by_origin = None
+        self._first = None

     def __repr__(self):
         before = [x.name for x in self.rule.expansion[:self.index]]
@@ -23,32 +46,102 @@ class RulePtr(object):
     def next(self):
         return self.rule.expansion[self.index]

+    # don't create duplicate RulePtrs
     def advance(self, sym):
         assert self.next == sym
-        return RulePtr(self.rule, self.index+1)
+        a = self._advance
+        if a is None:
+            a = RulePtr(self.rule, self.index + 1)
+            self._advance = a
+        return a

     @property
     def is_satisfied(self):
         return self.index == len(self.rule.expansion)

+    def lookahead(self, la):
+        rp_la = self._lookaheads.get(la, None)
+        if rp_la is None:
+            rp_la = RulePtrLookahead(self, la)
+            self._lookaheads[la] = rp_la
+        return rp_la
+
+    def next_rules_by_origin(self, rules_by_origin):
+        n = self._next_rules_by_origin
+        if n is None:
+            n = rules_by_origin[self.next]
+            self._next_rules_by_origin = n
+        return n
+
+    # recursive form of lalr_analysis.py:343 (which is easier to understand IMO)
+    # normally avoid recursion but this allows us to cache
+    # each intermediate step in a corresponding RulePtr
+    def first(self, i, firsts, nullable, t):
+        global cache_hits
+        global cache_misses
+        global t_firsts
+        global t_xy
+        global t_call
+        t_call += time.time() - t
+        n = len(self.rule.expansion)
+        if i == n:
+            return ([], True)
+        x = self._first
+        t_x = time.time()
+        if x is None:
+            t0 = time.time()
+            t_y = time.time()
+            cache_misses += 1
+            s = self.rule.expansion[i]
+            l = list(firsts.get(s, []))
+            b = (s in nullable)
+            if b:
+                t1 = time.time()
+                t_firsts += t1 - t0
+                l_b_2 = self.advance(s).first(i + 1, firsts, nullable, time.time())
+                #l_b_2 = first(self.advance(self.next), i + 1, firsts, nullable, time.time())
+                t0 = time.time()
+                l.extend(l_b_2[0])
+                b = l_b_2[1]
+            x = (l, b)
+            self._first = x
+            t1 = time.time()
+            t_firsts += t1 - t0
+        else:
+            t_y = time.time()
+            cache_hits += 1
+        t_xy += t_y - t_x
+        return x
+
+    # optimizations were made so that there should never be
+    # two distinct equal RulePtrs
+    # should help set/hashtable lookups?
+    '''
     def __eq__(self, other):
         return self.rule == other.rule and self.index == other.index

     def __hash__(self):
-        return hash((self.rule, self.index))
+        return self._hash
+    '''


 class LR0ItemSet(object):
-    __slots__ = ('kernel', 'closure', 'transitions')
+    __slots__ = ('kernel', 'closure', 'transitions', 'lookaheads', '_hash')

     def __init__(self, kernel, closure):
         self.kernel = fzset(kernel)
         self.closure = fzset(closure)
         self.transitions = {}
+        self.lookaheads = defaultdict(set)
+        #self._hash = hash(self.kernel)

+    # state generation ensures no duplicate LR0ItemSets
+    '''
     def __eq__(self, other):
         return self.kernel == other.kernel

     def __hash__(self):
-        return hash(self.kernel)
+        return self._hash
+    '''

     def __repr__(self):
         return '{%s | %s}' % (', '.join([repr(r) for r in self.kernel]), ', '.join([repr(r) for r in self.closure]))
@@ -153,14 +246,22 @@ class GrammarAnalyzer(object):
                                  for start in parser_conf.start}

         lr0_rules = parser_conf.rules + list(lr0_root_rules.values())
+        assert(len(lr0_rules) == len(set(lr0_rules)))

         self.lr0_rules_by_origin = classify(lr0_rules, lambda r: r.origin)

-        self.lr0_start_states = {start: LR0ItemSet([RulePtr(root_rule, 0)], self.expand_rule(root_rule.origin, self.lr0_rules_by_origin))
+        # cache RulePtr(r, 0) in r (no duplicate RulePtr objects)
+        for root_rule in lr0_root_rules.values():
+            root_rule._rp = RulePtr(root_rule, 0)
+        self.lr0_start_states = {start: LR0ItemSet([root_rule._rp], self.expand_rule(root_rule.origin, self.lr0_rules_by_origin))
                                  for start, root_rule in lr0_root_rules.items()}

         self.FIRST, self.FOLLOW, self.NULLABLE = calculate_sets(rules)

+        # unused, did not help
+        self.lr1_cache = {}
+        self.lr1_cache2 = {}
+
     def expand_rule(self, source_rule, rules_by_origin=None):
         "Returns all init_ptrs accessible by rule (recursive)"

@@ -172,7 +273,11 @@ class GrammarAnalyzer(object):
             assert not rule.is_term, rule

             for r in rules_by_origin[rule]:
-                init_ptr = RulePtr(r, 0)
+                # don't create duplicate RulePtr objects
+                init_ptr = r._rp
+                if init_ptr is None:
+                    init_ptr = RulePtr(r, 0)
+                    r._rp = init_ptr
                 init_ptrs.add(init_ptr)

                 if r.expansion: # if not empty rule
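
Stripped of the profiling counters, what RulePtr.first memoizes is the standard FIRST set of the remainder of a production, together with a flag saying whether that remainder is nullable. A plain, non-memoized sketch of that computation (assuming FIRST maps each symbol to its first set and NULLABLE is the set of nullable symbols, as produced by calculate_sets):

def first_of_rest(expansion, i, FIRST, NULLABLE):
    """FIRST of expansion[i:], and whether that whole suffix can derive the empty string."""
    symbols = []
    n = len(expansion)
    while i < n:
        s = expansion[i]
        symbols.extend(FIRST.get(s, []))
        if s not in NULLABLE:
            return symbols, False      # a non-nullable symbol ends the scan
        i += 1
    return symbols, True               # ran off the end: the suffix is nullable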


lark/parsers/lalr_analysis.py (+215, -35)

@@ -7,13 +7,16 @@ For now, shift/reduce conflicts are automatically resolved as shifts.
 # Email : erezshin@gmail.com

 import logging
-from collections import defaultdict
+from collections import defaultdict, deque

 from ..utils import classify, classify_bool, bfs, fzset, Serialize, Enumerator
 from ..exceptions import GrammarError

 from .grammar_analysis import GrammarAnalyzer, Terminal, RulePtr, LR0ItemSet
 from ..grammar import Rule
+from . import grammar_analysis
+
+import time


 ###{standalone


@@ -28,6 +31,16 @@ class Action:
 Shift = Action('Shift')
 Reduce = Action('Reduce')

+t_set_0 = 0
+t_set_1 = 0
+t_expand = 0
+t_rules = 0
+t_append = 0
+t_z = 0
+t_begin = 0
+t_count = 0
+t_call = 0
+
 class ParseTable:
     def __init__(self, states, start_states, end_states):
         self.states = states
@@ -86,20 +99,24 @@ class LALR_Analyzer(GrammarAnalyzer):

     def generate_lr0_states(self):
         self.states = set()
+        # map of kernels to LR0ItemSets
+        cache = {}

         def step(state):
             _, unsat = classify_bool(state.closure, lambda rp: rp.is_satisfied)

             d = classify(unsat, lambda rp: rp.next)
             for sym, rps in d.items():
-                kernel = {rp.advance(sym) for rp in rps}
-                closure = set(kernel)
-
-                for rp in kernel:
-                    if not rp.is_satisfied and not rp.next.is_term:
-                        closure |= self.expand_rule(rp.next, self.lr0_rules_by_origin)
-
-                new_state = LR0ItemSet(kernel, closure)
+                kernel = fzset({rp.advance(sym) for rp in rps})
+                new_state = cache.get(kernel, None)
+                if new_state is None:
+                    closure = set(kernel)
+                    for rp in kernel:
+                        if not rp.is_satisfied and not rp.next.is_term:
+                            closure |= self.expand_rule(rp.next, self.lr0_rules_by_origin)
+                    new_state = LR0ItemSet(kernel, closure)
+                    cache[kernel] = new_state
+
                 state.transitions[sym] = new_state
                 yield new_state


@@ -109,36 +126,59 @@ class LALR_Analyzer(GrammarAnalyzer):
             pass

     def discover_lookaheads(self):
+        # lookaheads is now a member of LR0ItemSet, so don't need to look up a dictionary here
         # state -> rule -> set of lookaheads
-        self.lookaheads = defaultdict(lambda: defaultdict(set))
+        #self.lookaheads = defaultdict(lambda: defaultdict(set))
         # state -> rule -> list of (set of lookaheads) to propagate to
-        self.propagates = defaultdict(lambda: defaultdict(list))
+        #self.propagates = defaultdict(lambda: defaultdict(list))
+        self.propagates = {}
+
+        t0 = time.time()

+        t = Terminal('$END')
         for s in self.lr0_start_states.values():
             for rp in s.kernel:
-                self.lookaheads[s][rp].add(Terminal('$END'))
+                #self.lookaheads[s][rp].add(Terminal('$END'))
+                s.lookaheads[rp].add(t)
+
+        t_closure = 0

         # There is a 1 to 1 correspondance between LR0 and LALR1 states.
         # We calculate the lookaheads for LALR1 kernel items from the LR0 kernel items.
         # use a terminal that does not exist in the grammar
         t = Terminal('$#')
         for s in self.states:
+            p = {}
+            self.propagates[s] = p
             for rp in s.kernel:
-                for rp2, la in self.generate_lr1_closure([(rp, t)]):
+                q = []
+                p[rp] = q
+                t2 = time.time()
+                z = self.generate_lr1_closure([rp.lookahead(t)], time.time())
+                t3 = time.time()
+                t_closure += t3 - t2
+                #for rp2, la in self.generate_lr1_closure([(rp, t)], time.time()):
+                for rp2_la in z:
+                    rp2 = rp2_la.rp
+                    la = rp2_la.la
                     if rp2.is_satisfied:
                         continue
                     next_symbol = rp2.next
                     next_state = s.transitions[next_symbol]
                     rp3 = rp2.advance(next_symbol)
                     assert(rp3 in next_state.kernel)
-                    x = self.lookaheads[next_state][rp3]
+                    #x = self.lookaheads[next_state][rp3]
+                    x = next_state.lookaheads[rp3]
                     if la == t:
                         # we must propagate rp's lookaheads to rp3's lookahead set
-                        self.propagates[s][rp].append(x)
+                        q.append(x)
                     else:
                         # this lookahead is "generated spontaneously" for rp3
                         x.add(la)

+        t1 = time.time()
+        print('Discovering took {:.3f} (generating closure), {:.3f} (total)'.format(t_closure, t1 - t0))
+
     def propagate_lookaheads(self):
         changed = True
         while changed:
@@ -146,7 +186,8 @@ class LALR_Analyzer(GrammarAnalyzer):
             for s in self.states:
                 for rp in s.kernel:
                     # from (from is a keyword)
-                    f = self.lookaheads[s][rp]
+                    #f = self.lookaheads[s][rp]
+                    f = s.lookaheads[rp]
                     # to
                     t = self.propagates[s][rp]
                     for x in t:
@@ -155,20 +196,33 @@ class LALR_Analyzer(GrammarAnalyzer):
                     changed = changed or (len(x) != old)

     def generate_lalr1_states(self):
+        t0 = time.time()
         # 1 to 1 correspondance between LR0 and LALR1 states
         # We must fetch the lookaheads we calculated,
         # to create the LALR1 kernels from the LR0 kernels.
         # Then, we generate the LALR1 states by taking the LR1 closure of the new kernel items.
         # map of LR0 states to LALR1 states
         m = {}
+        t_closure = 0
+        z = 0
         for s in self.states:
+            z = max(z, len(s.closure))
             kernel = []
             for rp in s.kernel:
-                las = self.lookaheads[s][rp]
+                #las = self.lookaheads[s][rp]
+                las = s.lookaheads[rp]
                 assert(len(las) > 0)
                 for la in las:
-                    kernel.append((rp, la))
-            m[s] = self.generate_lr1_closure(kernel)
+                    kernel.append(rp.lookahead(la))
+            t0_0 = time.time()
+            m[s] = self.generate_lr1_closure(kernel, time.time())
+            t0_1 = time.time()
+            t_closure += t0_1 - t0_0
+
+        print('Generating lalr1 closure for lalr kernels took {:.3f}'.format(t_closure))
+        print('Max lr0 state size was {}'.format(z))
+
+        t1 = time.time()

         self.states = {}
         for s, v in m.items():
@@ -176,8 +230,8 @@ class LALR_Analyzer(GrammarAnalyzer):
             for la, next_state in s.transitions.items():
                 actions[la] = (Shift, next_state.closure)

-            sat, _ = classify_bool(v, lambda x: x[0].is_satisfied)
-            reductions = classify(sat, lambda x: x[1], lambda x: x[0])
+            sat, _ = classify_bool(v, lambda x: x.rp.is_satisfied)
+            reductions = classify(sat, lambda x: x.la, lambda x: x.rp)
             for la, rps in reductions.items():
                 if len(rps) > 1:
                     raise GrammarError("Collision in %s: %s" % (la, ', '.join([ str(r.rule) for r in rps ])))
@@ -190,6 +244,8 @@ class LALR_Analyzer(GrammarAnalyzer):

             self.states[s.closure] = {k.name: v for k, v in actions.items()}

+        t2 = time.time()
+
         end_states = {}
         for s in self.states:
             for rp in s:
@@ -198,44 +254,168 @@ class LALR_Analyzer(GrammarAnalyzer):
                         assert(not start in end_states)
                         end_states[start] = s

+        t3 = time.time()
+
         self._parse_table = ParseTable(self.states, {start: state.closure for start, state in self.lr0_start_states.items()}, end_states)

+        t4 = time.time()
+
         if self.debug:
             self.parse_table = self._parse_table
         else:
             self.parse_table = IntParseTable.from_ParseTable(self._parse_table)

-    def generate_lr1_closure(self, kernel):
+        t5 = time.time()
+
+        print(('Generating lalr1 states took ' + ', '.join([ '{:.3f}' ] * 5)).format(t1 - t0, t2 - t1, t3 - t2, t4 - t3, t5 - t4))
+        print('Generating firsts took {:.3f} (time actually calculating), {:.3f} (end to end), {:.3f} (just function call)'.format(grammar_analysis.t_firsts, grammar_analysis.t_xy, grammar_analysis.t_call))
+
+    def generate_lr1_closure(self, kernel, t_caller):
+        global t_call
+        global t_set_0
+        global t_set_1
+        global t_expand
+        global t_rules
+        global t_append
+        global t_z
+        global t_begin
+        global t_count
+
+        t_start = time.time()
+        t_call += t_start - t_caller
+
+        # cache the results of this function
+        # not many hits, no noticeable performance improvement
+        '''
+        k = fzset(kernel)
+        cached = self.lr1_cache.get(k, None)
+        if not cached is None:
+            return cached
+        '''
+
         closure = set()
+        closure_hash = {}
+
+        y = 0

         q = list(kernel)
         while len(q) > 0:
-            rp, la = q.pop()
-            if (rp, la) in closure:
+            t_a = time.time()
+            rp_la = q.pop()
+            #rp_la_hash = hash(rp_la)
+            t0 = time.time()
+            t_begin += t0 - t_a
+            # try to manually maintain hashtable,
+            # as a set of just hashes (ints) was notably faster
+            '''
+            if rp_la_hash in closure_hash:
+                if rp_la in closure_hash[rp_la_hash]:
+                    t0_0 = time.time()
+                    t_set_0 += t0_0 - t0
+                    continue
+                t0_0 = time.time()
+                t_set_0 += t0_0 - t0
+            else:
+                closure_hash[rp_la_hash] = []
+            '''
+            if rp_la in closure:
+                t0_0 = time.time()
+                t_set_0 += t0_0 - t0
                 continue
-            closure.add((rp, la))
+            t0_0 = time.time()
+            closure.add(rp_la)
+            #closure_hash[rp_la_hash].append(rp_la)
+            t1 = time.time()
+            t_set_0 += t0_0 - t0
+            t_set_1 += t1 - t0_0
+            rp = rp_la.rp
+            la = rp_la.la

             if rp.is_satisfied:
                 continue
             if rp.next.is_term:
                 continue

+            t2 = time.time()
+
+            # cache these calculations inside each RulePtr
+            # see grammar_analysis.py:79
             l = []
+            '''
             i = rp.index + 1
             n = len(rp.rule.expansion)
-            while i < n:
-                s = rp.rule.expansion[i]
-                l.extend(self.FIRST.get(s, []))
-                if not s in self.NULLABLE:
-                    break
-                i += 1
+
+            l2_i = self.lr1_cache2.get((rp.rule, i), None)
+            l2 = []
+            if l2_i is None:
+                while i < n:
+                    s = rp.rule.expansion[i]
+                    l2.extend(self.FIRST.get(s, []))
+                    if not s in self.NULLABLE:
+                        break
+                    i += 1
+                self.lr1_cache2[(rp.rule, i)] = (l2, i)
+            else:
+                l2 = l2_i[0]
+                i = l2_i[1]
+
+            l.extend(l2)
+            '''
+            # this function call seems really slow (see grammar_analysis.t_call above)
+            # tried making it not a method call so don't need to look up vtable
+            # still equally slow
+            l2, nullable = rp.first(rp.index + 1, self.FIRST, self.NULLABLE, time.time())
+            #l2, nullable = grammar_analysis.first(rp, rp.index + 1, self.FIRST, self.NULLABLE, time.time())
+            #l.extend(l2)
+            l = l2
+            t3 = time.time()
+
+            t_expand += t3 - t2
+
+            # if we don't modify l2 and add an extra check in the loop below,
+            # we don't have to copy it
             # if all of rp.rule.expansion[rp.index + 1:] were nullable:
-            if i == n:
-                l.append(la)
+            #if nullable:
+            #    l.append(la)
+
+            t4 = time.time()
+            x = rp.next_rules_by_origin(self.lr0_rules_by_origin)
+            t5 = time.time()

-            for r in self.lr0_rules_by_origin[rp.next]:
+            # usually between 20-60? seen as high as ~175
+            y = max(y, len(x) * len(l))
+            #print('adding {} * {} rules to closure max {}'.format(len(x), len(l), y))
+            for r in x:
                 for s in l:
-                    q.append((RulePtr(r, 0), s))
+                    # cache RulePtr(r, 0) in r (no duplicate RulePtr objects)
+                    # cache r._rp in _rp (1 less object property lookup?)
+                    _rp = r._rp
+                    if _rp is None:
+                        _rp = RulePtr(r, 0)
+                        r._rp = _rp
+                    q.append(_rp.lookahead(s))
+                    #q.append((r._rp, s))
+                if nullable:
+                    _rp = r._rp
+                    if _rp is None:
+                        _rp = RulePtr(r, 0)
+                        r._rp = _rp
+                    q.append(_rp.lookahead(la))
+                    #q.append((r._rp, la))
+
+            t6 = time.time()
+            t_rules += t5 - t4
+            t_append += t6 - t5
+
+        #self.lr1_cache[k] = closure
+
+        t_end = time.time()
+        t_z += t_end - t_start
+
+        t_count += 1
+
+        if t_count % 1000 == 0:
+            print('\tGenerating lr1 closure took begin {:.3f}, set contains {:.3f}, set add {:.3f}, get first {:.3f}'.format(t_begin, t_set_0, t_set_1, t_expand))
+            print('\tget next rules {:.3f}, append rules {:.3f}, total {:.3f}, call time {:.3f}, count {}'.format(t_rules, t_append, t_z, t_call, t_count))
+            print('\tmax number of appends {}'.format(y))

         return closure
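
With the caches and timers removed, generate_lr1_closure is the textbook LR(1) closure: for every item [A -> alpha . B beta, a] it adds [B -> . gamma, b] for each b in FIRST(beta a). A simplified sketch using the first_of_rest helper above and plain tuples instead of the interned RulePtr/RulePtrLookahead objects:

def lr1_closure(kernel, rules_by_origin, FIRST, NULLABLE):
    closure = set()
    queue = list(kernel)                      # items are (rule, index, lookahead) tuples here
    while queue:
        rule, index, la = queue.pop()
        if (rule, index, la) in closure:
            continue
        closure.add((rule, index, la))
        if index == len(rule.expansion) or rule.expansion[index].is_term:
            continue
        # lookaheads for the expanded items: FIRST of what follows the dotted symbol,
        # plus the current lookahead if that remainder is nullable
        symbols, nullable = first_of_rest(rule.expansion, index + 1, FIRST, NULLABLE)
        lookaheads = symbols + ([la] if nullable else [])
        for r in rules_by_origin[rule.expansion[index]]:
            for b in lookaheads:
                queue.append((r, 0, b))
    return closure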

lark/parsers/lalr_parser.py (+12, -0)

@@ -8,6 +8,8 @@ from ..utils import Enumerator, Serialize

 from .lalr_analysis import LALR_Analyzer, Shift, Reduce, IntParseTable

+import time
+

 ###{standalone
 class LALR_Parser(object):
@@ -15,10 +17,20 @@ class LALR_Parser(object):
         assert all(r.options is None or r.options.priority is None
                    for r in parser_conf.rules), "LALR doesn't yet support prioritization"
         analysis = LALR_Analyzer(parser_conf, debug=debug)
+        t0 = time.time()
         analysis.generate_lr0_states()
+        t1 = time.time()
         analysis.discover_lookaheads()
+        t2 = time.time()
         analysis.propagate_lookaheads()
+        t3 = time.time()
         analysis.generate_lalr1_states()
+        t4 = time.time()
+        print('Generating lr0 states took {:.3f}'.format(t1 - t0))
+        print('Discovering lookaheads took {:.3f}'.format(t2 - t1))
+        print('Propagating lookaheads took {:.3f}'.format(t3 - t2))
+        print('Generating lalr states (closure) took {:.3f}'.format(t4 - t3))
+        print('-' * 32)
         callbacks = parser_conf.callbacks

         self._parse_table = analysis.parse_table
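
One note on the instrumentation itself: all of the measurements in this commit use time.time(). For short intervals a monotonic clock such as time.perf_counter() is usually a better fit, since it has higher resolution and is unaffected by system clock adjustments. A small helper one could swap in (a sketch, not part of this patch):

import time
from contextlib import contextmanager

@contextmanager
def timed(label):
    # Monotonic, high-resolution timer for measuring elapsed wall time.
    start = time.perf_counter()
    try:
        yield
    finally:
        print('{} took {:.3f}s'.format(label, time.perf_counter() - start))

# e.g.
# with timed('Generating lr0 states'):
#     analysis.generate_lr0_states()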

