Browse Source

Cleanup

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.8.0
Erez Shinan 5 years ago
parent
commit
8466981c08
4 changed files with 40 additions and 64 deletions
  1. +1
    -2
      lark/grammar.py
  2. +9
    -25
      lark/parsers/grammar_analysis.py
  3. +27
    -22
      lark/parsers/lalr_analysis.py
  4. +3
    -15
      lark/parsers/lalr_parser.py

+ 1
- 2
lark/grammar.py View File

@@ -71,7 +71,7 @@ class Rule(Serialize):
expansion : a list of symbols
order : index of this expansion amongst all rules of the same name
"""
__slots__ = ('origin', 'expansion', 'alias', 'options', 'order', '_hash', '_rp')
__slots__ = ('origin', 'expansion', 'alias', 'options', 'order', '_hash')

__serialize_fields__ = 'origin', 'expansion', 'order', 'alias', 'options'
__serialize_namespace__ = Terminal, NonTerminal, RuleOptions
@@ -83,7 +83,6 @@ class Rule(Serialize):
self.order = order
self.options = options
self._hash = hash((self.origin, tuple(self.expansion)))
self._rp = None

def _deserialize(self):
self._hash = hash((self.origin, tuple(self.expansion)))


+ 9
- 25
lark/parsers/grammar_analysis.py View File

@@ -3,20 +3,16 @@ from collections import Counter, defaultdict
from ..utils import bfs, fzset, classify
from ..exceptions import GrammarError
from ..grammar import Rule, Terminal, NonTerminal
import time


# optimizations were made so that there should never be two distinct equal RulePtrs
# to help with hashtable lookup
class RulePtr(object):
__slots__ = ('rule', 'index', '_advance')
__slots__ = ('rule', 'index')

def __init__(self, rule, index):
assert isinstance(rule, Rule)
assert index <= len(rule.expansion)
self.rule = rule
self.index = index
self._advance = None

def __repr__(self):
before = [x.name for x in self.rule.expansion[:self.index]]
@@ -27,19 +23,19 @@ class RulePtr(object):
def next(self):
return self.rule.expansion[self.index]

# don't create duplicate RulePtrs
def advance(self, sym):
assert self.next == sym
a = self._advance
if a is None:
a = RulePtr(self.rule, self.index + 1)
self._advance = a
return a
return RulePtr(self.rule, self.index+1)

@property
def is_satisfied(self):
return self.index == len(self.rule.expansion)

def __eq__(self, other):
return self.rule == other.rule and self.index == other.index
def __hash__(self):
return hash((self.rule, self.index))


# state generation ensures no duplicate LR0ItemSets
class LR0ItemSet(object):
@@ -159,19 +155,11 @@ class GrammarAnalyzer(object):
self.lr0_rules_by_origin = classify(lr0_rules, lambda r: r.origin)

# cache RulePtr(r, 0) in r (no duplicate RulePtr objects)
for root_rule in lr0_root_rules.values():
root_rule._rp = RulePtr(root_rule, 0)
self.lr0_start_states = {start: LR0ItemSet([root_rule._rp], self.expand_rule(root_rule.origin, self.lr0_rules_by_origin))
self.lr0_start_states = {start: LR0ItemSet([RulePtr(root_rule, 0)], self.expand_rule(root_rule.origin, self.lr0_rules_by_origin))
for start, root_rule in lr0_root_rules.items()}

self.FIRST, self.FOLLOW, self.NULLABLE = calculate_sets(rules)

self.nonterminal_transitions = []
self.directly_reads = defaultdict(set)
self.reads = defaultdict(set)
self.includes = defaultdict(set)
self.lookback = defaultdict(set)

def expand_rule(self, source_rule, rules_by_origin=None):
"Returns all init_ptrs accessible by rule (recursive)"

@@ -183,11 +171,7 @@ class GrammarAnalyzer(object):
assert not rule.is_term, rule

for r in rules_by_origin[rule]:
# don't create duplicate RulePtr objects
init_ptr = r._rp
if init_ptr is None:
init_ptr = RulePtr(r, 0)
r._rp = init_ptr
init_ptr = RulePtr(r, 0)
init_ptrs.add(init_ptr)

if r.expansion: # if not empty rule


+ 27
- 22
lark/parsers/lalr_analysis.py View File

@@ -15,8 +15,6 @@ from ..exceptions import GrammarError
from .grammar_analysis import GrammarAnalyzer, Terminal, LR0ItemSet
from ..grammar import Rule

import time

###{standalone

class Action:
@@ -115,8 +113,8 @@ def traverse(x, S, N, X, R, G, F):
S.append(x)
d = len(S)
N[x] = d
F[x] = G(x)
for y in R(x):
F[x] = G[x]
for y in R[x]:
if N[y] == 0:
traverse(y, S, N, X, R, G, F)
n_x = N[x]
@@ -137,9 +135,17 @@ def traverse(x, S, N, X, R, G, F):


class LALR_Analyzer(GrammarAnalyzer):
def __init__(self, parser_conf, debug=False):
GrammarAnalyzer.__init__(self, parser_conf, debug)
self.nonterminal_transitions = []
self.directly_reads = defaultdict(set)
self.reads = defaultdict(set)
self.includes = defaultdict(set)
self.lookback = defaultdict(set)


def compute_lr0_states(self):
self.states = set()
self.lr0_states = set()
# map of kernels to LR0ItemSets
cache = {}

@@ -161,7 +167,7 @@ class LALR_Analyzer(GrammarAnalyzer):
state.transitions[sym] = new_state
yield new_state

self.states.add(state)
self.lr0_states.add(state)

for _ in bfs(self.lr0_start_states.values(), step):
pass
@@ -174,14 +180,14 @@ class LALR_Analyzer(GrammarAnalyzer):
assert(rp.index == 0)
self.directly_reads[(root, rp.next)] = set([ Terminal('$END') ])

for state in self.states:
for state in self.lr0_states:
seen = set()
for rp in state.closure:
if rp.is_satisfied:
continue
s = rp.next
# if s is not a nonterminal
if not s in self.lr0_rules_by_origin:
if s not in self.lr0_rules_by_origin:
continue
if s in seen:
continue
@@ -201,11 +207,6 @@ class LALR_Analyzer(GrammarAnalyzer):
if s2 in self.NULLABLE:
r.add((next_state, s2))

def compute_read_sets(self):
R = lambda nt: self.reads[nt]
G = lambda nt: self.directly_reads[nt]
self.read_sets = digraph(self.nonterminal_transitions, R, G)

def compute_includes_lookback(self):
for nt in self.nonterminal_transitions:
state, nonterminal = nt
@@ -220,9 +221,8 @@ class LALR_Analyzer(GrammarAnalyzer):
s = rp.rule.expansion[i]
nt2 = (state2, s)
state2 = state2.transitions[s]
if not nt2 in self.reads:
if nt2 not in self.reads:
continue
j = i + 1
for j in range(i + 1, len(rp.rule.expansion)):
if not rp.rule.expansion[j] in self.NULLABLE:
break
@@ -236,20 +236,18 @@ class LALR_Analyzer(GrammarAnalyzer):
for nt2 in includes:
self.includes[nt2].add(nt)

def compute_follow_sets(self):
R = lambda nt: self.includes[nt]
G = lambda nt: self.read_sets[nt]
self.follow_sets = digraph(self.nonterminal_transitions, R, G)

def compute_lookaheads(self):
read_sets = digraph(self.nonterminal_transitions, self.reads, self.directly_reads)
follow_sets = digraph(self.nonterminal_transitions, self.includes, read_sets)

for nt, lookbacks in self.lookback.items():
for state, rule in lookbacks:
for s in self.follow_sets[nt]:
for s in follow_sets[nt]:
state.lookaheads[s].add(rule)

def compute_lalr1_states(self):
m = {}
for state in self.states:
for state in self.lr0_states:
actions = {}
for la, next_state in state.transitions.items():
actions[la] = (Shift, next_state.closure)
@@ -281,3 +279,10 @@ class LALR_Analyzer(GrammarAnalyzer):
self.parse_table = self._parse_table
else:
self.parse_table = IntParseTable.from_ParseTable(self._parse_table)

def compute_lalr(self):
self.compute_lr0_states()
self.compute_reads_relations()
self.compute_includes_lookback()
self.compute_lookaheads()
self.compute_lalr1_states()

+ 3
- 15
lark/parsers/lalr_parser.py View File

@@ -8,8 +8,6 @@ from ..utils import Enumerator, Serialize

from .lalr_analysis import LALR_Analyzer, Shift, Reduce, IntParseTable

import time


###{standalone
class LALR_Parser(object):
@@ -17,13 +15,7 @@ class LALR_Parser(object):
assert all(r.options is None or r.options.priority is None
for r in parser_conf.rules), "LALR doesn't yet support prioritization"
analysis = LALR_Analyzer(parser_conf, debug=debug)
analysis.compute_lr0_states()
analysis.compute_reads_relations()
analysis.compute_read_sets()
analysis.compute_includes_lookback()
analysis.compute_follow_sets()
analysis.compute_lookaheads()
analysis.compute_lalr1_states()
analysis.compute_lalr()
callbacks = parser_conf.callbacks

self._parse_table = analysis.parse_table
@@ -88,11 +80,6 @@ class _Parser:
state_stack.append(new_state)
value_stack.append(value)

if state_stack[-1] == end_state:
return True

return False

# Main LALR-parser loop
for token in stream:
while True:
@@ -111,7 +98,8 @@ class _Parser:
while True:
_action, arg = get_action(token)
assert(_action is Reduce)
if reduce(arg):
reduce(arg)
if state_stack[-1] == end_state:
return value_stack[-1]

###}

Loading…
Cancel
Save