
Cleanup

Erez Shinan, 5 years ago
commit 8466981c08

4 changed files with 40 additions and 64 deletions
  1. lark/grammar.py (+1, -2)
  2. lark/parsers/grammar_analysis.py (+9, -25)
  3. lark/parsers/lalr_analysis.py (+27, -22)
  4. lark/parsers/lalr_parser.py (+3, -15)

lark/grammar.py (+1, -2)

@@ -71,7 +71,7 @@ class Rule(Serialize):
         expansion : a list of symbols
         order : index of this expansion amongst all rules of the same name
     """
-    __slots__ = ('origin', 'expansion', 'alias', 'options', 'order', '_hash', '_rp')
+    __slots__ = ('origin', 'expansion', 'alias', 'options', 'order', '_hash')
 
     __serialize_fields__ = 'origin', 'expansion', 'order', 'alias', 'options'
     __serialize_namespace__ = Terminal, NonTerminal, RuleOptions
@@ -83,7 +83,6 @@ class Rule(Serialize):
         self.order = order
         self.options = options
         self._hash = hash((self.origin, tuple(self.expansion)))
-        self._rp = None
 
     def _deserialize(self):
         self._hash = hash((self.origin, tuple(self.expansion)))
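
The dropped _rp slot was a cached RulePtr(rule, 0), kept so that equal rule pointers were also identical objects; the grammar_analysis.py changes below make that caching unnecessary. What survives is the precomputed _hash: a Rule is immutable once constructed and is used heavily as a dict/set key, so hashing once at construction is the point of the slot. A minimal standalone sketch of the same pattern (illustrative only, not lark's actual class):

    class CachedHashKey:
        # Immutable key object that precomputes its hash once.
        __slots__ = ('origin', 'expansion', '_hash')

        def __init__(self, origin, expansion):
            self.origin = origin
            self.expansion = tuple(expansion)
            self._hash = hash((self.origin, self.expansion))  # computed once

        def __hash__(self):
            return self._hash   # O(1) on every dict/set lookup

        def __eq__(self, other):
            return self.origin == other.origin and self.expansion == other.expansion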


lark/parsers/grammar_analysis.py (+9, -25)

@@ -3,20 +3,16 @@ from collections import Counter, defaultdict
 from ..utils import bfs, fzset, classify
 from ..exceptions import GrammarError
 from ..grammar import Rule, Terminal, NonTerminal
-import time
 
 
-# optimizations were made so that there should never be two distinct equal RulePtrs
-# to help with hashtable lookup
 class RulePtr(object):
-    __slots__ = ('rule', 'index', '_advance')
+    __slots__ = ('rule', 'index')
 
     def __init__(self, rule, index):
         assert isinstance(rule, Rule)
         assert index <= len(rule.expansion)
         self.rule = rule
         self.index = index
-        self._advance = None
 
     def __repr__(self):
         before = [x.name for x in self.rule.expansion[:self.index]]
@@ -27,19 +23,19 @@ class RulePtr(object):
     def next(self):
         return self.rule.expansion[self.index]
 
-    # don't create duplicate RulePtrs
     def advance(self, sym):
         assert self.next == sym
-        a = self._advance
-        if a is None:
-            a = RulePtr(self.rule, self.index + 1)
-            self._advance = a
-        return a
+        return RulePtr(self.rule, self.index+1)
 
     @property
     def is_satisfied(self):
         return self.index == len(self.rule.expansion)
 
+    def __eq__(self, other):
+        return self.rule == other.rule and self.index == other.index
+
+    def __hash__(self):
+        return hash((self.rule, self.index))
 
 
 # state generation ensures no duplicate LR0ItemSets
 class LR0ItemSet(object):
@@ -159,19 +155,11 @@ class GrammarAnalyzer(object):
         self.lr0_rules_by_origin = classify(lr0_rules, lambda r: r.origin)
 
         # cache RulePtr(r, 0) in r (no duplicate RulePtr objects)
-        for root_rule in lr0_root_rules.values():
-            root_rule._rp = RulePtr(root_rule, 0)
-        self.lr0_start_states = {start: LR0ItemSet([root_rule._rp], self.expand_rule(root_rule.origin, self.lr0_rules_by_origin))
+        self.lr0_start_states = {start: LR0ItemSet([RulePtr(root_rule, 0)], self.expand_rule(root_rule.origin, self.lr0_rules_by_origin))
                                  for start, root_rule in lr0_root_rules.items()}
 
         self.FIRST, self.FOLLOW, self.NULLABLE = calculate_sets(rules)
 
-        self.nonterminal_transitions = []
-        self.directly_reads = defaultdict(set)
-        self.reads = defaultdict(set)
-        self.includes = defaultdict(set)
-        self.lookback = defaultdict(set)
-
     def expand_rule(self, source_rule, rules_by_origin=None):
         "Returns all init_ptrs accessible by rule (recursive)"
 
@@ -183,11 +171,7 @@ class GrammarAnalyzer(object):
         assert not rule.is_term, rule
 
         for r in rules_by_origin[rule]:
-            # don't create duplicate RulePtr objects
-            init_ptr = r._rp
-            if init_ptr is None:
-                init_ptr = RulePtr(r, 0)
-                r._rp = init_ptr
+            init_ptr = RulePtr(r, 0)
             init_ptrs.add(init_ptr)
 
             if r.expansion: # if not empty rule
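
With the identity caching gone, deduplication now rests on RulePtr's new value semantics: two independently constructed pointers to the same position compare equal and hash alike, so sets and dicts collapse them without shared instances. A hedged usage sketch (the Rule constructor arguments are assumed from this diff, not checked against the 0.8.0 API):

    from lark.grammar import Rule, NonTerminal, Terminal
    from lark.parsers.grammar_analysis import RulePtr

    rule = Rule(NonTerminal('start'), [Terminal('A')])  # hypothetical rule: start -> A

    p1 = RulePtr(rule, 0)
    p2 = RulePtr(rule, 0)
    assert p1 == p2 and hash(p1) == hash(p2)        # value equality via __eq__/__hash__
    assert len({p1, p2}) == 1                       # sets deduplicate distinct objects
    assert p1.advance(p1.next) == RulePtr(rule, 1)  # a fresh, equal-by-value pointer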


lark/parsers/lalr_analysis.py (+27, -22)

@@ -15,8 +15,6 @@ from ..exceptions import GrammarError
 from .grammar_analysis import GrammarAnalyzer, Terminal, LR0ItemSet
 from ..grammar import Rule
 
-import time
-
 ###{standalone
 
 class Action:
@@ -115,8 +113,8 @@ def traverse(x, S, N, X, R, G, F):
     S.append(x)
     d = len(S)
     N[x] = d
-    F[x] = G(x)
-    for y in R(x):
+    F[x] = G[x]
+    for y in R[x]:
         if N[y] == 0:
             traverse(y, S, N, X, R, G, F)
         n_x = N[x]
@@ -137,9 +135,17 @@
 
 
 class LALR_Analyzer(GrammarAnalyzer):
+    def __init__(self, parser_conf, debug=False):
+        GrammarAnalyzer.__init__(self, parser_conf, debug)
+        self.nonterminal_transitions = []
+        self.directly_reads = defaultdict(set)
+        self.reads = defaultdict(set)
+        self.includes = defaultdict(set)
+        self.lookback = defaultdict(set)
+
     def compute_lr0_states(self):
-        self.states = set()
+        self.lr0_states = set()
         # map of kernels to LR0ItemSets
         cache = {}
 
@@ -161,7 +167,7 @@ class LALR_Analyzer(GrammarAnalyzer):
                 state.transitions[sym] = new_state
                 yield new_state
 
-            self.states.add(state)
+            self.lr0_states.add(state)
 
         for _ in bfs(self.lr0_start_states.values(), step):
             pass
@@ -174,14 +180,14 @@ class LALR_Analyzer(GrammarAnalyzer):
             assert(rp.index == 0)
             self.directly_reads[(root, rp.next)] = set([ Terminal('$END') ])
 
-        for state in self.states:
+        for state in self.lr0_states:
             seen = set()
             for rp in state.closure:
                 if rp.is_satisfied:
                     continue
                 s = rp.next
                 # if s is a not a nonterminal
-                if not s in self.lr0_rules_by_origin:
+                if s not in self.lr0_rules_by_origin:
                     continue
                 if s in seen:
                     continue
@@ -201,11 +207,6 @@ class LALR_Analyzer(GrammarAnalyzer):
                     if s2 in self.NULLABLE:
                         r.add((next_state, s2))
 
-    def compute_read_sets(self):
-        R = lambda nt: self.reads[nt]
-        G = lambda nt: self.directly_reads[nt]
-        self.read_sets = digraph(self.nonterminal_transitions, R, G)
-
     def compute_includes_lookback(self):
         for nt in self.nonterminal_transitions:
             state, nonterminal = nt
@@ -220,9 +221,8 @@ class LALR_Analyzer(GrammarAnalyzer):
                     s = rp.rule.expansion[i]
                     nt2 = (state2, s)
                     state2 = state2.transitions[s]
-                    if not nt2 in self.reads:
+                    if nt2 not in self.reads:
                         continue
-                    j = i + 1
                     for j in range(i + 1, len(rp.rule.expansion)):
                         if not rp.rule.expansion[j] in self.NULLABLE:
                             break
@@ -236,20 +236,18 @@ class LALR_Analyzer(GrammarAnalyzer):
             for nt2 in includes:
                 self.includes[nt2].add(nt)
 
-    def compute_follow_sets(self):
-        R = lambda nt: self.includes[nt]
-        G = lambda nt: self.read_sets[nt]
-        self.follow_sets = digraph(self.nonterminal_transitions, R, G)
-
     def compute_lookaheads(self):
+        read_sets = digraph(self.nonterminal_transitions, self.reads, self.directly_reads)
+        follow_sets = digraph(self.nonterminal_transitions, self.includes, read_sets)
+
         for nt, lookbacks in self.lookback.items():
            for state, rule in lookbacks:
-                for s in self.follow_sets[nt]:
+                for s in follow_sets[nt]:
                     state.lookaheads[s].add(rule)
 
     def compute_lalr1_states(self):
         m = {}
-        for state in self.states:
+        for state in self.lr0_states:
             actions = {}
             for la, next_state in state.transitions.items():
                 actions[la] = (Shift, next_state.closure)
@@ -281,3 +279,10 @@ class LALR_Analyzer(GrammarAnalyzer):
             self.parse_table = self._parse_table
         else:
             self.parse_table = IntParseTable.from_ParseTable(self._parse_table)
+
+    def compute_lalr(self):
+        self.compute_lr0_states()
+        self.compute_reads_relations()
+        self.compute_includes_lookback()
+        self.compute_lookaheads()
+        self.compute_lalr1_states()
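
Two hunks above change traverse to subscript its arguments (F[x] = G[x], for y in R[x]): with compute_read_sets and compute_follow_sets inlined into compute_lookaheads, digraph now receives the reads/includes defaultdicts and the returned read_sets dict directly, so the lambda wrappers had nothing left to do. For orientation, digraph/traverse implement DeRemer and Pennello's SCC-based relation closure, computing F(x) = G(x) ∪ ⋃{ F(y) : x R y }. A generic standalone sketch of the algorithm (my paraphrase of the code above; the float('inf') completion mark stands in for lark's N[z] = -1 bookkeeping):

    def digraph(X, R, G):
        # For every x in X: F[x] = G[x] unioned with F[y] for each y related by R.
        # R and G map each x to an iterable resp. a set.
        F, S, N = {}, [], dict.fromkeys(X, 0)
        for x in X:
            if N[x] == 0:
                traverse(x, S, N, R, G, F)
        return F

    def traverse(x, S, N, R, G, F):
        S.append(x)
        d = len(S)                  # depth of x on the traversal stack
        N[x] = d
        F[x] = set(G[x])            # copy, so G's sets are never mutated
        for y in R[x]:
            if N[y] == 0:
                traverse(y, S, N, R, G, F)
            N[x] = min(N[x], N[y])  # x joins y's cycle if y sits lower on the stack
            F[x] |= F[y]
        if N[x] == d:               # x is the root of its strongly connected component;
            while True:             # every member of the cycle shares the same final set
                z = S.pop()
                N[z] = float('inf') # mark finished
                F[z] = F[x]
                if z == x:
                    break

In the LALR computation, X is nonterminal_transitions, with R = reads and G = directly_reads on the first pass, then R = includes and G = the resulting read_sets on the second, exactly as the new compute_lookaheads wires them.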

lark/parsers/lalr_parser.py (+3, -15)

@@ -8,8 +8,6 @@ from ..utils import Enumerator, Serialize
 
 from .lalr_analysis import LALR_Analyzer, Shift, Reduce, IntParseTable
 
-import time
-
 
 ###{standalone
 class LALR_Parser(object):
@@ -17,13 +15,7 @@ class LALR_Parser(object):
         assert all(r.options is None or r.options.priority is None
                    for r in parser_conf.rules), "LALR doesn't yet support prioritization"
         analysis = LALR_Analyzer(parser_conf, debug=debug)
-        analysis.compute_lr0_states()
-        analysis.compute_reads_relations()
-        analysis.compute_read_sets()
-        analysis.compute_includes_lookback()
-        analysis.compute_follow_sets()
-        analysis.compute_lookaheads()
-        analysis.compute_lalr1_states()
+        analysis.compute_lalr()
         callbacks = parser_conf.callbacks
 
         self._parse_table = analysis.parse_table
@@ -88,11 +80,6 @@ class _Parser:
                 state_stack.append(new_state)
                 value_stack.append(value)
 
-                if state_stack[-1] == end_state:
-                    return True
-
-                return False
-
         # Main LALR-parser loop
         for token in stream:
             while True:
@@ -111,7 +98,8 @@ class _Parser:
             while True:
                 _action, arg = get_action(token)
                 assert(_action is Reduce)
-                if reduce(arg):
+                reduce(arg)
+                if state_stack[-1] == end_state:
                     return value_stack[-1]
 
 ###}
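
The _Parser change is behavior-preserving: reduce used to report whether the root state had been reached, but only the end-of-stream loop ever consulted the return value, so the check moves there and reduce becomes a plain mutator. With compute_lalr() absorbing the old seven-step setup, parser construction collapses to a few lines; a hedged sketch of the new path (parser_conf construction elided, names taken from this diff):

    from lark.parsers.lalr_analysis import LALR_Analyzer

    def build_parse_table(parser_conf, debug=False):
        # compute_lalr() chains: lr0 states -> reads relations ->
        # includes/lookback -> lookaheads -> lalr1 states
        analysis = LALR_Analyzer(parser_conf, debug=debug)
        analysis.compute_lalr()
        return analysis.parse_table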
