Browse Source

Refactoring for LALR, added the ParseTable class

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.5.2
Erez Shinan 6 years ago
parent
commit
0ee80e675a
4 changed files with 61 additions and 33 deletions
  1. +1
    -1
      lark/parser_frontends.py
  2. +1
    -1
      lark/parsers/grammar_analysis.py
  3. +44
    -17
      lark/parsers/lalr_analysis.py
  4. +15
    -14
      lark/parsers/lalr_parser.py

+ 1
- 1
lark/parser_frontends.py View File

@@ -38,7 +38,7 @@ class LALR_ContextualLexer:

self.parser = lalr_parser.Parser(parser_conf)

d = {idx:t.keys() for idx, t in self.parser.analysis.states_idx.items()}
d = {idx:t.keys() for idx, t in self.parser.analysis.parse_table.states.items()}
always_accept = lexer_conf.postlex.always_accept if lexer_conf.postlex else ()
self.lexer = ContextualLexer(lexer_conf.tokens, d, ignore=lexer_conf.ignore, always_accept=always_accept)



+ 1
- 1
lark/parsers/grammar_analysis.py View File

@@ -125,7 +125,7 @@ class GrammarAnalyzer(object):
if not (is_terminal(sym) or sym in self.rules_by_origin):
raise GrammarError("Using an undefined rule: %s" % sym)

self.init_state = self.expand_rule('$root')
self.start_state = self.expand_rule('$root')

self.FIRST, self.FOLLOW, self.NULLABLE = calculate_sets(self.rules)



+ 44
- 17
lark/parsers/lalr_analysis.py View File

@@ -14,7 +14,41 @@ from ..common import GrammarError, is_terminal

from .grammar_analysis import GrammarAnalyzer

ACTION_SHIFT = 0
class Action:
    """Base class for parser action markers.

    The string form of an instance is the name of its concrete class, so a
    subclass instance prints as the subclass name.
    """

    def __str__(self):
        # An instance has no ``__name__`` attribute of its own (only the class
        # does), so the name must be looked up on the class.
        return self.__class__.__name__

    def __repr__(self):
        return str(self)

class Shift(Action):
    """Action marker stored as the first element of a shift table entry."""


class Reduce(Action):
    """Action marker stored as the first element of a reduce table entry."""

class ParseTable:
    """Bundle of a parser's state table with its start and end states.

    Attributes:
        states: the state table, stored exactly as given.
        start_state: identifier of the state parsing begins in.
        end_state: identifier of the accepting state.
    """

    def __init__(self, states, start_state, end_state):
        # Plain storage only: no copying or validation is performed.
        self.states, self.start_state, self.end_state = (
            states,
            start_state,
            end_state,
        )

class IntParseTable(ParseTable):
    """A ParseTable whose states are referred to by integer index."""

    @classmethod
    def from_ParseTable(cls, parse_table):
        """Build an integer-indexed copy of ``parse_table``.

        Each state is numbered by its position when iterating
        ``parse_table.states``. State keys, the start state and the end state
        are replaced by those numbers. Entries whose first element is
        ``Shift`` have their second element (the target state) replaced by its
        number; every other entry is carried over unchanged.
        """
        index_of = {}
        for position, state in enumerate(parse_table.states):
            index_of[state] = position

        numbered_states = {}
        for state, actions in parse_table.states.items():
            row = {}
            for key, entry in actions.items():
                if entry[0] is Shift:
                    # A shift points at another state, so it must be renumbered.
                    entry = (entry[0], index_of[entry[1]])
                row[key] = entry
            numbered_states[index_of[state]] = row

        first = index_of[parse_table.start_state]
        last = index_of[parse_table.end_state]
        return cls(numbered_states, first, last)




class LALR_Analyzer(GrammarAnalyzer):

@@ -27,7 +61,7 @@ class LALR_Analyzer(GrammarAnalyzer):
sat, unsat = classify_bool(state, lambda rp: rp.is_satisfied)
for rp in sat:
for term in self.FOLLOW.get(rp.rule.origin, ()):
lookahead[term].append(('reduce', rp.rule))
lookahead[term].append((Reduce, rp.rule))

d = classify(unsat, lambda rp: rp.next)
for sym, rps in d.items():
@@ -38,7 +72,7 @@ class LALR_Analyzer(GrammarAnalyzer):
rps |= self.expand_rule(rp.next)

new_state = fzset(rps)
lookahead[sym].append(('shift', new_state))
lookahead[sym].append((Shift, new_state))
if sym == '$end':
self.end_states.append( new_state )
yield fzset(rps)
@@ -50,7 +84,7 @@ class LALR_Analyzer(GrammarAnalyzer):
for x in v:
# XXX resolving shift/reduce into shift, like PLY
# Give a proper warning
if x[0] == 'shift':
if x[0] is Shift:
lookahead[k] = [x]

for k, v in lookahead.items():
@@ -59,22 +93,15 @@ class LALR_Analyzer(GrammarAnalyzer):

self.states[state] = {k:v[0] for k, v in lookahead.items()}

for _ in bfs([self.init_state], step):
for _ in bfs([self.start_state], step):
pass

self.end_state ,= self.end_states

# --
self.enum = list(self.states)
self.enum_rev = {s:i for i,s in enumerate(self.enum)}
self.states_idx = {}

for s, la in self.states.items():
la = {k:(ACTION_SHIFT, self.enum_rev[v[1]]) if v[0]=='shift'
else (v[0], (v[1], len(v[1].expansion))) # Reduce
for k,v in la.items()}
self.states_idx[ self.enum_rev[s] ] = la
self._parse_table = ParseTable(self.states, self.start_state, self.end_state)

if self.debug:
self.parse_table = self._parse_table
else:
self.parse_table = IntParseTable.from_ParseTable(self._parse_table)

self.init_state_idx = self.enum_rev[self.init_state]
self.end_state_idx = self.enum_rev[self.end_state]

+ 15
- 14
lark/parsers/lalr_parser.py View File

@@ -5,7 +5,7 @@

from ..common import ParseError, UnexpectedToken

from .lalr_analysis import LALR_Analyzer, ACTION_SHIFT
from .lalr_analysis import LALR_Analyzer, Shift

class FinalReduce:
def __init__(self, value):
@@ -19,14 +19,14 @@ class Parser:
callbacks = {rule: getattr(parser_conf.callback, rule.alias or rule.origin, None)
for rule in analysis.rules}

self.parser = _Parser(analysis.states_idx, analysis.init_state_idx, analysis.end_state_idx, callbacks)
self.parser = _Parser(analysis.parse_table, callbacks)
self.parse = self.parser.parse

class _Parser:
def __init__(self, states, init_state, end_state, callbacks):
self.states = states
self.init_state = init_state
self.end_state = end_state
def __init__(self, parse_table, callbacks):
self.states = parse_table.states
self.start_state = parse_table.start_state
self.end_state = parse_table.end_state
self.callbacks = callbacks

def parse(self, seq, set_state=None):
@@ -35,10 +35,10 @@ class _Parser:
stream = iter(seq)
states = self.states

state_stack = [self.init_state]
state_stack = [self.start_state]
value_stack = []

if set_state: set_state(self.init_state)
if set_state: set_state(self.start_state)

def get_action(key):
state = state_stack[-1]
@@ -49,7 +49,8 @@ class _Parser:

raise UnexpectedToken(token, expected, seq, i)

def reduce(rule, size):
def reduce(rule):
size = len(rule.expansion)
if size:
s = value_stack[-size:]
del state_stack[-size:]
@@ -60,7 +61,7 @@ class _Parser:
value = self.callbacks[rule](s)

_action, new_state = get_action(rule.origin)
assert _action == ACTION_SHIFT
assert _action is Shift
state_stack.append(new_state)
value_stack.append(value)

@@ -72,22 +73,22 @@ class _Parser:
action, arg = get_action(token.type)
assert arg != self.end_state

if action == ACTION_SHIFT:
if action is Shift:
state_stack.append(arg)
value_stack.append(token)
if set_state: set_state(arg)
token = next(stream)
i += 1
else:
reduce(*arg)
reduce(arg)
except StopIteration:
pass

while True:
_action, arg = get_action('$end')
if _action == ACTION_SHIFT:
if _action is Shift:
assert arg == self.end_state
val ,= value_stack
return val
else:
reduce(*arg)
reduce(arg)

Loading…
Cancel
Save