Explorar el Código

Cleaned up the LALR(1) parser

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.5.1
Erez Shinan hace 6 años
padre
commit
5f5acca40b
Se han modificado 4 ficheros con 24 adiciones y 21 borrados
  1. +1
    -1
      lark/common.py
  2. +1
    -1
      lark/parsers/grammar_analysis.py
  3. +8
    -1
      lark/parsers/lalr_analysis.py
  4. +14
    -18
      lark/parsers/lalr_parser.py

+ 1
- 1
lark/common.py Ver fichero

@@ -33,7 +33,7 @@ class UnexpectedToken(ParseError):


def is_terminal(sym):
- return isinstance(sym, Terminal) or sym.isupper() or sym[0] == '$'
+ return isinstance(sym, Terminal) or sym.isupper() or sym == '$end'


class LexerConf:


+ 1
- 1
lark/parsers/grammar_analysis.py Ver fichero

@@ -125,7 +125,7 @@ class GrammarAnalyzer(object):
if not (is_terminal(sym) or sym in self.rules_by_origin):
raise GrammarError("Using an undefined rule: %s" % sym)

- self.init_state = self.expand_rule(start_symbol)
+ self.init_state = self.expand_rule('$root')

self.FIRST, self.FOLLOW, self.NULLABLE = calculate_sets(self.rules)



+ 8
- 1
lark/parsers/lalr_analysis.py Ver fichero

@@ -19,6 +19,7 @@ ACTION_SHIFT = 0
class LALR_Analyzer(GrammarAnalyzer):

def compute_lookahead(self):
+ self.end_states = []

self.states = {}
def step(state):
@@ -36,7 +37,10 @@ class LALR_Analyzer(GrammarAnalyzer):
if not rp.is_satisfied and not is_terminal(rp.next):
rps |= self.expand_rule(rp.next)

- lookahead[sym].append(('shift', fzset(rps)))
+ new_state = fzset(rps)
+ lookahead[sym].append(('shift', new_state))
+ if sym == '$end':
+ self.end_states.append( new_state )
yield fzset(rps)

for k, v in lookahead.items():
@@ -58,6 +62,8 @@ class LALR_Analyzer(GrammarAnalyzer):
for _ in bfs([self.init_state], step):
pass

+ self.end_state ,= self.end_states

# --
self.enum = list(self.states)
self.enum_rev = {s:i for i,s in enumerate(self.enum)}
@@ -71,3 +77,4 @@ class LALR_Analyzer(GrammarAnalyzer):


self.init_state_idx = self.enum_rev[self.init_state]
+ self.end_state_idx = self.enum_rev[self.end_state]

+ 14
- 18
lark/parsers/lalr_parser.py Ver fichero

@@ -19,14 +19,14 @@ class Parser:
callbacks = {rule: getattr(parser_conf.callback, rule.alias or rule.origin, None)
for rule in analysis.rules}

- self.parser = _Parser(analysis.states_idx, analysis.init_state_idx, analysis.start_symbol, callbacks)
+ self.parser = _Parser(analysis.states_idx, analysis.init_state_idx, analysis.end_state_idx, callbacks)
self.parse = self.parser.parse

class _Parser:
- def __init__(self, states, init_state, start_symbol, callbacks):
+ def __init__(self, states, init_state, end_state, callbacks):
self.states = states
self.init_state = init_state
- self.start_symbol = start_symbol
+ self.end_state = end_state
self.callbacks = callbacks

def parse(self, seq, set_state=None):
@@ -49,7 +49,7 @@ class _Parser:

raise UnexpectedToken(token, expected, seq, i)

- def reduce(rule, size, end=False):
+ def reduce(rule, size):
if size:
s = value_stack[-size:]
del state_stack[-size:]
@@ -57,15 +57,12 @@ class _Parser:
else:
s = []

- res = self.callbacks[rule](s)
-
- if end and len(state_stack) == 1 and rule.origin == self.start_symbol:
- return FinalReduce(res)
+ value = self.callbacks[rule](s)

_action, new_state = get_action(rule.origin)
assert _action == ACTION_SHIFT
state_stack.append(new_state)
- value_stack.append(res)
+ value_stack.append(value)

# Main LALR-parser loop
try:
@@ -73,6 +70,7 @@ class _Parser:
i += 1
while True:
action, arg = get_action(token.type)
+ assert arg != self.end_state

if action == ACTION_SHIFT:
state_stack.append(arg)
@@ -86,12 +84,10 @@ class _Parser:
pass

while True:
- _action, rule = get_action('$end')
- assert _action == 'reduce'
- res = reduce(*rule, end=True)
- if isinstance(res, FinalReduce):
- assert state_stack == [self.init_state] and not value_stack, len(state_stack)
- return res.value
-
-
-
+ _action, arg = get_action('$end')
+ if _action == ACTION_SHIFT:
+ assert arg == self.end_state
+ val ,= value_stack
+ return val
+ else:
+ reduce(*arg)

Cargando…
Cancelar
Guardar