
Merge branch 'myearley'

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.5.1
Erez Shinan, 7 years ago
parent
commit
499eb19c77
8 changed files with 488 additions and 329 deletions
  1. lark/common.py  +1 -1
  2. lark/lexer.py  +7 -2
  3. lark/parser_frontends.py  +68 -25
  4. lark/parsers/earley.py  +83 -140
  5. lark/parsers/grammar_analysis.py  +156 -0
  6. lark/parsers/lalr_analysis.py  +8 -153
  7. lark/parsers/lalr_parser.py  +10 -8
  8. lark/parsers/nearley.py  +155 -0

+1 -1   lark/common.py

@@ -28,7 +28,7 @@ class UnexpectedToken(ParseError):
 
 
 def is_terminal(sym):
-    return sym.isupper() or sym[0] == '$'
+    return isinstance(sym, tuple) or sym.isupper() or sym[0] == '$'
 
 
 class LexerConf:
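
Side note (not part of the diff): the widened check means tuple-wrapped symbols now count as terminals, which the reworked Earley/Nearley frontends rely on after _prepare_expansion wraps token names in tuples. A minimal sketch of the behaviour:

# Illustrative sketch, not from the commit: behaviour of the updated check.
def is_terminal(sym):
    return isinstance(sym, tuple) or sym.isupper() or sym[0] == '$'

assert is_terminal('NUMBER')            # uppercase token name
assert is_terminal('$end')              # internal '$'-prefixed symbol
assert is_terminal(('NUMBER', None))    # tuple-wrapped terminal from _prepare_expansion
assert not is_terminal('expr')          # lowercase rule name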


+7 -2   lark/lexer.py

@@ -197,14 +197,19 @@ class ContextualLexer:
 
         self.root_lexer = Lexer(tokens, ignore=ignore)
 
-    def lex(self, stream, parser):
+        self.set_parser_state(None) # Needs to be set on the outside
+
+    def set_parser_state(self, state):
+        self.parser_state = state
+
+    def lex(self, stream):
         lex_pos = 0
         line = 1
         col_start_pos = 0
         newline_types = list(self.root_lexer.newline_types)
         ignore_types = list(self.root_lexer.ignore_types)
         while True:
-            lexer = self.lexers[parser.state]
+            lexer = self.lexers[self.parser_state]
             for mre, type_from_index in lexer.mres:
                 m = mre.match(stream, lex_pos)
                 if m:
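
In other words, the contextual lexer no longer reaches into a parser object for its state; the parser pushes each state change into the lexer through set_parser_state. A rough sketch of the wiring (variable names here are placeholders, not from the commit):

# Sketch of the intended control flow (placeholder names: tokens, states_to_tokens, text, parser_conf).
lexer = ContextualLexer(tokens, states_to_tokens, ignore=ignore)   # one sub-lexer per parser state
parser = lalr_parser.Parser(parser_conf)

token_stream = lexer.lex(text)                              # picks self.lexers[self.parser_state] lazily
tree = parser.parse(token_stream, lexer.set_parser_state)   # parser reports every state change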


+68 -25   lark/parser_frontends.py

@@ -1,11 +1,11 @@
 import re
 import sre_parse
 
-from .lexer import Lexer, ContextualLexer
-from .parsers.lalr_analysis import GrammarAnalyzer
+from .lexer import Lexer, ContextualLexer, Token
 
-from .common import is_terminal, GrammarError
-from .parsers import lalr_parser, earley
+from .common import is_terminal, GrammarError, ParserConf
+from .parsers import lalr_parser, earley, nearley
+from .parsers.grammar_analysis import Rule
 
 class WithLexer:
     def __init__(self, lexer_conf):
@@ -22,11 +22,9 @@ class WithLexer:
 class LALR(WithLexer):
     def __init__(self, lexer_conf, parser_conf):
         WithLexer.__init__(self, lexer_conf)
-        self.parser_conf = parser_conf
 
-        analyzer = GrammarAnalyzer(parser_conf.rules, parser_conf.start)
-        analyzer.analyze()
-        self.parser = lalr_parser.Parser(analyzer, parser_conf.callback)
+        self.parser_conf = parser_conf
+        self.parser = lalr_parser.Parser(parser_conf)
 
     def parse(self, text):
         tokens = list(self.lex(text))
@@ -37,41 +35,35 @@ class LALR_ContextualLexer:
         self.lexer_conf = lexer_conf
         self.parser_conf = parser_conf
 
-        self.analyzer = GrammarAnalyzer(parser_conf.rules, parser_conf.start)
-        self.analyzer.analyze()
+        self.parser = lalr_parser.Parser(parser_conf)
 
-        d = {idx:t.keys() for idx, t in self.analyzer.states_idx.items()}
+        d = {idx:t.keys() for idx, t in self.parser.analysis.states_idx.items()}
         self.lexer = ContextualLexer(lexer_conf.tokens, d, ignore=lexer_conf.ignore,
                                      always_accept=lexer_conf.postlex.always_accept
                                      if lexer_conf.postlex else ())
 
 
     def parse(self, text):
-        parser = lalr_parser.Parser(self.analyzer, self.parser_conf.callback)
-        tokens = self.lexer.lex(text, parser)
+        tokens = self.lexer.lex(text)
         if self.lexer_conf.postlex:
             tokens = self.lexer_conf.postlex.process(tokens)
-        return parser.parse(tokens, True)
+        return self.parser.parse(tokens, self.lexer.set_parser_state)
 
 
 
-class Earley(WithLexer):
+class Nearley(WithLexer):
     def __init__(self, lexer_conf, parser_conf):
         WithLexer.__init__(self, lexer_conf)
 
         rules = [{'name':n,
-                  'symbols': list(self._prepare_expansion(x)),
+                  'symbols': self._prepare_expansion(x),
                   'postprocess': getattr(parser_conf.callback, a)}
                  for n,x,a in parser_conf.rules]
 
-        self.parser = earley.Parser(rules, parser_conf.start)
+        self.parser = nearley.Parser(rules, parser_conf.start)
 
     def _prepare_expansion(self, expansion):
-        for sym in expansion:
-            if is_terminal(sym):
-                yield sym, None
-            else:
-                yield sym
+        return [(sym, None) if is_terminal(sym) else sym for sym in expansion]
 
     def parse(self, text):
         tokens = list(self.lex(text))
@@ -79,7 +71,27 @@ class Earley(WithLexer):
         assert len(res) ==1 , 'Ambiguious Parse! Not handled yet'
         return res[0]
 
-class Earley_NoLex:
+
+class Earley(WithLexer):
+    def __init__(self, lexer_conf, parser_conf):
+        WithLexer.__init__(self, lexer_conf)
+
+        rules = [(n, self._prepare_expansion(x), a)
+                 for n,x,a in parser_conf.rules]
+
+        self.parser = earley.Parser(ParserConf(rules, parser_conf.callback, parser_conf.start))
+
+    def _prepare_expansion(self, expansion):
+        return [(sym,) if is_terminal(sym) else sym for sym in expansion]
+
+    def parse(self, text):
+        tokens = list(self.lex(text))
+        res = self.parser.parse(tokens)
+        assert len(res) ==1 , 'Ambiguious Parse! Not handled yet'
+        return res[0]
+
+
+class Nearley_NoLex:
     def __init__(self, lexer_conf, parser_conf):
         self.token_by_name = {t.name:t for t in lexer_conf.tokens}
 
@@ -88,7 +100,7 @@ class Earley_NoLex:
                   'postprocess': getattr(parser_conf.callback, a)}
                  for n,x,a in parser_conf.rules]
 
-        self.parser = earley.Parser(rules, parser_conf.start)
+        self.parser = nearley.Parser(rules, parser_conf.start)
 
     def _prepare_expansion(self, expansion):
         for sym in expansion:
@@ -106,4 +118,35 @@ class Earley_NoLex:
         assert len(res) ==1 , 'Ambiguious Parse! Not handled yet'
         return res[0]
 
-ENGINE_DICT = { 'lalr': LALR, 'earley': Earley, 'earley_nolex': Earley_NoLex, 'lalr_contextual_lexer': LALR_ContextualLexer }
+
+class Earley_NoLex:
+    def __init__(self, lexer_conf, parser_conf):
+        self.token_by_name = {t.name:t for t in lexer_conf.tokens}
+
+        rules = [(n, list(self._prepare_expansion(x)), a)
+                 for n,x,a in parser_conf.rules]
+
+        self.parser = earley.Parser(ParserConf(rules, parser_conf.callback, parser_conf.start))
+
+    def _prepare_expansion(self, expansion):
+        for sym in expansion:
+            if is_terminal(sym):
+                regexp = self.token_by_name[sym].to_regexp()
+                width = sre_parse.parse(regexp).getwidth()
+                if not width == (1,1):
+                    raise GrammarError('Dynamic lexing requires all tokens to have a width of 1 (%s is %s)' % (regexp, width))
+                yield (re.compile(regexp).match,)
+            else:
+                yield sym
+
+    def parse(self, text):
+        res = self.parser.parse([Token(x,x) for x in text]) # A little hacky perhaps!
+        assert len(res) ==1 , 'Ambiguious Parse! Not handled yet'
+        return res[0]
+
+ENGINE_DICT = {
+    'lalr': LALR,
+    'earley': Earley,
+    'earley_nolex': Earley_NoLex,
+    'lalr_contextual_lexer': LALR_ContextualLexer
+}
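
For orientation, a hedged usage sketch (the conf objects and the input string are placeholders; only ENGINE_DICT and the frontend classes come from this file): the engine is looked up by name and instantiated with the lexer and parser configurations.

# Hypothetical use of ENGINE_DICT (lexer_conf and parser_conf are assumed to exist already):
engine_class = ENGINE_DICT['lalr_contextual_lexer']   # or 'lalr', 'earley', 'earley_nolex'
engine = engine_class(lexer_conf, parser_conf)
tree = engine.parse('1 + 2 * 3')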

+83 -140   lark/parsers/earley.py

@@ -1,155 +1,98 @@
"My name is Earley"
from ..common import ParseError, UnexpectedToken, is_terminal
from .grammar_analysis import GrammarAnalyzer

from ..utils import classify, STRING_TYPE
from ..common import ParseError, UnexpectedToken
class Item:
def __init__(self, rule, ptr, start, data):
self.rule = rule
self.ptr = ptr
self.start = start
self.data = data

try:
xrange
except NameError:
xrange = range
@property
def expect(self):
return self.rule.expansion[self.ptr]

class MatchFailed(object):
pass
@property
def is_complete(self):
return self.ptr == len(self.rule.expansion)

class AbortParseMatch(Exception):
pass
def advance(self, data):
return Item(self.rule, self.ptr+1, self.start, self.data + [data])

def __eq__(self, other):
return self.start == other.start and self.ptr == other.ptr and self.rule == other.rule
def __hash__(self):
return hash((self.rule, self.ptr, self.start))

class Rule(object):
def __init__(self, name, symbols, postprocess):
self.name = name
self.symbols = symbols
self.postprocess = postprocess

class State(object):
def __init__(self, rule, expect, reference, data=None):
self.rule = rule
self.expect = expect
self.reference = reference
self.data = data or []

self.is_complete = (self.expect == len(self.rule.symbols))
if not self.is_complete:
self.expect_symbol = self.rule.symbols[self.expect]
self.is_terminal = isinstance(self.expect_symbol, tuple)
else:
self.is_terminal = False

def next_state(self, data):
return State(self.rule, self.expect+1, self.reference, self.data + [data])

def consume_terminal(self, inp):
if not self.is_complete and self.is_terminal:
# PORT: originally tests regexp

if self.expect_symbol[1] is not None:
match = self.expect_symbol[1].match(inp)
if match:
return self.next_state(inp)

elif self.expect_symbol[0] == inp.type:
return self.next_state(inp)

def consume_nonterminal(self, inp):
if not self.is_complete and not self.is_terminal:

if self.expect_symbol == inp:
return self.next_state(inp)

def process(self, location, ind, table, rules, added_rules):

if self.is_complete:
# Completed a rule
if self.rule.postprocess:
try:
self.data = self.rule.postprocess(self.data)
except AbortParseMatch:
self.data = MatchFailed

if self.data is not MatchFailed:
for s in table[self.reference]:
x = s.consume_nonterminal(self.rule.name)
if x:
x.data[-1] = self.data
x.epsilon_closure(location, ind, table)

else:
exp = self.rule.symbols[self.expect]
if isinstance(exp, tuple):
return

for r in rules[exp]:
assert r.name == exp
if r not in added_rules:
if r.symbols:
added_rules.add(r)
State(r, 0, location).epsilon_closure(location, ind, table)
class Parser:
def __init__(self, parser_conf):
self.analysis = GrammarAnalyzer(parser_conf.rules, parser_conf.start)
self.start = parser_conf.start

self.postprocess = {}
self.predictions = {}
for rule in self.analysis.rules:
if rule.origin != '$root': # XXX kinda ugly
self.postprocess[rule] = getattr(parser_conf.callback, rule.alias)
self.predictions[rule.origin] = [(x.rule, x.index) for x in self.analysis.expand_rule(rule.origin)]

def parse(self, stream):
# Define parser functions

def predict(symbol, i):
assert not is_terminal(symbol), symbol
return {Item(rule, index, i, []) for rule, index in self.predictions[symbol]}

def complete(item, table):
item.data = self.postprocess[item.rule](item.data)
return {old_item.advance(item.data) for old_item in table[item.start]
if not old_item.is_complete and old_item.expect == item.rule.origin}

def process_column(i, term):
assert i == len(table)-1
cur_set = table[i]
next_set = set()

to_process = cur_set
while to_process:
new_items = set()
for item in to_process:
if item.is_complete:
new_items |= complete(item, table)
else:
# Empty rule
new_copy = self.consume_nonterminal(r.name)
new_copy.data[-1] = r.postprocess([]) if r.postprocess else []
if is_terminal(item.expect):
# scan
match = item.expect[0](term) if callable(item.expect[0]) else item.expect[0] == term
if match:
next_set.add(item.advance(stream[i]))
else:
if item.ptr: # part of an already predicted batch
new_items |= predict(item.expect, i)

new_copy.epsilon_closure(location, ind, table)
to_process = new_items - cur_set # TODO: is this precaution necessary?
cur_set |= to_process

def epsilon_closure(self, location, ind, table):
col = table[location]
col.append(self)

if not self.is_complete:
for i in xrange(ind):
state = col[i]
if state.is_complete and state.reference == location:
x = self.consume_nonterminal(state.rule.name)
if x:
x.data[-1] = state.data
x.epsilon_closure(location, ind, table)
if not next_set and term != '$end':
expect = filter(is_terminal, [x.expect for x in cur_set if not x.is_complete])
raise UnexpectedToken(term, expect, stream, i)

table.append(next_set)

class Parser(object):
def __init__(self, rules, start=None):
self.rules = [Rule(r['name'], r['symbols'], r.get('postprocess', None)) for r in rules]
self.rules_by_name = classify(self.rules, lambda r: r.name)
self.start = start or self.rules[0].name
# Main loop starts
table = [predict(self.start, 0)]

def advance_to(self, table, added_rules):
n = len(table)-1
for w, s in enumerate(table[n]):
s.process(n, w, table, self.rules_by_name, added_rules)
for i, char in enumerate(stream):
process_column(i, char.type)

def parse(self, stream):
initial_rules = set(self.rules_by_name[self.start])
table = [[State(r, 0, 0) for r in initial_rules]]
self.advance_to(table, initial_rules)

i = 0

while i < len(stream):
col = []

token = stream[i]
for s in table[-1]:
x = s.consume_terminal(token)
if x:
col.append(x)

if not col:
expected = {s.expect_symbol for s in table[-1] if s.is_terminal}
raise UnexpectedToken(stream[i], expected, stream, i)

table.append(col)
self.advance_to(table, set())

i += 1

res = list(self.finish(table))
if not res:
raise ParseError('Incomplete parse')
return res

def finish(self, table):
for t in table[-1]:
if (t.rule.name == self.start
and t.expect == len(t.rule.symbols)
and t.reference == 0
and t.data is not MatchFailed):
yield t.data
process_column(len(stream), '$end')

# Parse ended. Now build a parse tree
solutions = [n.data for n in table[len(stream)]
if n.is_complete and n.rule.origin==self.start and n.start==0]

if not solutions:
raise ParseError('Incomplete parse: Could not find a solution to input')

return solutions
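
The rewrite replaces the nearley-style State machinery with a classic Earley chart: one set of Item objects per input position, advanced by predict (expand a nonterminal via the precomputed predictions), scan (match the next token type, or call a match function for tuple-wrapped terminals) and complete (propagate a finished rule back to the column it started in). A hedged sketch of driving it the way the Earley frontend in parser_frontends.py does, with a placeholder grammar, callback and token type (assumes the package at this commit is importable):

# Sketch only: Callback and Tok are placeholders, the grammar is illustrative.
from collections import namedtuple

from lark.common import ParserConf
from lark.parsers.earley import Parser

class Callback:                                  # placeholder callbacks; real ones build tree nodes
    def add(self, children): return ('add', children)
    def single(self, children): return children[0]
    def number(self, children): return children[0]

# Rule format mirrors Earley._prepare_expansion: lowercase strings are rules,
# terminals are wrapped in 1-tuples.
rules = [
    ('sum',  ['sum', ('PLUS',), 'atom'], 'add'),
    ('sum',  ['atom'],                   'single'),
    ('atom', [('NUMBER',)],              'number'),
]
parser = Parser(ParserConf(rules, Callback(), 'sum'))

Tok = namedtuple('Tok', 'type value')            # stand-in for lexer output (needs a .type)
results = parser.parse([Tok('NUMBER', '1'), Tok('PLUS', '+'), Tok('NUMBER', '2')])
# For unambiguous input this is a single-element list of postprocessed parse results.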

+156 -0   lark/parsers/grammar_analysis.py

@@ -0,0 +1,156 @@

from ..utils import bfs, fzset
from ..common import GrammarError, is_terminal

class Rule(object):
"""
origin : a symbol
expansion : a list of symbols
"""
def __init__(self, origin, expansion, alias=None):
self.origin = origin
self.expansion = expansion
self.alias = alias

def __repr__(self):
return '<%s : %s>' % (self.origin, ' '.join(self.expansion))

class RulePtr(object):
def __init__(self, rule, index):
assert isinstance(rule, Rule)
assert index <= len(rule.expansion)
self.rule = rule
self.index = index

def __repr__(self):
before = self.rule.expansion[:self.index]
after = self.rule.expansion[self.index:]
return '<%s : %s * %s>' % (self.rule.origin, ' '.join(before), ' '.join(after))

@property
def next(self):
return self.rule.expansion[self.index]

def advance(self, sym):
assert self.next == sym
return RulePtr(self.rule, self.index+1)

@property
def is_satisfied(self):
return self.index == len(self.rule.expansion)

def __eq__(self, other):
return self.rule == other.rule and self.index == other.index
def __hash__(self):
return hash((self.rule, self.index))


def pairs(lst):
return zip(lst[:-1], lst[1:])

def update_set(set1, set2):
copy = set(set1)
set1 |= set2
return set1 != copy

def calculate_sets(rules):
"""Calculate FOLLOW sets.

Adapted from: http://lara.epfl.ch/w/cc09:algorithm_for_first_and_follow_sets"""
symbols = {sym for rule in rules for sym in rule.expansion} | {rule.origin for rule in rules}
symbols.add('$root') # what about other unused rules?

# foreach grammar rule X ::= Y(1) ... Y(k)
# if k=0 or {Y(1),...,Y(k)} subset of NULLABLE then
# NULLABLE = NULLABLE union {X}
# for i = 1 to k
# if i=1 or {Y(1),...,Y(i-1)} subset of NULLABLE then
# FIRST(X) = FIRST(X) union FIRST(Y(i))
# for j = i+1 to k
# if i=k or {Y(i+1),...Y(k)} subset of NULLABLE then
# FOLLOW(Y(i)) = FOLLOW(Y(i)) union FOLLOW(X)
# if i+1=j or {Y(i+1),...,Y(j-1)} subset of NULLABLE then
# FOLLOW(Y(i)) = FOLLOW(Y(i)) union FIRST(Y(j))
# until none of NULLABLE,FIRST,FOLLOW changed in last iteration

NULLABLE = set()
FIRST = {}
FOLLOW = {}
for sym in symbols:
FIRST[sym]={sym} if is_terminal(sym) else set()
FOLLOW[sym]=set()

changed = True
while changed:
changed = False

for rule in rules:
if set(rule.expansion) <= NULLABLE:
if update_set(NULLABLE, {rule.origin}):
changed = True

for i, sym in enumerate(rule.expansion):
if set(rule.expansion[:i]) <= NULLABLE:
if update_set(FIRST[rule.origin], FIRST[sym]):
changed = True
if i==len(rule.expansion)-1 or set(rule.expansion[i:]) <= NULLABLE:
if update_set(FOLLOW[sym], FOLLOW[rule.origin]):
changed = True

for j in range(i+1, len(rule.expansion)):
if set(rule.expansion[i+1:j]) <= NULLABLE:
if update_set(FOLLOW[sym], FIRST[rule.expansion[j]]):
changed = True

return FIRST, FOLLOW, NULLABLE


class GrammarAnalyzer(object):
def __init__(self, rule_tuples, start_symbol, debug=False):
self.start_symbol = start_symbol
self.debug = debug
rule_tuples = list(rule_tuples)
rule_tuples.append(('$root', [start_symbol, '$end']))
rule_tuples = [(t[0], t[1], None) if len(t)==2 else t for t in rule_tuples]

self.rules = set()
self.rules_by_origin = {o: [] for o, _x, _a in rule_tuples}
for origin, exp, alias in rule_tuples:
r = Rule( origin, exp, alias )
self.rules.add(r)
self.rules_by_origin[origin].append(r)

for r in self.rules:
for sym in r.expansion:
if not (is_terminal(sym) or sym in self.rules_by_origin):
raise GrammarError("Using an undefined rule: %s" % sym)

self.init_state = self.expand_rule(start_symbol)

self.FIRST, self.FOLLOW, self.NULLABLE = calculate_sets(self.rules)

def expand_rule(self, rule):
"Returns all init_ptrs accessible by rule (recursive)"
init_ptrs = set()
def _expand_rule(rule):
assert not is_terminal(rule), rule

for r in self.rules_by_origin[rule]:
init_ptr = RulePtr(r, 0)
init_ptrs.add(init_ptr)

if r.expansion: # if not empty rule
new_r = init_ptr.next
if not is_terminal(new_r):
yield new_r

_ = list(bfs([rule], _expand_rule))

return fzset(init_ptrs)

def _first(self, r):
if is_terminal(r):
return {r}
else:
return {rp.next for rp in self.expand_rule(r) if is_terminal(rp.next)}
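
A tiny illustration (not from the commit) of the extracted FIRST/FOLLOW/NULLABLE computation, using the Rule class above and lark's usual convention that lowercase names are rules and uppercase names are terminals:

from lark.parsers.grammar_analysis import Rule, calculate_sets

# s : A s |          (i.e. zero or more A's)
rules = [Rule('s', ['A', 's']), Rule('s', [])]
FIRST, FOLLOW, NULLABLE = calculate_sets(rules)

assert 's' in NULLABLE       # the empty alternative makes s nullable
assert 'A' in FIRST['s']     # s can start with the terminal A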


+8 -153   lark/parsers/lalr_analysis.py

@@ -1,162 +1,16 @@
import logging
from collections import defaultdict, deque
from collections import defaultdict

from ..utils import classify, classify_bool, bfs, fzset
from ..common import GrammarError, is_terminal

from .grammar_analysis import GrammarAnalyzer

ACTION_SHIFT = 0

class Rule(object):
"""
origin : a symbol
expansion : a list of symbols
"""
def __init__(self, origin, expansion, alias=None):
self.origin = origin
self.expansion = expansion
self.alias = alias

def __repr__(self):
return '<%s : %s>' % (self.origin, ' '.join(self.expansion))

class RulePtr(object):
def __init__(self, rule, index):
assert isinstance(rule, Rule)
assert index <= len(rule.expansion)
self.rule = rule
self.index = index

def __repr__(self):
before = self.rule.expansion[:self.index]
after = self.rule.expansion[self.index:]
return '<%s : %s * %s>' % (self.rule.origin, ' '.join(before), ' '.join(after))

@property
def next(self):
return self.rule.expansion[self.index]

def advance(self, sym):
assert self.next == sym
return RulePtr(self.rule, self.index+1)

@property
def is_satisfied(self):
return self.index == len(self.rule.expansion)

def __eq__(self, other):
return self.rule == other.rule and self.index == other.index
def __hash__(self):
return hash((self.rule, self.index))


def pairs(lst):
return zip(lst[:-1], lst[1:])

def update_set(set1, set2):
copy = set(set1)
set1 |= set2
return set1 != copy

class GrammarAnalyzer(object):
def __init__(self, rule_tuples, start_symbol, debug=False):
self.start_symbol = start_symbol
self.debug = debug
rule_tuples = list(rule_tuples)
rule_tuples.append(('$root', [start_symbol, '$end']))
rule_tuples = [(t[0], t[1], None) if len(t)==2 else t for t in rule_tuples]

self.rules = set()
self.rules_by_origin = {o: [] for o, _x, _a in rule_tuples}
for origin, exp, alias in rule_tuples:
r = Rule( origin, exp, alias )
self.rules.add(r)
self.rules_by_origin[origin].append(r)

for r in self.rules:
for sym in r.expansion:
if not (is_terminal(sym) or sym in self.rules_by_origin):
raise GrammarError("Using an undefined rule: %s" % sym)

self.init_state = self.expand_rule(start_symbol)

def expand_rule(self, rule):
"Returns all init_ptrs accessible by rule (recursive)"
init_ptrs = set()
def _expand_rule(rule):
assert not is_terminal(rule)

for r in self.rules_by_origin[rule]:
init_ptr = RulePtr(r, 0)
init_ptrs.add(init_ptr)

if r.expansion: # if not empty rule
new_r = init_ptr.next
if not is_terminal(new_r):
yield new_r

_ = list(bfs([rule], _expand_rule))

return fzset(init_ptrs)

def _first(self, r):
if is_terminal(r):
return {r}
else:
return {rp.next for rp in self.expand_rule(r) if is_terminal(rp.next)}

def _calc(self):
"""Calculate FOLLOW sets.

Adapted from: http://lara.epfl.ch/w/cc09:algorithm_for_first_and_follow_sets"""
symbols = {sym for rule in self.rules for sym in rule.expansion} | {rule.origin for rule in self.rules}
symbols.add('$root') # what about other unused rules?

# foreach grammar rule X ::= Y(1) ... Y(k)
# if k=0 or {Y(1),...,Y(k)} subset of NULLABLE then
# NULLABLE = NULLABLE union {X}
# for i = 1 to k
# if i=1 or {Y(1),...,Y(i-1)} subset of NULLABLE then
# FIRST(X) = FIRST(X) union FIRST(Y(i))
# for j = i+1 to k
# if i=k or {Y(i+1),...Y(k)} subset of NULLABLE then
# FOLLOW(Y(i)) = FOLLOW(Y(i)) union FOLLOW(X)
# if i+1=j or {Y(i+1),...,Y(j-1)} subset of NULLABLE then
# FOLLOW(Y(i)) = FOLLOW(Y(i)) union FIRST(Y(j))
# until none of NULLABLE,FIRST,FOLLOW changed in last iteration

NULLABLE = set()
FIRST = {}
FOLLOW = {}
for sym in symbols:
FIRST[sym]={sym} if is_terminal(sym) else set()
FOLLOW[sym]=set()

changed = True
while changed:
changed = False

for rule in self.rules:
if set(rule.expansion) <= NULLABLE:
if update_set(NULLABLE, {rule.origin}):
changed = True

for i, sym in enumerate(rule.expansion):
if set(rule.expansion[:i]) <= NULLABLE:
if update_set(FIRST[rule.origin], FIRST[sym]):
changed = True
if i==len(rule.expansion)-1 or set(rule.expansion[i:]) <= NULLABLE:
if update_set(FOLLOW[sym], FOLLOW[rule.origin]):
changed = True

for j in range(i+1, len(rule.expansion)):
if set(rule.expansion[i+1:j]) <= NULLABLE:
if update_set(FOLLOW[sym], FIRST[rule.expansion[j]]):
changed = True

self.FOLLOW = FOLLOW

def analyze(self):
self._calc()
class LALR_Analyzer(GrammarAnalyzer):

def compute_lookahead(self):

self.states = {}
def step(state):
@@ -188,7 +42,8 @@ class GrammarAnalyzer(object):
                     lookahead[k] = [x]
 
             for k, v in lookahead.items():
-                assert len(v) == 1, ("Collision", k, v)
+                if not len(v) == 1:
+                    raise GrammarError("Collision in %s: %s" %(k, v))
 
             self.states[state] = {k:v[0] for k, v in lookahead.items()}
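
The practical effect (an interpretation of the hunk above): a lookahead collision in a grammar that is not LALR(1) now surfaces as a catchable GrammarError instead of an assertion failure. A sketch, with placeholder rule_tuples and start:

# Hypothetical handling of an ambiguous grammar (rule_tuples/start are placeholders):
from lark.common import GrammarError
from lark.parsers.lalr_analysis import LALR_Analyzer

try:
    analyzer = LALR_Analyzer(rule_tuples, start)
    analyzer.compute_lookahead()
except GrammarError as e:
    print('Grammar is not LALR(1):', e)    # e.g. "Collision in <key>: <actions>"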



+10 -8   lark/parsers/lalr_parser.py

@@ -1,15 +1,15 @@
-from .lalr_analysis import ACTION_SHIFT
 from ..common import ParseError, UnexpectedToken
 
+from .lalr_analysis import LALR_Analyzer, ACTION_SHIFT
 
 class Parser(object):
-    def __init__(self, analysis, callback):
-        self.analysis = analysis
-        self.callbacks = {rule: getattr(callback, rule.alias or rule.origin, None)
-                          for rule in analysis.rules}
-        self.state = self.analysis.init_state_idx
+    def __init__(self, parser_conf):
+        self.analysis = LALR_Analyzer(parser_conf.rules, parser_conf.start)
+        self.analysis.compute_lookahead()
+        self.callbacks = {rule: getattr(parser_conf.callback, rule.alias or rule.origin, None)
+                          for rule in self.analysis.rules}
 
-    def parse(self, seq, set_state=False):
+    def parse(self, seq, set_state=None):
         i = 0
         stream = iter(seq)
         states_idx = self.analysis.states_idx
@@ -17,6 +17,8 @@ class Parser(object):
         state_stack = [self.analysis.init_state_idx]
         value_stack = []
 
+        if set_state: set_state(self.analysis.init_state_idx)
+
         def get_action(key):
             state = state_stack[-1]
             try:
@@ -54,7 +56,7 @@ class Parser(object):
             if action == ACTION_SHIFT:
                 state_stack.append(arg)
                 value_stack.append(token)
-                if set_state: self.state = arg
+                if set_state: set_state(arg)
                 token = next(stream)
                 i += 1
             else:
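
In short, set_state is now an optional callable rather than a boolean flag: the parser invokes it with the initial state index and again after every shift, which is what keeps the contextual lexer in sync. A sketch with placeholder inputs (parser_conf and token_stream are assumed to exist):

from lark.parsers.lalr_parser import Parser

parser = Parser(parser_conf)                     # runs LALR_Analyzer + compute_lookahead internally

visited = []
tree = parser.parse(token_stream, set_state=visited.append)
# With the contextual lexer, the callback is lexer.set_parser_state instead (see parser_frontends.py).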


+155 -0   lark/parsers/nearley.py

@@ -0,0 +1,155 @@
"My name is Earley"

from ..utils import classify, STRING_TYPE
from ..common import ParseError, UnexpectedToken

try:
xrange
except NameError:
xrange = range

class MatchFailed(object):
pass

class AbortParseMatch(Exception):
pass


class Rule(object):
def __init__(self, name, symbols, postprocess):
self.name = name
self.symbols = symbols
self.postprocess = postprocess

class State(object):
def __init__(self, rule, expect, reference, data=None):
self.rule = rule
self.expect = expect
self.reference = reference
self.data = data or []

self.is_complete = (self.expect == len(self.rule.symbols))
if not self.is_complete:
self.expect_symbol = self.rule.symbols[self.expect]
self.is_terminal = isinstance(self.expect_symbol, tuple)
else:
self.is_terminal = False

def next_state(self, data):
return State(self.rule, self.expect+1, self.reference, self.data + [data])

def consume_terminal(self, inp):
if not self.is_complete and self.is_terminal:
# PORT: originally tests regexp

if self.expect_symbol[1] is not None:
match = self.expect_symbol[1].match(inp)
if match:
return self.next_state(inp)

elif self.expect_symbol[0] == inp.type:
return self.next_state(inp)

def consume_nonterminal(self, inp):
if not self.is_complete and not self.is_terminal:

if self.expect_symbol == inp:
return self.next_state(inp)

def process(self, location, ind, table, rules, added_rules):

if self.is_complete:
# Completed a rule
if self.rule.postprocess:
try:
self.data = self.rule.postprocess(self.data)
except AbortParseMatch:
self.data = MatchFailed

if self.data is not MatchFailed:
for s in table[self.reference]:
x = s.consume_nonterminal(self.rule.name)
if x:
x.data[-1] = self.data
x.epsilon_closure(location, ind, table)

else:
exp = self.rule.symbols[self.expect]
if isinstance(exp, tuple):
return

for r in rules[exp]:
assert r.name == exp
if r not in added_rules:
if r.symbols:
added_rules.add(r)
State(r, 0, location).epsilon_closure(location, ind, table)
else:
# Empty rule
new_copy = self.consume_nonterminal(r.name)
new_copy.data[-1] = r.postprocess([]) if r.postprocess else []

new_copy.epsilon_closure(location, ind, table)

def epsilon_closure(self, location, ind, table):
col = table[location]
col.append(self)

if not self.is_complete:
for i in xrange(ind):
state = col[i]
if state.is_complete and state.reference == location:
x = self.consume_nonterminal(state.rule.name)
if x:
x.data[-1] = state.data
x.epsilon_closure(location, ind, table)


class Parser(object):
def __init__(self, rules, start=None):
self.rules = [Rule(r['name'], r['symbols'], r.get('postprocess', None)) for r in rules]
self.rules_by_name = classify(self.rules, lambda r: r.name)
self.start = start or self.rules[0].name

def advance_to(self, table, added_rules):
n = len(table)-1
for w, s in enumerate(table[n]):
s.process(n, w, table, self.rules_by_name, added_rules)

def parse(self, stream):
initial_rules = set(self.rules_by_name[self.start])
table = [[State(r, 0, 0) for r in initial_rules]]
self.advance_to(table, initial_rules)

i = 0

while i < len(stream):
col = []

token = stream[i]
for s in table[-1]:
x = s.consume_terminal(token)
if x:
col.append(x)

if not col:
expected = {s.expect_symbol for s in table[-1] if s.is_terminal}
raise UnexpectedToken(stream[i], expected, stream, i)

table.append(col)
self.advance_to(table, set())

i += 1

res = list(self.finish(table))
if not res:
raise ParseError('Incomplete parse')
return res

def finish(self, table):
for t in table[-1]:
if (t.rule.name == self.start
and t.expect == len(t.rule.symbols)
and t.reference == 0
and t.data is not MatchFailed):
yield t.data
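
For context, a sketch of the input format inferred from Parser.__init__ above (placeholder grammar; token_list is assumed lexer output with a .type attribute): the ported nearley engine consumes rule dicts with 'name', 'symbols' and an optional 'postprocess', where terminals are (token_type, compiled_regex_or_None) tuples.

from lark.parsers.nearley import Parser

rules = [
    {'name': 'expr',
     'symbols': [('NUMBER', None), ('PLUS', None), ('NUMBER', None)],
     'postprocess': lambda data: ('add', data[0], data[2])},
]
parser = Parser(rules, start='expr')
solutions = parser.parse(token_list)             # returns a list of possible parses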
