@@ -27,6 +27,44 @@ class Item:
     def __hash__(self):
         return hash((self.rule, self.ptr, self.start))
 
+    def __repr__(self):
+        before = map(str, self.rule.expansion[:self.ptr])
+        after = map(str, self.rule.expansion[self.ptr:])
+        return '<(%d) %s : %s * %s>' % (self.start, self.rule.origin, ' '.join(before), ' '.join(after))
+
+
+class NewsList(list):
+    def __init__(self, initial=None):
+        list.__init__(self, initial or [])
+        self.last_iter = 0
+
+    def get_news(self):
+        i = self.last_iter
+        self.last_iter = len(self)
+        return self[i:]
+
+
+class Column:
+    def __init__(self):
+        self.to_reduce = NewsList()
+        self.to_predict = NewsList()
+        self.to_scan = NewsList()
+        self.item_count = 0
+
+    def add(self, items):
+        self.item_count += len(items)
+        for item in items:
+            if item.is_complete:
+                if item not in self.to_reduce:   # Avoid infinite loop
+                    self.to_reduce.append(item)
+            else:
+                if is_terminal(item.expect):
+                    self.to_scan.append(item)
+                else:
+                    self.to_predict.append(item)
+
+    def __nonzero__(self):
+        return bool(self.item_count)
 
 class Parser:
     def __init__(self, parser_conf):
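[Note, not part of the diff] A minimal sketch of how the new NewsList and Column behave. It assumes the two classes from the hunk above are pasted into the same session; FakeItem and the is_terminal stand-in are illustrative only, not lark's real definitions:

    # Stand-ins for lark's Item and is_terminal, for illustration only.
    class FakeItem:
        def __init__(self, expect, is_complete=False):
            self.expect = expect
            self.is_complete = is_complete

    def is_terminal(sym):
        return sym.isupper()   # assumption: terminal names are upper-case

    news = NewsList([1, 2])
    news.append(3)
    assert news.get_news() == [1, 2, 3]   # first call returns everything added so far
    news.append(4)
    assert news.get_news() == [4]         # later calls return only the new additions

    col = Column()
    col.add([FakeItem('NUMBER'), FakeItem('expr'), FakeItem(None, is_complete=True)])
    # complete items -> to_reduce, expected terminals -> to_scan, nonterminals -> to_predict
    assert len(col.to_scan) == len(col.to_predict) == len(col.to_reduce) == 1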
@@ -39,53 +77,53 @@ class Parser:
             if rule.origin != '$root':  # XXX kinda ugly
                 a = rule.alias
                 self.postprocess[rule] = a if callable(a) else getattr(parser_conf.callback, a)
-                self.predictions[rule.origin] = [(x.rule, x.index) for x in self.analysis.expand_rule(rule.origin)]
+                self.predictions[rule.origin] = [x.rule for x in self.analysis.expand_rule(rule.origin)]
 
     def parse(self, stream):
         # Define parser functions
-        def predict(symbol, i):
-            assert not is_terminal(symbol), symbol
-            return {Item(rule, index, i, []) for rule, index in self.predictions[symbol]}
+        def predict(nonterm, i):
+            assert not is_terminal(nonterm), nonterm
+            return [Item(rule, 0, i, []) for rule in self.predictions[nonterm]]
 
         def complete(item, table):
+            name = item.rule.origin
             item.data = self.postprocess[item.rule](item.data)
-            return {old_item.advance(item.data) for old_item in table[item.start]
-                    if not old_item.is_complete and old_item.expect == item.rule.origin}
+            return [i.advance(item.data) for i in table[item.start].to_predict
+                    if i.expect == name]
 
         def process_column(i, token):
             assert i == len(table)-1
             cur_set = table[i]
-            next_set = set()
-
-            to_process = cur_set
-            while to_process:
-                new_items = set()
-                for item in to_process:
-                    if item.is_complete:
-                        new_items |= complete(item, table)
-                    else:
-                        if is_terminal(item.expect):
-                            # scan
-                            match = item.expect[0](token) if callable(item.expect[0]) else item.expect[0] == token.type
-                            if match:
-                                next_set.add(item.advance(stream[i]))
-                        else:
-                            if item.ptr:    # part of an already predicted batch
-                                new_items |= predict(item.expect, i)
-
-                to_process = new_items - cur_set    # TODO: is this precaution necessary?
-                cur_set |= to_process
+            next_set = Column()
+
+            while True:
+                to_predict = {x.expect for x in cur_set.to_predict.get_news()
+                              if x.ptr}  # if not part of an already predicted batch
+                to_reduce = cur_set.to_reduce.get_news()
+                if not (to_predict or to_reduce):
+                    break
+
+                for nonterm in to_predict:
+                    cur_set.add( predict(nonterm, i) )
+                for item in to_reduce:
+                    cur_set.add( complete(item, table) )
+
+            for item in cur_set.to_scan.get_news():
+                match = item.expect[0](token) if callable(item.expect[0]) else item.expect[0] == token.type
+                if match:
+                    next_set.add([item.advance(stream[i])])
 
             if not next_set and token.type != '$end':
-                expect = filter(is_terminal, [x.expect for x in cur_set if not x.is_complete])
+                expect = [i.expect for i in cur_set.to_scan]
                 raise UnexpectedToken(token, expect, stream, i)
 
             table.append(next_set)
 
         # Main loop starts
-        table = [predict(self.start, 0)]
+        table = [Column()]
+        table[0].add(predict(self.start, 0))
 
         for i, char in enumerate(stream):
             process_column(i, char)
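[Note, not part of the diff] The rewritten process_column is a fixpoint loop: each pass consumes only the items added since the previous pass (via get_news()), and it stops once a pass produces no new predictions or completions. A rough sketch of that worklist pattern in isolation, reusing NewsList from the first hunk; the fixpoint/step names are illustrative:

    # Worklist/fixpoint pattern analogous to the while-True loop above.
    def fixpoint(seed, step):
        # step(item) returns an iterable of items derived from `item`
        worklist = NewsList(seed)
        known = set(seed)
        while True:
            news = worklist.get_news()
            if not news:
                break                      # nothing new was added: fixpoint reached
            for item in news:
                for derived in step(item):
                    if derived not in known:
                        known.add(derived)
                        worklist.append(derived)
        return known

    # e.g. reachability over a tiny successor relation, capped at 5
    print(sorted(fixpoint([0], lambda n: [n + 1] if n < 5 else [])))   # [0, 1, 2, 3, 4, 5]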
@@ -93,8 +131,8 @@ class Parser:
         process_column(len(stream), EndToken())
 
         # Parse ended. Now build a parse tree
-        solutions = [n.data for n in table[len(stream)]
-                     if n.is_complete and n.rule.origin==self.start and n.start==0]
+        solutions = [n.data for n in table[len(stream)].to_reduce
+                     if n.rule.origin==self.start and n.start==0]
 
         if not solutions:
             raise ParseError('Incomplete parse: Could not find a solution to input')
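[Note, not part of the diff] With Columns, every completed item is routed into to_reduce by Column.add, which is why the final solutions are now read from table[len(stream)].to_reduce. A hedged restatement of that filter as a helper; the name completed_roots is illustrative:

    def completed_roots(column, start_symbol):
        # Full parses are the complete items in the last column that derive
        # the start symbol and began at position 0 (i.e. span the whole input).
        return [item.data for item in column.to_reduce
                if item.rule.origin == start_symbol and item.start == 0]

    # equivalent to: solutions = completed_roots(table[len(stream)], self.start)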