| @@ -17,8 +17,8 @@ from functools import cmp_to_key | |||
| from ..utils import compare | |||
| from ..common import ParseError, UnexpectedToken, Terminal | |||
| from .grammar_analysis import GrammarAnalyzer | |||
| from ..tree import Tree, Visitor_NoRecurse, Transformer_NoRecurse | |||
| from .grammar_analysis import GrammarAnalyzer | |||
| class EndToken: | |||
| @@ -32,6 +32,8 @@ class Derivation(Tree): | |||
| END_TOKEN = EndToken() | |||
| class Item(object): | |||
| "An Earley Item, the atom of the algorithm." | |||
| def __init__(self, rule, ptr, start, tree): | |||
| self.rule = rule | |||
| self.ptr = ptr | |||
| @@ -77,7 +79,7 @@ class NewsList(list): | |||
| class Column: | |||
| "An entry in the table, aka Earley Chart" | |||
| "An entry in the table, aka Earley Chart. Contains lists of items." | |||
| def __init__(self, i): | |||
| self.i = i | |||
| self.to_reduce = NewsList() | |||
| @@ -94,7 +96,6 @@ class Column: | |||
| Makes sure only unique items are added. | |||
| """ | |||
| added = self.added | |||
| for item in items: | |||
| if item.is_complete: | |||
| @@ -112,8 +113,8 @@ class Column: | |||
| self.completed[item] = item | |||
| self.to_reduce.append(item) | |||
| else: | |||
| if item not in added: | |||
| added.add(item) | |||
| if item not in self.added: | |||
| self.added.add(item) | |||
| if isinstance(item.expect, Terminal): | |||
| self.to_scan.append(item) | |||
| else: | |||
| @@ -125,9 +126,9 @@ class Column: | |||
| return bool(self.item_count) | |||
| class Parser: | |||
| def __init__(self, rules, start, callback, resolve_ambiguity=True): | |||
| self.analysis = GrammarAnalyzer(rules, start) | |||
| self.start = start | |||
| def __init__(self, rules, start_symbol, callback, resolve_ambiguity=True): | |||
| self.analysis = GrammarAnalyzer(rules, start_symbol) | |||
| self.start_symbol = start_symbol | |||
| self.resolve_ambiguity = resolve_ambiguity | |||
| self.postprocess = {} | |||
| @@ -138,60 +139,57 @@ class Parser: | |||
| self.postprocess[rule] = a if callable(a) else (a and getattr(callback, a)) | |||
| self.predictions[rule.origin] = [x.rule for x in self.analysis.expand_rule(rule.origin)] | |||
| def parse(self, stream, start=None): | |||
| def parse(self, stream, start_symbol=None): | |||
| # Define parser functions | |||
| start = start or self.start | |||
| start_symbol = start_symbol or self.start_symbol | |||
| def predict(nonterm, i): | |||
| def predict(nonterm, column): | |||
| assert not isinstance(nonterm, Terminal), nonterm | |||
| return [Item(rule, 0, i, None) for rule in self.predictions[nonterm]] | |||
| return [Item(rule, 0, column, None) for rule in self.predictions[nonterm]] | |||
| def complete(item): | |||
| name = item.rule.origin | |||
| return [i.advance(item.tree) for i in item.start.to_predict if i.expect == name] | |||
| def process_column(i, token, cur_set): | |||
| next_set = Column(i) | |||
| def predict_and_complete(column): | |||
| while True: | |||
| to_predict = {x.expect for x in cur_set.to_predict.get_news() | |||
| to_predict = {x.expect for x in column.to_predict.get_news() | |||
| if x.ptr} # if not part of an already predicted batch | |||
| to_reduce = cur_set.to_reduce.get_news() | |||
| to_reduce = column.to_reduce.get_news() | |||
| if not (to_predict or to_reduce): | |||
| break | |||
| for nonterm in to_predict: | |||
| cur_set.add( predict(nonterm, cur_set) ) | |||
| column.add( predict(nonterm, column) ) | |||
| for item in to_reduce: | |||
| cur_set.add( complete(item) ) | |||
| column.add( complete(item) ) | |||
| if token is not END_TOKEN: | |||
| to_scan = cur_set.to_scan.get_news() | |||
| for item in to_scan: | |||
| if item.expect.match(token): | |||
| next_set.add([item.advance(token)]) | |||
| def scan(i, token, column): | |||
| to_scan = column.to_scan.get_news() | |||
| next_set = Column(i) | |||
| next_set.add(item.advance(token) for item in to_scan if item.expect.match(token)) | |||
| if not next_set and token is not END_TOKEN: | |||
| expect = {i.expect for i in cur_set.to_scan} | |||
| if not next_set: | |||
| expect = {i.expect for i in column.to_scan} | |||
| raise UnexpectedToken(token, expect, stream, i) | |||
| return cur_set, next_set | |||
| return next_set | |||
| # Main loop starts | |||
| column0 = Column(0) | |||
| column0.add(predict(start, column0)) | |||
| column0.add(predict(start_symbol, column0)) | |||
| cur_set = column0 | |||
| i = 0 | |||
| for token in stream: | |||
| _, cur_set = process_column(i, token, cur_set) | |||
| i += 1 | |||
| column = column0 | |||
| for i, token in enumerate(stream): | |||
| predict_and_complete(column) | |||
| column = scan(i, token, column) | |||
| last_set, _ = process_column(i, END_TOKEN, cur_set) | |||
| predict_and_complete(column) | |||
| # Parse ended. Now build a parse tree | |||
| solutions = [n.tree for n in last_set.to_reduce | |||
| if n.rule.origin==start and n.start is column0] | |||
| solutions = [n.tree for n in column.to_reduce | |||
| if n.rule.origin==start_symbol and n.start is column0] | |||
| if not solutions: | |||
| raise ParseError('Incomplete parse: Could not find a solution to input') | |||