| @@ -1,11 +1,13 @@ | |||||
| from ..utils import classify, classify_bool, bfs, fzset | |||||
| from ..common import GrammarError, is_terminal | |||||
| from lalr_analysis import Rule, RulePtr, GrammarAnalyzer | |||||
| from ..common import ParseError, UnexpectedToken, is_terminal | |||||
| from lalr_analysis import GrammarAnalyzer | |||||
| from ..tree import Tree | |||||
| class Item: | class Item: | ||||
| def __init__(self, rule_ptr, start): | |||||
| def __init__(self, rule_ptr, start, data): | |||||
| self.rule_ptr = rule_ptr | self.rule_ptr = rule_ptr | ||||
| self.start = start | self.start = start | ||||
| self.data = data | |||||
| @property | @property | ||||
| def expect(self): | def expect(self): | ||||
| @@ -15,8 +17,12 @@ class Item: | |||||
| def is_complete(self): | def is_complete(self): | ||||
| return self.rule_ptr.is_satisfied | return self.rule_ptr.is_satisfied | ||||
| def advance(self): | |||||
| return Item(self.rule_ptr.advance(self.expect), self.start) | |||||
| @property | |||||
| def name(self): | |||||
| return self.rule_ptr.rule.origin | |||||
| def advance(self, data): | |||||
| return Item(self.rule_ptr.advance(self.expect), self.start, self.data + [data]) | |||||
| def __eq__(self, other): | def __eq__(self, other): | ||||
| return self.rule_ptr == other.rule_ptr and self.start == other.start | return self.rule_ptr == other.rule_ptr and self.start == other.start | ||||
| @@ -26,6 +32,7 @@ class Item: | |||||
| def __repr__(self): | def __repr__(self): | ||||
| return '%s (%s)' % (self.rule_ptr, self.start) | return '%s (%s)' % (self.rule_ptr, self.start) | ||||
| class Parser: | class Parser: | ||||
| def __init__(self, rules, start): | def __init__(self, rules, start): | ||||
| self.analyzer = GrammarAnalyzer(rules, start) | self.analyzer = GrammarAnalyzer(rules, start) | ||||
| @@ -37,19 +44,19 @@ class Parser: | |||||
def predict(symbol, i):
    """Build the set of fresh items for every expansion of ``symbol``.

    symbol -- a non-terminal; terminals are rejected by the assertion.
    i      -- value stored as each new item's ``start``.

    Each item is created with its own empty data list, matching the
    three-argument ``Item`` constructor.
    """
    assert not is_terminal(symbol), symbol
    predicted = set()
    for rule_ptr in self.analyzer.expand_rule(symbol):
        # A new list per item, so collected values are never shared.
        predicted.add(Item(rule_ptr, i, []))
    return predicted
def scan(item, inp):
    """Try to match the input symbol against what ``item`` expects.

    Returns a one-element set holding the advanced item when ``item.expect``
    equals ``inp``, and an empty set otherwise. The matched input is passed
    to ``advance`` so it is recorded in the new item's data.
    """
    # TODO Do a smarter match, i.e. regexp
    if item.expect != inp:
        return set()
    return {item.advance(inp)}
def complete(item, table):
    """Advance every item that was waiting for the rule ``item`` finished.

    item  -- a finished item; its ``data`` is replaced in place by a
             ``Tree`` built from its name and the values it collected.
    table -- indexable collection of item sets; only ``table[item.start]``
             is inspected.

    Returns the set of advanced items, each of which receives the new Tree
    as its next collected value.
    """
    name = item.name
    # Wrap the collected children in place; the same Tree object is then
    # handed to every waiting item.
    item.data = Tree(name, item.data)
    subtree = item.data

    advanced = set()
    for waiting in table[item.start]:
        # Items that are themselves complete are skipped before ``expect``
        # is read.
        if waiting.is_complete:
            continue
        if waiting.expect == name:
            advanced.add(waiting.advance(subtree))
    return advanced
| def process_column(i, char): | def process_column(i, char): | ||||
| cur_set = table[-1] | cur_set = table[-1] | ||||
| @@ -71,6 +78,10 @@ class Parser: | |||||
| to_process = new_items - cur_set | to_process = new_items - cur_set | ||||
| cur_set |= to_process | cur_set |= to_process | ||||
if not next_set and char != '$end':
    # Nothing could consume `char`: report the terminals that were expected.
    # The loop variable must not be called `i`: under Python 2 a list
    # comprehension's variable leaks into the enclosing scope and would
    # overwrite the column index passed to UnexpectedToken below.
    expect = [item.expect for item in cur_set
              if not item.is_complete and is_terminal(item.expect)]
    raise UnexpectedToken(char, expect, stream, i)
| # Main loop starts | # Main loop starts | ||||
| table = [predict(self.start, 0)] | table = [predict(self.start, 0)] | ||||
| @@ -78,7 +89,16 @@ class Parser: | |||||
| for i, char in enumerate(stream): | for i, char in enumerate(stream): | ||||
| process_column(i, char) | process_column(i, char) | ||||
| process_column(len(stream), None) | |||||
| process_column(len(stream), '$end') | |||||
| # Parse ended. Now build a parse tree | |||||
| solutions = [n.data for n in table[len(stream)] | |||||
| if n.is_complete and n.name==self.start and n.start==0] | |||||
| if not solutions: | |||||
| raise ParseError('Incomplete parse: Could not find a solution to input') | |||||
| return solutions | |||||
| @@ -86,22 +106,26 @@ class Parser: | |||||
| # rules = [ | # rules = [ | ||||
| # ('a', ['a', 'A']), | # ('a', ['a', 'A']), | ||||
| # ('a', ['a', 'A', 'a']), | |||||
| # ('a', ['a', 'A', 'A', 'a']), | |||||
| # ('a', ['A']), | # ('a', ['A']), | ||||
| # ] | # ] | ||||
| # p = Parser(rules, 'a') | # p = Parser(rules, 'a') | ||||
| # p.parse('AAA') | |||||
| rules = [ | |||||
| ('sum', ['sum', "A", 'product']), | |||||
| ('sum', ['product']), | |||||
| ('product', ['product', "M", 'factor']), | |||||
| ('product', ['factor']), | |||||
| ('factor', ['L', 'sum', 'R']), | |||||
| ('factor', ['number']), | |||||
| ('number', ['N', 'number']), | |||||
| ('number', ['N']), | |||||
| ] | |||||
| p = Parser(rules, 'sum') | |||||
| p.parse('NALNMNANR') | |||||
| # for x in p.parse('AAAA'): | |||||
| # print '->' | |||||
| # print x.pretty() | |||||
| # rules = [ | |||||
| # ('sum', ['sum', "A", 'product']), | |||||
| # ('sum', ['product']), | |||||
| # ('product', ['product', "M", 'factor']), | |||||
| # ('product', ['factor']), | |||||
| # ('factor', ['L', 'sum', 'R']), | |||||
| # ('factor', ['number']), | |||||
| # ('number', ['N', 'number']), | |||||
| # ('number', ['N']), | |||||
| # ] | |||||
| # p = Parser(rules, 'sum') | |||||
| # print p.parse('NALNMNANR') | |||||