from ..common import ParseError, UnexpectedToken, is_terminal
from lalr_analysis import GrammarAnalyzer
from ..tree import Tree


class Item:
    """A chart item: a rule pointer, the column it started in, and its data.

    Two items compare (and hash) equal when their rule pointer and start
    column match; ``data`` is deliberately not part of the identity.
    """

    def __init__(self, rule_ptr, start, data):
        self.rule_ptr = rule_ptr
        self.start = start
        self.data = data

    @property
    def expect(self):
        """The symbol the rule pointer is waiting for (``rule_ptr.next``)."""
        return self.rule_ptr.next

    @property
    def is_complete(self):
        """Whether the rule pointer reports itself as satisfied."""
        return self.rule_ptr.is_satisfied

    @property
    def name(self):
        """The origin of the rule this item points into."""
        return self.rule_ptr.rule.origin

    def advance(self, data):
        """Return a new item moved past ``expect``, with ``data`` appended.

        The current item is left untouched; the new item gets a fresh list.
        """
        return Item(self.rule_ptr.advance(self.expect), self.start, self.data + [data])

    def __eq__(self, other):
        return self.rule_ptr == other.rule_ptr and self.start == other.start

    def __hash__(self):
        return hash((self.rule_ptr, self.start))

    def __repr__(self):
        return '%s (%s)' % (self.rule_ptr, self.start)


class Parser:
    """Earley-style chart parser driven by a ``GrammarAnalyzer``."""

    def __init__(self, rules, start):
        self.analyzer = GrammarAnalyzer(rules, start)
        self.start = start

    def parse(self, stream):
        """Parse ``stream`` and return every solution found.

        ``stream`` must support ``len()`` and iteration; each element is
        compared against the terminals expected by the grammar.

        Returns a non-empty list holding the ``data`` of every complete item
        for the start symbol that spans the input from column 0.

        Raises ``UnexpectedToken`` when no item can consume the current
        element, and ``ParseError`` when the input ends without a solution.
        """
        # Define parser functions

        def predict(symbol, i):
            # Seed column ``i`` with one fresh item per expansion of ``symbol``.
            assert not is_terminal(symbol), symbol
            return {Item(rp, i, []) for rp in self.analyzer.expand_rule(symbol)}

        def scan(item, inp):
            if item.expect == inp:   # TODO Do a smarter match, i.e. regexp
                return {item.advance(inp)}
            else:
                return set()

        def complete(item, table):
            name = item.name
            # The finished item's children are wrapped in a Tree in place, so
            # the solutions collected at the end of parse() are trees.
            item.data = Tree(name, item.data)
            return {old_item.advance(item.data) for old_item in table[item.start]
                    if not old_item.is_complete and old_item.expect == name}

        def process_column(i, char):
            cur_set = table[-1]
            next_set = set()
            table.append(next_set)

            # Keep processing until a pass adds nothing new to this column.
            to_process = cur_set
            while to_process:
                new_items = set()
                for item in to_process:
                    if item.is_complete:
                        new_items |= complete(item, table)
                    else:
                        if is_terminal(item.expect):
                            next_set |= scan(item, char)
                        else:
                            new_items |= predict(item.expect, i)

                to_process = new_items - cur_set
                cur_set |= to_process

            if not next_set and char != '$end':
                # The loop variable must not be called ``i``: on Python 2 a
                # list-comprehension variable leaks into the enclosing scope
                # and would replace the column index reported below.
                # A real list is built (rather than a lazy ``filter``) so the
                # exception carries a reusable sequence on Python 3 as well.
                expect = [pending.expect for pending in cur_set
                          if not pending.is_complete and is_terminal(pending.expect)]
                raise UnexpectedToken(char, expect, stream, i)

        # Main loop starts

        table = [predict(self.start, 0)]

        for i, char in enumerate(stream):
            process_column(i, char)

        process_column(len(stream), '$end')

        # Parse ended. Now build a parse tree
        solutions = [n.data for n in table[len(stream)]
                     if n.is_complete and n.name == self.start and n.start == 0]

        if not solutions:
            raise ParseError('Incomplete parse: Could not find a solution to input')

        return solutions


# rules = [
#     ('a', ['a', 'A']),
#     ('a', ['a', 'A', 'a']),
#     ('a', ['a', 'A', 'A', 'a']),
#     ('a', ['A']),
# ]

# p = Parser(rules, 'a')
# for x in p.parse('AAAA'):
#     print '->'
#     print x.pretty()

# rules = [
#     ('sum', ['sum', "A", 'product']),
#     ('sum', ['product']),
#     ('product', ['product', "M", 'factor']),
#     ('product', ['factor']),
#     ('factor', ['L', 'sum', 'R']),
#     ('factor', ['number']),
#     ('number', ['N', 'number']),
#     ('number', ['N']),
# ]

# p = Parser(rules, 'sum')
# print p.parse('NALNMNANR')