|
|
@@ -1,11 +1,13 @@ |
|
|
|
from ..utils import classify, classify_bool, bfs, fzset |
|
|
|
from ..common import GrammarError, is_terminal |
|
|
|
from lalr_analysis import Rule, RulePtr, GrammarAnalyzer |
|
|
|
from ..common import ParseError, UnexpectedToken, is_terminal |
|
|
|
from lalr_analysis import GrammarAnalyzer |
|
|
|
|
|
|
|
from ..tree import Tree |
|
|
|
|
|
|
|
class Item: |
|
|
|
def __init__(self, rule_ptr, start): |
|
|
|
def __init__(self, rule_ptr, start, data): |
|
|
|
self.rule_ptr = rule_ptr |
|
|
|
self.start = start |
|
|
|
self.data = data |
|
|
|
|
|
|
|
@property |
|
|
|
def expect(self): |
|
|
@@ -15,8 +17,12 @@ class Item: |
|
|
|
def is_complete(self): |
|
|
|
return self.rule_ptr.is_satisfied |
|
|
|
|
|
|
|
def advance(self): |
|
|
|
return Item(self.rule_ptr.advance(self.expect), self.start) |
|
|
|
@property |
|
|
|
def name(self): |
|
|
|
return self.rule_ptr.rule.origin |
|
|
|
|
|
|
|
def advance(self, data): |
|
|
|
return Item(self.rule_ptr.advance(self.expect), self.start, self.data + [data]) |
|
|
|
|
|
|
|
def __eq__(self, other): |
|
|
|
return self.rule_ptr == other.rule_ptr and self.start == other.start |
|
|
@@ -26,6 +32,7 @@ class Item: |
|
|
|
def __repr__(self): |
|
|
|
return '%s (%s)' % (self.rule_ptr, self.start) |
|
|
|
|
|
|
|
|
|
|
|
class Parser: |
|
|
|
def __init__(self, rules, start): |
|
|
|
self.analyzer = GrammarAnalyzer(rules, start) |
|
|
@@ -37,19 +44,19 @@ class Parser: |
|
|
|
|
|
|
|
def predict(symbol, i): |
|
|
|
assert not is_terminal(symbol), symbol |
|
|
|
return {Item(rp, i) for rp in self.analyzer.expand_rule(symbol)} |
|
|
|
return {Item(rp, i, []) for rp in self.analyzer.expand_rule(symbol)} |
|
|
|
|
|
|
|
def scan(item, inp): |
|
|
|
if item.expect == inp: # TODO Do a smarter match, i.e. regexp |
|
|
|
return {item.advance()} |
|
|
|
return {item.advance(inp)} |
|
|
|
else: |
|
|
|
return set() |
|
|
|
|
|
|
|
def complete(item, table): |
|
|
|
print "Complete:", item |
|
|
|
name = item.rule_ptr.rule.origin |
|
|
|
return {old_item.advance() for old_item in table[item.start] |
|
|
|
if old_item.expect == name} |
|
|
|
name = item.name |
|
|
|
item.data = Tree(name, item.data) |
|
|
|
return {old_item.advance(item.data) for old_item in table[item.start] |
|
|
|
if not old_item.is_complete and old_item.expect == name} |
|
|
|
|
|
|
|
def process_column(i, char): |
|
|
|
cur_set = table[-1] |
|
|
@@ -71,6 +78,10 @@ class Parser: |
|
|
|
to_process = new_items - cur_set |
|
|
|
cur_set |= to_process |
|
|
|
|
|
|
|
if not next_set and char != '$end': |
|
|
|
expect = filter(is_terminal, [i.expect for i in cur_set if not i.is_complete]) |
|
|
|
raise UnexpectedToken(char, expect, stream, i) |
|
|
|
|
|
|
|
# Main loop starts |
|
|
|
|
|
|
|
table = [predict(self.start, 0)] |
|
|
@@ -78,7 +89,16 @@ class Parser: |
|
|
|
for i, char in enumerate(stream): |
|
|
|
process_column(i, char) |
|
|
|
|
|
|
|
process_column(len(stream), None) |
|
|
|
process_column(len(stream), '$end') |
|
|
|
|
|
|
|
# Parse ended. Now build a parse tree |
|
|
|
solutions = [n.data for n in table[len(stream)] |
|
|
|
if n.is_complete and n.name==self.start and n.start==0] |
|
|
|
|
|
|
|
if not solutions: |
|
|
|
raise ParseError('Incomplete parse: Could not find a solution to input') |
|
|
|
|
|
|
|
return solutions |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@@ -86,22 +106,26 @@ class Parser: |
|
|
|
|
|
|
|
# rules = [ |
|
|
|
# ('a', ['a', 'A']), |
|
|
|
# ('a', ['a', 'A', 'a']), |
|
|
|
# ('a', ['a', 'A', 'A', 'a']), |
|
|
|
# ('a', ['A']), |
|
|
|
# ] |
|
|
|
|
|
|
|
# p = Parser(rules, 'a') |
|
|
|
# p.parse('AAA') |
|
|
|
|
|
|
|
rules = [ |
|
|
|
('sum', ['sum', "A", 'product']), |
|
|
|
('sum', ['product']), |
|
|
|
('product', ['product', "M", 'factor']), |
|
|
|
('product', ['factor']), |
|
|
|
('factor', ['L', 'sum', 'R']), |
|
|
|
('factor', ['number']), |
|
|
|
('number', ['N', 'number']), |
|
|
|
('number', ['N']), |
|
|
|
] |
|
|
|
|
|
|
|
p = Parser(rules, 'sum') |
|
|
|
p.parse('NALNMNANR') |
|
|
|
# for x in p.parse('AAAA'): |
|
|
|
# print '->' |
|
|
|
# print x.pretty() |
|
|
|
|
|
|
|
# rules = [ |
|
|
|
# ('sum', ['sum', "A", 'product']), |
|
|
|
# ('sum', ['product']), |
|
|
|
# ('product', ['product', "M", 'factor']), |
|
|
|
# ('product', ['factor']), |
|
|
|
# ('factor', ['L', 'sum', 'R']), |
|
|
|
# ('factor', ['number']), |
|
|
|
# ('number', ['N', 'number']), |
|
|
|
# ('number', ['N']), |
|
|
|
# ] |
|
|
|
|
|
|
|
# p = Parser(rules, 'sum') |
|
|
|
# print p.parse('NALNMNANR') |