| @@ -14,6 +14,7 @@ | |||
| # Email : erezshin@gmail.com | |||
| from functools import cmp_to_key | |||
| from collections import defaultdict | |||
| from ..utils import compare | |||
| from ..common import ParseError, UnexpectedToken, Terminal | |||
| @@ -125,7 +126,6 @@ class Column: | |||
| def __nonzero__(self): | |||
| return bool(self.item_count) | |||
| from collections import defaultdict | |||
| class Parser: | |||
| def __init__(self, rules, start_symbol, callback, resolve_ambiguity=True, ignore=()): | |||
| self.analysis = GrammarAnalyzer(rules, start_symbol) | |||
| @@ -144,7 +144,7 @@ class Parser: | |||
| def parse(self, stream, start_symbol=None): | |||
| # Define parser functions | |||
| start_symbol = start_symbol or self.start_symbol | |||
| matched_terminals = defaultdict(set) | |||
| matched_terminals = defaultdict(list) | |||
| def predict(nonterm, column): | |||
| assert not isinstance(nonterm, Terminal), nonterm | |||
| @@ -178,24 +178,16 @@ class Parser: | |||
| for item in to_scan: | |||
| m = item.expect.match(stream, i) | |||
| if m: | |||
| matched_terminals[m.end()].add(item.advance(m.group(0))) | |||
| matched_terminals[m.end()].append(item.advance(m.group(0))) | |||
| # s = m.group(0) | |||
| # for j in range(1, len(s)): | |||
| # m = item.expect.match(s[:-j]) | |||
| # if m: | |||
| # matched_terminals[m.end()].add(item.advance(m.group(0))) | |||
| s = m.group(0) | |||
| for j in range(1, len(s)): | |||
| m = item.expect.match(s[:-j]) | |||
| if m: | |||
| matched_terminals[m.end()].append(item.advance(m.group(0))) | |||
| next_set = Column(i+1) | |||
| # next_set.add(item.advance(token) for item in to_scan if item.expect.match(token)) | |||
| next_set.add(matched_terminals[i+1]) | |||
| # del matched_terminals[i+1] | |||
| # if not next_set: | |||
| # import pdb | |||
| # pdb.set_trace() | |||
| # expect = {i.expect for i in column.to_scan} | |||
| # raise UnexpectedToken(token, expect, stream, i) | |||
| return next_set | |||
| @@ -205,7 +197,7 @@ class Parser: | |||
| column = column0 | |||
| for i, token in enumerate(stream): | |||
| # print i, token | |||
| predict_and_complete(column) | |||
| column = scan(i, token, column) | |||
| @@ -226,7 +218,7 @@ class Parser: | |||
| ResolveAmbig().visit(tree) | |||
| return ApplyCallbacks(self.postprocess).transform(tree) | |||
| class ApplyCallbacks(Transformer_NoRecurse): | |||
| @@ -250,7 +242,7 @@ def _compare_rules(rule1, rule2): | |||
| def _compare_drv(tree1, tree2): | |||
| if not (isinstance(tree1, Tree) and isinstance(tree2, Tree)): | |||
| return compare(tree1, tree2) | |||
| return -compare(tree1, tree2) | |||
| c = _compare_rules(tree1.rule, tree2.rule) | |||
| if c: | |||
| @@ -262,10 +254,15 @@ def _compare_drv(tree1, tree2): | |||
| if c: | |||
| return c | |||
| return compare(len(tree1.children), len(tree2.children)) | |||
| return -compare(len(tree1.children), len(tree2.children)) | |||
| class ResolveAmbig(Visitor_NoRecurse): | |||
| """Resolves ambiguity in resulting parse tree. | |||
| Minimizes rule length, maximizes match length. | |||
| """ | |||
| def _ambig(self, tree): | |||
| best = min(tree.children, key=cmp_to_key(_compare_drv)) | |||
| assert best.data == 'drv' | |||