jmg
/
gitmirror


			
							"My name is Earley"

from ..utils import classify
from ..common import ParseError, UnexpectedToken

try:
    xrange
except NameError:
    xrange = range

class MatchFailed(object):
    pass

class AbortParseMatch(Exception):
    pass


class Rule(object):
    def __init__(self, name, symbols, postprocess):
        self.name = name
        self.symbols = symbols
        self.postprocess = postprocess

class State(object):
    def __init__(self, rule, expect, reference, data=None):
        self.rule = rule
        self.expect = expect
        self.reference = reference
        self.data = data or []

        self.is_complete = (self.expect == len(self.rule.symbols))
        if not self.is_complete:
            self.expect_symbol = self.rule.symbols[self.expect]
            self.is_terminal = isinstance(self.expect_symbol, tuple)
        else:
            self.is_terminal = False

    def next_state(self, data):
        return State(self.rule, self.expect+1, self.reference, self.data + [data])

    def consume_terminal(self, inp):
        if not self.is_complete and self.is_terminal:
            # PORT: originally tests regexp

            if self.expect_symbol[1] is not None:
                match = self.expect_symbol[1].match(inp)
                if match:
                    return self.next_state(inp)

            elif self.expect_symbol[0] == inp.type:
                return self.next_state(inp)

    def consume_nonterminal(self, inp):
        if not self.is_complete and not self.is_terminal:

            if self.expect_symbol == inp:
                return self.next_state(inp)

    def process(self, location, ind, table, rules, added_rules):

        if self.is_complete:
            # Completed a rule
            if self.rule.postprocess:
                try:
                    self.data = self.rule.postprocess(self.data)
                except AbortParseMatch:
                    self.data = MatchFailed

            if self.data is not MatchFailed:
                for s in table[self.reference]:
                    x = s.consume_nonterminal(self.rule.name)
                    if x:
                        x.data[-1] = self.data
                        x.epsilon_closure(location, ind, table)

        else:
            exp = self.rule.symbols[self.expect]
            if isinstance(exp, tuple):
                return

            for r in rules[exp]:
                assert r.name == exp
                if r not in added_rules:
                    if r.symbols:
                        added_rules.add(r)
                        State(r, 0, location).epsilon_closure(location, ind, table)
                    else:
                        # Empty rule
                        new_copy = self.consume_nonterminal(r.name)
                        new_copy.data[-1] = r.postprocess([]) if r.postprocess else []

                        new_copy.epsilon_closure(location, ind, table)

    def epsilon_closure(self, location, ind, table):
        col = table[location]
        col.append(self)

        if not self.is_complete:
            for i in xrange(ind):
                state = col[i]
                if state.is_complete and state.reference == location:
                    x = self.consume_nonterminal(state.rule.name)
                    if x:
                        x.data[-1] = state.data
                        x.epsilon_closure(location, ind, table)


class Parser(object):
    def __init__(self, rules, start=None):
        self.rules = [Rule(r['name'], r['symbols'], r.get('postprocess', None)) for r in rules]
        self.rules_by_name = classify(self.rules, lambda r: r.name)
        self.start = start or self.rules[0].name

    def advance_to(self, table, added_rules):
        n = len(table)-1
        for w, s in enumerate(table[n]):
            s.process(n, w, table, self.rules_by_name, added_rules)

    def parse(self, stream):
        initial_rules = set(self.rules_by_name[self.start])
        table = [[State(r, 0, 0) for r in initial_rules]]
        self.advance_to(table, initial_rules)

        i = 0

        while i < len(stream):
            col = []

            token = stream[i]
            for s in table[-1]:
                x = s.consume_terminal(token)
                if x:
                    col.append(x)

            if not col:
                expected = {s.expect_symbol for s in table[-1] if s.is_terminal}
                raise UnexpectedToken(stream[i], expected, stream, i)

            table.append(col)
            self.advance_to(table, set())

            i += 1

        res = list(self.finish(table))
        if not res:
            raise ParseError('Incomplete parse')
        return res

    def finish(self, table):
        for t in table[-1]:
            if (t.rule.name == self.start
                and t.expect == len(t.rule.symbols)
                and t.reference == 0
                and t.data is not MatchFailed):
                yield t.data