Browse Source

Cleanup

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.8.0
Erez Shinan 5 years ago
parent
commit
8466981c08
4 changed files with 40 additions and 64 deletions
  1. +1
    -2
      lark/grammar.py
  2. +9
    -25
      lark/parsers/grammar_analysis.py
  3. +27
    -22
      lark/parsers/lalr_analysis.py
  4. +3
    -15
      lark/parsers/lalr_parser.py

+ 1
- 2
lark/grammar.py View File

@@ -71,7 +71,7 @@ class Rule(Serialize):
expansion : a list of symbols
order : index of this expansion amongst all rules of the same name
"""
__slots__ = ('origin', 'expansion', 'alias', 'options', 'order', '_hash', '_rp')
__slots__ = ('origin', 'expansion', 'alias', 'options', 'order', '_hash')

__serialize_fields__ = 'origin', 'expansion', 'order', 'alias', 'options'
__serialize_namespace__ = Terminal, NonTerminal, RuleOptions
@@ -83,7 +83,6 @@ class Rule(Serialize):
self.order = order
self.options = options
self._hash = hash((self.origin, tuple(self.expansion)))
self._rp = None

def _deserialize(self):
self._hash = hash((self.origin, tuple(self.expansion)))


+ 9
- 25
lark/parsers/grammar_analysis.py View File

@@ -3,20 +3,16 @@ from collections import Counter, defaultdict
from ..utils import bfs, fzset, classify
from ..exceptions import GrammarError
from ..grammar import Rule, Terminal, NonTerminal
import time


# optimizations were made so that there should never be two distinct equal RulePtrs
# to help with hashtable lookup
class RulePtr(object):
__slots__ = ('rule', 'index', '_advance')
__slots__ = ('rule', 'index')

def __init__(self, rule, index):
assert isinstance(rule, Rule)
assert index <= len(rule.expansion)
self.rule = rule
self.index = index
self._advance = None

def __repr__(self):
before = [x.name for x in self.rule.expansion[:self.index]]
@@ -27,19 +23,19 @@ class RulePtr(object):
def next(self):
return self.rule.expansion[self.index]

# don't create duplicate RulePtrs
def advance(self, sym):
assert self.next == sym
a = self._advance
if a is None:
a = RulePtr(self.rule, self.index + 1)
self._advance = a
return a
return RulePtr(self.rule, self.index+1)

@property
def is_satisfied(self):
return self.index == len(self.rule.expansion)

def __eq__(self, other):
return self.rule == other.rule and self.index == other.index
def __hash__(self):
return hash((self.rule, self.index))


# state generation ensures no duplicate LR0ItemSets
class LR0ItemSet(object):
@@ -159,19 +155,11 @@ class GrammarAnalyzer(object):
self.lr0_rules_by_origin = classify(lr0_rules, lambda r: r.origin)

# cache RulePtr(r, 0) in r (no duplicate RulePtr objects)
for root_rule in lr0_root_rules.values():
root_rule._rp = RulePtr(root_rule, 0)
self.lr0_start_states = {start: LR0ItemSet([root_rule._rp], self.expand_rule(root_rule.origin, self.lr0_rules_by_origin))
self.lr0_start_states = {start: LR0ItemSet([RulePtr(root_rule, 0)], self.expand_rule(root_rule.origin, self.lr0_rules_by_origin))
for start, root_rule in lr0_root_rules.items()}

self.FIRST, self.FOLLOW, self.NULLABLE = calculate_sets(rules)

self.nonterminal_transitions = []
self.directly_reads = defaultdict(set)
self.reads = defaultdict(set)
self.includes = defaultdict(set)
self.lookback = defaultdict(set)

def expand_rule(self, source_rule, rules_by_origin=None):
"Returns all init_ptrs accessible by rule (recursive)"

@@ -183,11 +171,7 @@ class GrammarAnalyzer(object):
assert not rule.is_term, rule

for r in rules_by_origin[rule]:
# don't create duplicate RulePtr objects
init_ptr = r._rp
if init_ptr is None:
init_ptr = RulePtr(r, 0)
r._rp = init_ptr
init_ptr = RulePtr(r, 0)
init_ptrs.add(init_ptr)

if r.expansion: # if not empty rule


+ 27
- 22
lark/parsers/lalr_analysis.py View File

@@ -15,8 +15,6 @@ from ..exceptions import GrammarError
from .grammar_analysis import GrammarAnalyzer, Terminal, LR0ItemSet
from ..grammar import Rule

import time

###{standalone

class Action:
@@ -115,8 +113,8 @@ def traverse(x, S, N, X, R, G, F):
S.append(x)
d = len(S)
N[x] = d
F[x] = G(x)
for y in R(x):
F[x] = G[x]
for y in R[x]:
if N[y] == 0:
traverse(y, S, N, X, R, G, F)
n_x = N[x]
@@ -137,9 +135,17 @@ def traverse(x, S, N, X, R, G, F):


class LALR_Analyzer(GrammarAnalyzer):
def __init__(self, parser_conf, debug=False):
GrammarAnalyzer.__init__(self, parser_conf, debug)
self.nonterminal_transitions = []
self.directly_reads = defaultdict(set)
self.reads = defaultdict(set)
self.includes = defaultdict(set)
self.lookback = defaultdict(set)


def compute_lr0_states(self):
self.states = set()
self.lr0_states = set()
# map of kernels to LR0ItemSets
cache = {}

@@ -161,7 +167,7 @@ class LALR_Analyzer(GrammarAnalyzer):
state.transitions[sym] = new_state
yield new_state

self.states.add(state)
self.lr0_states.add(state)

for _ in bfs(self.lr0_start_states.values(), step):
pass
@@ -174,14 +180,14 @@ class LALR_Analyzer(GrammarAnalyzer):
assert(rp.index == 0)
self.directly_reads[(root, rp.next)] = set([ Terminal('$END') ])

for state in self.states:
for state in self.lr0_states:
seen = set()
for rp in state.closure:
if rp.is_satisfied:
continue
s = rp.next
# if s is not a nonterminal
if not s in self.lr0_rules_by_origin:
if s not in self.lr0_rules_by_origin:
continue
if s in seen:
continue
@@ -201,11 +207,6 @@ class LALR_Analyzer(GrammarAnalyzer):
if s2 in self.NULLABLE:
r.add((next_state, s2))

def compute_read_sets(self):
R = lambda nt: self.reads[nt]
G = lambda nt: self.directly_reads[nt]
self.read_sets = digraph(self.nonterminal_transitions, R, G)

def compute_includes_lookback(self):
for nt in self.nonterminal_transitions:
state, nonterminal = nt
@@ -220,9 +221,8 @@ class LALR_Analyzer(GrammarAnalyzer):
s = rp.rule.expansion[i]
nt2 = (state2, s)
state2 = state2.transitions[s]
if not nt2 in self.reads:
if nt2 not in self.reads:
continue
j = i + 1
for j in range(i + 1, len(rp.rule.expansion)):
if not rp.rule.expansion[j] in self.NULLABLE:
break
@@ -236,20 +236,18 @@ class LALR_Analyzer(GrammarAnalyzer):
for nt2 in includes:
self.includes[nt2].add(nt)

def compute_follow_sets(self):
R = lambda nt: self.includes[nt]
G = lambda nt: self.read_sets[nt]
self.follow_sets = digraph(self.nonterminal_transitions, R, G)

def compute_lookaheads(self):
read_sets = digraph(self.nonterminal_transitions, self.reads, self.directly_reads)
follow_sets = digraph(self.nonterminal_transitions, self.includes, read_sets)

for nt, lookbacks in self.lookback.items():
for state, rule in lookbacks:
for s in self.follow_sets[nt]:
for s in follow_sets[nt]:
state.lookaheads[s].add(rule)

def compute_lalr1_states(self):
m = {}
for state in self.states:
for state in self.lr0_states:
actions = {}
for la, next_state in state.transitions.items():
actions[la] = (Shift, next_state.closure)
@@ -281,3 +279,10 @@ class LALR_Analyzer(GrammarAnalyzer):
self.parse_table = self._parse_table
else:
self.parse_table = IntParseTable.from_ParseTable(self._parse_table)

def compute_lalr(self):
self.compute_lr0_states()
self.compute_reads_relations()
self.compute_includes_lookback()
self.compute_lookaheads()
self.compute_lalr1_states()

+ 3
- 15
lark/parsers/lalr_parser.py View File

@@ -8,8 +8,6 @@ from ..utils import Enumerator, Serialize

from .lalr_analysis import LALR_Analyzer, Shift, Reduce, IntParseTable

import time


###{standalone
class LALR_Parser(object):
@@ -17,13 +15,7 @@ class LALR_Parser(object):
assert all(r.options is None or r.options.priority is None
for r in parser_conf.rules), "LALR doesn't yet support prioritization"
analysis = LALR_Analyzer(parser_conf, debug=debug)
analysis.compute_lr0_states()
analysis.compute_reads_relations()
analysis.compute_read_sets()
analysis.compute_includes_lookback()
analysis.compute_follow_sets()
analysis.compute_lookaheads()
analysis.compute_lalr1_states()
analysis.compute_lalr()
callbacks = parser_conf.callbacks

self._parse_table = analysis.parse_table
@@ -88,11 +80,6 @@ class _Parser:
state_stack.append(new_state)
value_stack.append(value)

if state_stack[-1] == end_state:
return True

return False

# Main LALR-parser loop
for token in stream:
while True:
@@ -111,7 +98,8 @@ class _Parser:
while True:
_action, arg = get_action(token)
assert(_action is Reduce)
if reduce(arg):
reduce(arg)
if state_stack[-1] == end_state:
return value_stack[-1]

###}

Loading…
Cancel
Save