Browse Source

Implement Joop Leo's optimizations for right recursion performance

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.6.6
night199uk 6 years ago
parent
commit
04d90fa916
4 changed files with 303 additions and 76 deletions
  1. +112
    -25
      lark/parsers/earley.py
  2. +34
    -4
      lark/parsers/earley_common.py
  3. +45
    -22
      lark/parsers/earley_forest.py
  4. +112
    -25
      lark/parsers/xearley.py

+ 112
- 25
lark/parsers/earley.py View File

@@ -16,7 +16,7 @@ from ..visitors import Transformer_InPlace, v_args
from ..exceptions import ParseError, UnexpectedToken from ..exceptions import ParseError, UnexpectedToken
from .grammar_analysis import GrammarAnalyzer from .grammar_analysis import GrammarAnalyzer
from ..grammar import NonTerminal from ..grammar import NonTerminal
from .earley_common import Item
from .earley_common import Item, TransitiveItem
from .earley_forest import ForestToTreeVisitor, ForestSumVisitor, SymbolNode from .earley_forest import ForestToTreeVisitor, ForestSumVisitor, SymbolNode


from collections import deque, defaultdict from collections import deque, defaultdict
@@ -28,6 +28,7 @@ class Parser:
self.resolve_ambiguity = resolve_ambiguity self.resolve_ambiguity = resolve_ambiguity


self.FIRST = analysis.FIRST self.FIRST = analysis.FIRST
self.NULLABLE = analysis.NULLABLE
self.callbacks = {} self.callbacks = {}
self.predictions = {} self.predictions = {}


@@ -56,14 +57,68 @@ class Parser:
node_cache = {} node_cache = {}
token_cache = {} token_cache = {}
columns = [] columns = []

def make_symbol_node(s, start, end):
label = (s, start, end)
if label in node_cache:
node = node_cache[label]
transitives = []

def is_quasi_complete(item):
    """Tell whether ``item`` is complete or can be completed on nullables alone.

    A complete item qualifies immediately.  Otherwise the symbol the item
    currently expects is stepped over, and every symbol that follows it in
    the rule must be a member of ``self.NULLABLE``.  The check also fails
    when the rule's origin is ``start_symbol`` and one of those trailing
    symbols is ``start_symbol`` itself.
    """
    if item.is_complete:
        return True

    # Step over the symbol the item is waiting for, then inspect the tail.
    candidate = item.advance()
    while True:
        if candidate.is_complete:
            return True
        pending = candidate.expect
        if pending not in self.NULLABLE or (
            candidate.rule.origin == start_symbol and pending == start_symbol
        ):
            return False
        candidate = candidate.advance()

def create_leo_transitives(item, trule, previous, visited = None):
if visited is None:
visited = set()

if item.rule.origin in transitives[item.start]:
previous = trule = transitives[item.start][item.rule.origin]
return trule, previous

is_empty_rule = not self.FIRST[item.rule.origin]
if is_empty_rule:
return trule, previous

originator = None
for key in columns[item.start]:
if key.expect is not None and key.expect == item.rule.origin:
if originator is not None:
return trule, previous
originator = key

if originator is None:
return trule, previous

if originator in visited:
return trule, previous

visited.add(originator)
if not is_quasi_complete(originator):
return trule, previous

trule = originator.advance()
if originator.start != item.start:
visited.clear()

trule, previous = create_leo_transitives(originator, trule, previous, visited)
if trule is None:
return trule, previous

titem = None
if previous is not None:
titem = TransitiveItem(item.rule.origin, trule, originator, previous.column)
previous.next_titem = titem
else: else:
node = node_cache[label] = SymbolNode(s, start, end)
return node
titem = TransitiveItem(item.rule.origin, trule, originator, item.start)

previous = transitives[item.start][item.rule.origin] = titem
return trule, previous


def predict_and_complete(i, to_scan): def predict_and_complete(i, to_scan):
"""The core Earley Predictor and Completer. """The core Earley Predictor and Completer.
@@ -84,23 +139,26 @@ class Parser:
### The Earley completer ### The Earley completer
if item.is_complete: ### (item.s == string) if item.is_complete: ### (item.s == string)
if item.node is None: if item.node is None:
item.node = make_symbol_node(item.s, item.start, i)
label = (item.s, item.start, i)
item.node = node_cache[label] if label in node_cache else node_cache.setdefault(label, SymbolNode(*label))
item.node.add_family(item.s, item.rule, item.start, None, None) item.node.add_family(item.s, item.rule, item.start, None, None)


# Empty has 0 length. If we complete an empty symbol in a particular
# parse step, we need to be able to use that same empty symbol to complete
# any predictions that result, that themselves require empty. Avoids
# infinite recursion on empty symbols.
# held_completions is 'H' in E.Scott's paper.
is_empty_item = item.start == i
if is_empty_item:
held_completions[item.rule.origin] = item.node

originators = [originator for originator in columns[item.start] if originator.expect is not None and originator.expect == item.s]
for originator in originators:
new_item = originator.advance()
new_item.node = make_symbol_node(new_item.s, originator.start, i)
new_item.node.add_family(new_item.s, new_item.rule, new_item.start, originator.node, item.node)
create_leo_transitives(item, None, None)

###R Joop Leo right recursion Completer
if item.rule.origin in transitives[item.start]:
transitive = transitives[item.start][item.s]
if transitive.previous in transitives[transitive.column]:
root_transitive = transitives[transitive.column][transitive.previous]
else:
root_transitive = transitive

label = (root_transitive.s, root_transitive.start, i)
node = vn = node_cache[label] if label in node_cache else node_cache.setdefault(label, SymbolNode(*label))
vn.add_path(root_transitive, item.node)

new_item = Item(transitive.rule, transitive.ptr, transitive.start)
new_item.node = vn
if new_item.expect in self.TERMINALS: if new_item.expect in self.TERMINALS:
# Add (B :: aC.B, h, y) to Q # Add (B :: aC.B, h, y) to Q
to_scan.add(new_item) to_scan.add(new_item)
@@ -108,6 +166,30 @@ class Parser:
# Add (B :: aC.B, h, y) to Ei and R # Add (B :: aC.B, h, y) to Ei and R
column.add(new_item) column.add(new_item)
items.append(new_item) items.append(new_item)
###R Regular Earley completer
else:
# Empty has 0 length. If we complete an empty symbol in a particular
# parse step, we need to be able to use that same empty symbol to complete
# any predictions that result, that themselves require empty. Avoids
# infinite recursion on empty symbols.
# held_completions is 'H' in E.Scott's paper.
is_empty_item = item.start == i
if is_empty_item:
held_completions[item.rule.origin] = item.node

originators = [originator for originator in columns[item.start] if originator.expect is not None and originator.expect == item.s]
for originator in originators:
new_item = originator.advance()
label = (new_item.s, originator.start, i)
new_item.node = node_cache[label] if label in node_cache else node_cache.setdefault(label, SymbolNode(*label))
new_item.node.add_family(new_item.s, new_item.rule, i, originator.node, item.node)
if new_item.expect in self.TERMINALS:
# Add (B :: aC.B, h, y) to Q
to_scan.add(new_item)
elif new_item not in column:
# Add (B :: aC.B, h, y) to Ei and R
column.add(new_item)
items.append(new_item)


### The Earley predictor ### The Earley predictor
elif item.expect in self.NON_TERMINALS: ### (item.s == lr0) elif item.expect in self.NON_TERMINALS: ### (item.s == lr0)
@@ -119,7 +201,8 @@ class Parser:
# Process any held completions (H). # Process any held completions (H).
if item.expect in held_completions: if item.expect in held_completions:
new_item = item.advance() new_item = item.advance()
new_item.node = make_symbol_node(new_item.s, item.start, i)
label = (new_item.s, item.start, i)
new_item.node = node_cache[label] if label in node_cache else node_cache.setdefault(label, SymbolNode(*label))
new_item.node.add_family(new_item.s, new_item.rule, new_item.start, item.node, held_completions[item.expect]) new_item.node.add_family(new_item.s, new_item.rule, new_item.start, item.node, held_completions[item.expect])
new_items.append(new_item) new_items.append(new_item)


@@ -141,11 +224,14 @@ class Parser:
next_to_scan = set() next_to_scan = set()
next_set = set() next_set = set()
columns.append(next_set) columns.append(next_set)
next_transitives = dict()
transitives.append(next_transitives)


for item in set(to_scan): for item in set(to_scan):
if match(item.expect, token): if match(item.expect, token):
new_item = item.advance() new_item = item.advance()
new_item.node = make_symbol_node(new_item.s, new_item.start, i)
label = (new_item.s, new_item.start, i)
new_item.node = node_cache[label] if label in node_cache else node_cache.setdefault(label, SymbolNode(*label))
new_item.node.add_family(new_item.s, item.rule, new_item.start, item.node, token) new_item.node.add_family(new_item.s, item.rule, new_item.start, item.node, token)


if new_item.expect in self.TERMINALS: if new_item.expect in self.TERMINALS:
@@ -163,6 +249,7 @@ class Parser:


# Main loop starts # Main loop starts
columns.append(set()) columns.append(set())
transitives.append(dict())


## The scan buffer. 'Q' in E.Scott's paper. ## The scan buffer. 'Q' in E.Scott's paper.
to_scan = set() to_scan = set()


+ 34
- 4
lark/parsers/earley_common.py View File

@@ -13,12 +13,13 @@
# Author: Erez Shinan (2017) # Author: Erez Shinan (2017)
# Email : erezshin@gmail.com # Email : erezshin@gmail.com


from ..grammar import NonTerminal, Terminal

class Item(object): class Item(object):
"An Earley Item, the atom of the algorithm." "An Earley Item, the atom of the algorithm."


__slots__ = ('s', 'rule', 'ptr', 'start', 'is_complete', 'expect', 'node', '_hash')
__slots__ = ('s', 'rule', 'ptr', 'start', 'is_complete', 'expect', 'previous', 'node', '_hash')
def __init__(self, rule, ptr, start): def __init__(self, rule, ptr, start):
assert isinstance(start, int), "start is not an int"
self.is_complete = len(rule.expansion) == ptr self.is_complete = len(rule.expansion) == ptr
self.rule = rule # rule self.rule = rule # rule
self.ptr = ptr # ptr self.ptr = ptr # ptr
@@ -27,13 +28,15 @@ class Item(object):
if self.is_complete: if self.is_complete:
self.s = rule.origin self.s = rule.origin
self.expect = None self.expect = None
self.previous = rule.expansion[ptr - 1] if ptr > 0 and len(rule.expansion) else None
else: else:
self.s = (rule, ptr) self.s = (rule, ptr)
self.expect = rule.expansion[ptr] self.expect = rule.expansion[ptr]
self.previous = rule.expansion[ptr - 1] if ptr > 0 and len(rule.expansion) else None
self._hash = hash((self.s, self.start)) self._hash = hash((self.s, self.start))


def advance(self): def advance(self):
return self.__class__(self.rule, self.ptr + 1, self.start)
return Item(self.rule, self.ptr + 1, self.start)


def __eq__(self, other): def __eq__(self, other):
return self is other or (self.s == other.s and self.start == other.start) return self is other or (self.s == other.s and self.start == other.start)
@@ -42,4 +45,31 @@ class Item(object):
return self._hash return self._hash


def __repr__(self): def __repr__(self):
return '%s (%d)' % (self.s if self.is_complete else self.rule.origin, self.start)
before = ( expansion.name for expansion in self.rule.expansion[:self.ptr] )
after = ( expansion.name for expansion in self.rule.expansion[self.ptr:] )
symbol = "{} ::= {}* {}".format(self.rule.origin.name, ' '.join(before), ' '.join(after))
return '%s (%d)' % (symbol, self.start)


class TransitiveItem(Item):
    """An ``Item`` extended with the bookkeeping used for Leo transitive items.

    On top of the fields initialised by ``Item`` (from ``trule``'s rule,
    pointer and start), an instance stores:

    * ``recognized`` - the first constructor argument; it takes part in
      hashing and equality.
    * ``reduction``  - the ``originator`` passed to the constructor.
    * ``column``     - the ``start`` passed to the constructor.
    * ``next_titem`` - initially ``None``; assigned later by other code.
    """

    __slots__ = ('recognized', 'reduction', 'column', 'next_titem')

    def __init__(self, recognized, trule, originator, start):
        super(TransitiveItem, self).__init__(trule.rule, trule.ptr, trule.start)
        self.recognized = recognized
        self.reduction = originator
        self.column = start
        self.next_titem = None
        # Replace the hash computed by Item so that it also covers `recognized`.
        self._hash = hash((self.s, self.start, self.recognized))

    def __eq__(self, other):
        # Only another TransitiveItem can ever compare equal.
        if not isinstance(other, TransitiveItem):
            return False
        if self is other:
            return True
        return (
            type(self.s) == type(other.s)
            and self.s == other.s
            and self.start == other.start
            and self.recognized == other.recognized
        )

    def __hash__(self):
        # Defined explicitly because __eq__ is overridden in this class.
        return self._hash

    def __repr__(self):
        symbols = self.rule.expansion
        consumed = ' '.join(symbol.name for symbol in symbols[:self.ptr])
        remaining = ' '.join(symbol.name for symbol in symbols[self.ptr:])
        return '{} : {} -> {}* {} ({}, {})'.format(
            self.recognized.name,
            self.rule.origin.name,
            consumed,
            remaining,
            self.column,
            self.start,
        )

+ 45
- 22
lark/parsers/earley_forest.py View File

@@ -12,7 +12,7 @@ from ..tree import Tree
from ..exceptions import ParseError from ..exceptions import ParseError
from ..lexer import Token from ..lexer import Token
from ..utils import Str from ..utils import Str
from ..grammar import NonTerminal, Terminal
from ..grammar import NonTerminal, Terminal, Symbol


from collections import deque from collections import deque
from importlib import import_module from importlib import import_module
@@ -34,42 +34,65 @@ class SymbolNode(ForestNode):


Hence a Symbol Node with a single child is unambiguous. Hence a Symbol Node with a single child is unambiguous.
""" """
__slots__ = ('s', 'start', 'end', 'children', 'priority', 'is_intermediate')
__slots__ = ('s', 'start', 'end', '_children', 'paths', 'paths_loaded', 'priority', 'is_intermediate', '_hash')
def __init__(self, s, start, end): def __init__(self, s, start, end):
self.s = s self.s = s
self.start = start self.start = start
self.end = end self.end = end
self.children = set()
self._children = set()
self.paths = set()
self.paths_loaded = False
self.priority = None self.priority = None
self.is_intermediate = isinstance(s, tuple) self.is_intermediate = isinstance(s, tuple)
self._hash = hash((self.s, self.start, self.end))


def add_family(self, lr0, rule, start, left, right): def add_family(self, lr0, rule, start, left, right):
self.children.add(PackedNode(self, lr0, rule, start, left, right))
self._children.add(PackedNode(self, lr0, rule, start, left, right))

def add_path(self, transitive, node):
    """Remember the ``(transitive, node)`` pair in ``self.paths``."""
    entry = (transitive, node)
    self.paths.add(entry)

def load_paths(self):
    """Turn every recorded path into a family on this node.

    For each ``(transitive, node)`` pair in ``self.paths`` a family is added
    from the transitive item's ``reduction``.  When the transitive item has a
    ``next_titem``, the right-hand child is a fresh ``SymbolNode`` for that
    next item (ending at ``self.end``) which is handed the remaining path;
    otherwise the right-hand child is ``node`` itself.  ``self.paths_loaded``
    is set to ``True`` once all paths have been processed.
    """
    for titem, child in self.paths:
        successor = titem.next_titem
        if successor is None:
            right = child
        else:
            right = SymbolNode(successor.s, successor.start, self.end)
            right.add_path(successor, child)
        reduction = titem.reduction
        self.add_family(
            reduction.rule.origin,
            reduction.rule,
            reduction.start,
            reduction.node,
            right,
        )
    self.paths_loaded = True


@property @property
def is_ambiguous(self): def is_ambiguous(self):
return len(self.children) > 1 return len(self.children) > 1


@property
def children(self):
    """Return ``self._children``, calling ``load_paths()`` first if needed.

    ``load_paths()`` is invoked only while ``self.paths_loaded`` is falsy.
    """
    if self.paths_loaded:
        return self._children
    self.load_paths()
    return self._children

def __iter__(self): def __iter__(self):
return iter(self.children)
return iter(self._children)


def __eq__(self, other): def __eq__(self, other):
if not isinstance(other, SymbolNode): if not isinstance(other, SymbolNode):
return False return False
return self is other or (self.s == other.s and self.start == other.start and self.end is other.end)
return self is other or (type(self.s) == type(other.s) and self.s == other.s and self.start == other.start and self.end is other.end)


def __hash__(self): def __hash__(self):
return hash((self.s, self.start, self.end))
return self._hash


def __repr__(self): def __repr__(self):
if self.is_intermediate: if self.is_intermediate:
rule = self.s[0] rule = self.s[0]
ptr = self.s[1] ptr = self.s[1]
names = [ "{}*".format(expansion.name) if index == ptr else expansion.name for index, expansion in enumerate(rule.expansion) ]
symbol = "{} ::= {}".format(rule.origin.name, ' '.join(names))
before = ( expansion.name for expansion in rule.expansion[:ptr] )
after = ( expansion.name for expansion in rule.expansion[ptr:] )
symbol = "{} ::= {}* {}".format(rule.origin.name, ' '.join(before), ' '.join(after))
else: else:
symbol = self.s.name symbol = self.s.name
return "(%s, %d, %d, %d)" % (symbol, self.start, self.end, self.priority if self.priority is not None else 0)
return "({}, {}, {}, {})".format(symbol, self.start, self.end, self.priority if self.priority is not None else 0)


class PackedNode(ForestNode): class PackedNode(ForestNode):
""" """
@@ -115,11 +138,12 @@ class PackedNode(ForestNode):
if isinstance(self.s, tuple): if isinstance(self.s, tuple):
rule = self.s[0] rule = self.s[0]
ptr = self.s[1] ptr = self.s[1]
names = [ "{}*".format(expansion.name) if index == ptr else expansion.name for index, expansion in enumerate(rule.expansion) ]
symbol = "{} ::= {}".format(rule.origin.name, ' '.join(names))
before = ( expansion.name for expansion in rule.expansion[:ptr] )
after = ( expansion.name for expansion in rule.expansion[ptr:] )
symbol = "{} ::= {}* {}".format(rule.origin.name, ' '.join(before), ' '.join(after))
else: else:
symbol = self.s.name symbol = self.s.name
return "{%s, %d, %d}" % (symbol, self.start, self.priority if self.priority is not None else 0)
return "({}, {}, {})".format(symbol, self.start, self.priority)


class ForestVisitor(object): class ForestVisitor(object):
""" """
@@ -182,8 +206,8 @@ class ForestVisitor(object):


current_id = id(current) current_id = id(current)
if current_id in visiting: if current_id in visiting:
if isinstance(current, PackedNode): vpno(current)
else: vsno(current)
if isinstance(current, PackedNode): vpno(current)
else: vsno(current)
input_stack.pop() input_stack.pop()
visiting.remove(current_id) visiting.remove(current_id)
continue continue
@@ -226,7 +250,7 @@ class ForestSumVisitor(ForestVisitor):


def visit_symbol_node_out(self, node): def visit_symbol_node_out(self, node):
node.priority = max(child.priority for child in node.children) node.priority = max(child.priority for child in node.children)
node.children = sorted(node.children, reverse = True)
node._children = sorted(node.children, reverse = True)


class ForestAntiscoreSumVisitor(ForestSumVisitor): class ForestAntiscoreSumVisitor(ForestSumVisitor):
""" """
@@ -240,7 +264,7 @@ class ForestAntiscoreSumVisitor(ForestSumVisitor):
""" """
def visit_symbol_node_out(self, node): def visit_symbol_node_out(self, node):
node.priority = min(child.priority for child in node.children) node.priority = min(child.priority for child in node.children)
node.children = sorted(node.children, key=AntiscoreSumComparator, reverse = True)
node._children = sorted(node.children, key=AntiscoreSumComparator, reverse = True)


class AntiscoreSumComparator(object): class AntiscoreSumComparator(object):
""" """
@@ -342,7 +366,7 @@ class ForestToAmbiguousTreeVisitor(ForestVisitor):
return iter(node.children) return iter(node.children)


def visit_symbol_node_out(self, node): def visit_symbol_node_out(self, node):
if node.is_ambiguous:
if not node.is_intermediate and node.is_ambiguous:
result = self.output_stack.pop() result = self.output_stack.pop()
if self.output_stack: if self.output_stack:
self.output_stack[-1].children.append(result) self.output_stack[-1].children.append(result)
@@ -386,8 +410,8 @@ class ForestToPyDotVisitor(ForestVisitor):
graph_node_id = str(id(node)) graph_node_id = str(id(node))
graph_node_label = "\"{}\"".format(node.value.replace('"', '\\"')) graph_node_label = "\"{}\"".format(node.value.replace('"', '\\"'))
graph_node_color = 0x808080 graph_node_color = 0x808080
graph_node_style = "filled"
graph_node_shape = "polygon"
graph_node_style = "\"filled,rounded\""
graph_node_shape = "diamond"
graph_node = self.pydot.Node(graph_node_id, style=graph_node_style, fillcolor="#{:06x}".format(graph_node_color), shape=graph_node_shape, label=graph_node_label) graph_node = self.pydot.Node(graph_node_id, style=graph_node_style, fillcolor="#{:06x}".format(graph_node_color), shape=graph_node_shape, label=graph_node_label)
self.graph.add_node(graph_node) self.graph.add_node(graph_node)


@@ -422,7 +446,7 @@ class ForestToPyDotVisitor(ForestVisitor):
graph_node_id = str(id(node)) graph_node_id = str(id(node))
graph_node_label = repr(node) graph_node_label = repr(node)
graph_node_color = 0x808080 graph_node_color = 0x808080
graph_node_style = "filled"
graph_node_style = "\"filled\""
if node.is_intermediate: if node.is_intermediate:
graph_node_shape = "ellipse" graph_node_shape = "ellipse"
else: else:
@@ -438,4 +462,3 @@ class ForestToPyDotVisitor(ForestVisitor):
child_graph_node_id = str(id(child)) child_graph_node_id = str(id(child))
child_graph_node = self.graph.get_node(child_graph_node_id)[0] child_graph_node = self.graph.get_node(child_graph_node_id)[0]
self.graph.add_edge(self.pydot.Edge(graph_node, child_graph_node)) self.graph.add_edge(self.pydot.Edge(graph_node, child_graph_node))


+ 112
- 25
lark/parsers/xearley.py View File

@@ -24,7 +24,7 @@ from ..tree import Tree
from .grammar_analysis import GrammarAnalyzer from .grammar_analysis import GrammarAnalyzer
from ..grammar import NonTerminal, Terminal from ..grammar import NonTerminal, Terminal
from .earley import ApplyCallbacks from .earley import ApplyCallbacks
from .earley_common import Item
from .earley_common import Item, TransitiveItem
from .earley_forest import ForestToTreeVisitor, ForestSumVisitor, SymbolNode from .earley_forest import ForestToTreeVisitor, ForestSumVisitor, SymbolNode




@@ -37,6 +37,7 @@ class Parser:
self.complete_lex = complete_lex self.complete_lex = complete_lex


self.FIRST = analysis.FIRST self.FIRST = analysis.FIRST
self.NULLABLE = analysis.NULLABLE
self.callbacks = {} self.callbacks = {}
self.predictions = {} self.predictions = {}


@@ -64,17 +65,71 @@ class Parser:
node_cache = {} node_cache = {}
token_cache = {} token_cache = {}
columns = [] columns = []
transitives = []


text_line = 1 text_line = 1
text_column = 1 text_column = 1


def make_symbol_node(s, start, end):
label = (s, start, end)
if label in node_cache:
node = node_cache[label]
def is_quasi_complete(item):
    """Tell whether ``item`` is complete or can be completed on nullables alone.

    A complete item qualifies immediately.  Otherwise the symbol the item
    currently expects is stepped over, and every symbol that follows it in
    the rule must be a member of ``self.NULLABLE``.  The check also fails
    when the rule's origin is ``start_symbol`` and one of those trailing
    symbols is ``start_symbol`` itself.
    """
    if item.is_complete:
        return True

    # Step over the symbol the item is waiting for, then inspect the tail.
    candidate = item.advance()
    while True:
        if candidate.is_complete:
            return True
        pending = candidate.expect
        if pending not in self.NULLABLE or (
            candidate.rule.origin == start_symbol and pending == start_symbol
        ):
            return False
        candidate = candidate.advance()

def create_leo_transitives(item, trule, previous, visited = None):
if visited is None:
visited = set()

if item.rule.origin in transitives[item.start]:
previous = trule = transitives[item.start][item.rule.origin]
return trule, previous

is_empty_rule = not self.FIRST[item.rule.origin]
if is_empty_rule:
return trule, previous

originator = None
for key in columns[item.start]:
if key.expect is not None and key.expect == item.rule.origin:
if originator is not None:
return trule, previous
originator = key

if originator is None:
return trule, previous

if originator in visited:
return trule, previous

visited.add(originator)
if not is_quasi_complete(originator):
return trule, previous

trule = originator.advance()
if originator.start != item.start:
visited.clear()

trule, previous = create_leo_transitives(originator, trule, previous, visited)
if trule is None:
return trule, previous

titem = None
if previous is not None:
titem = TransitiveItem(item.rule.origin, trule, originator, previous.column)
previous.next_titem = titem
else: else:
node = node_cache[label] = SymbolNode(s, start, end)
return node
titem = TransitiveItem(item.rule.origin, trule, originator, item.start)

previous = transitives[item.start][item.rule.origin] = titem
return trule, previous


def predict_and_complete(i, to_scan): def predict_and_complete(i, to_scan):
"""The core Earley Predictor and Completer. """The core Earley Predictor and Completer.
@@ -95,23 +150,26 @@ class Parser:
### The Earley completer ### The Earley completer
if item.is_complete: ### (item.s == string) if item.is_complete: ### (item.s == string)
if item.node is None: if item.node is None:
item.node = make_symbol_node(item.s, item.start, i)
label = (item.s, item.start, i)
item.node = node_cache[label] if label in node_cache else node_cache.setdefault(label, SymbolNode(*label))
item.node.add_family(item.s, item.rule, item.start, None, None) item.node.add_family(item.s, item.rule, item.start, None, None)


# Empty has 0 length. If we complete an empty symbol in a particular
# parse step, we need to be able to use that same empty symbol to complete
# any predictions that result, that themselves require empty. Avoids
# infinite recursion on empty symbols.
# held_completions is 'H' in E.Scott's paper.
is_empty_item = item.start == i
if is_empty_item:
held_completions[item.rule.origin] = item.node

originators = [originator for originator in columns[item.start] if originator.expect is not None and originator.expect == item.s]
for originator in originators:
new_item = originator.advance()
new_item.node = make_symbol_node(new_item.s, originator.start, i)
new_item.node.add_family(new_item.s, new_item.rule, new_item.start, originator.node, item.node)
create_leo_transitives(item, None, None)

###R Joop Leo right recursion Completer
if item.rule.origin in transitives[item.start]:
transitive = transitives[item.start][item.s]
if transitive.previous in transitives[transitive.column]:
root_transitive = transitives[transitive.column][transitive.previous]
else:
root_transitive = transitive

label = (root_transitive.s, root_transitive.start, i)
node = vn = node_cache[label] if label in node_cache else node_cache.setdefault(label, SymbolNode(*label))
vn.add_path(root_transitive, item.node)

new_item = Item(transitive.rule, transitive.ptr, transitive.start)
new_item.node = vn
if new_item.expect in self.TERMINALS: if new_item.expect in self.TERMINALS:
# Add (B :: aC.B, h, y) to Q # Add (B :: aC.B, h, y) to Q
to_scan.add(new_item) to_scan.add(new_item)
@@ -119,6 +177,30 @@ class Parser:
# Add (B :: aC.B, h, y) to Ei and R # Add (B :: aC.B, h, y) to Ei and R
column.add(new_item) column.add(new_item)
items.append(new_item) items.append(new_item)
###R Regular Earley completer
else:
# Empty has 0 length. If we complete an empty symbol in a particular
# parse step, we need to be able to use that same empty symbol to complete
# any predictions that result, that themselves require empty. Avoids
# infinite recursion on empty symbols.
# held_completions is 'H' in E.Scott's paper.
is_empty_item = item.start == i
if is_empty_item:
held_completions[item.rule.origin] = item.node

originators = [originator for originator in columns[item.start] if originator.expect is not None and originator.expect == item.s]
for originator in originators:
new_item = originator.advance()
label = (new_item.s, originator.start, i)
new_item.node = node_cache[label] if label in node_cache else node_cache.setdefault(label, SymbolNode(*label))
new_item.node.add_family(new_item.s, new_item.rule, i, originator.node, item.node)
if new_item.expect in self.TERMINALS:
# Add (B :: aC.B, h, y) to Q
to_scan.add(new_item)
elif new_item not in column:
# Add (B :: aC.B, h, y) to Ei and R
column.add(new_item)
items.append(new_item)


### The Earley predictor ### The Earley predictor
elif item.expect in self.NON_TERMINALS: ### (item.s == lr0) elif item.expect in self.NON_TERMINALS: ### (item.s == lr0)
@@ -130,7 +212,8 @@ class Parser:
# Process any held completions (H). # Process any held completions (H).
if item.expect in held_completions: if item.expect in held_completions:
new_item = item.advance() new_item = item.advance()
new_item.node = make_symbol_node(new_item.s, item.start, i)
label = (new_item.s, item.start, i)
new_item.node = node_cache[label] if label in node_cache else node_cache.setdefault(label, SymbolNode(*label))
new_item.node.add_family(new_item.s, new_item.rule, new_item.start, item.node, held_completions[item.expect]) new_item.node.add_family(new_item.s, new_item.rule, new_item.start, item.node, held_completions[item.expect])
new_items.append(new_item) new_items.append(new_item)


@@ -190,6 +273,8 @@ class Parser:
next_to_scan = set() next_to_scan = set()
next_set = set() next_set = set()
columns.append(next_set) columns.append(next_set)
next_transitives = dict()
transitives.append(next_transitives)


## 4) Process Tokens from delayed_matches. ## 4) Process Tokens from delayed_matches.
# This is the core of the Earley scanner. Create an SPPF node for each Token, # This is the core of the Earley scanner. Create an SPPF node for each Token,
@@ -199,8 +284,8 @@ class Parser:
for item, start, token in delayed_matches[i+1]: for item, start, token in delayed_matches[i+1]:
if token is not None: if token is not None:
new_item = item.advance() new_item = item.advance()
# new_item.start = start # Should we update this to account for gaps due to ignores?
new_item.node = make_symbol_node(new_item.s, new_item.start, i)
label = (new_item.s, new_item.start, i)
new_item.node = node_cache[label] if label in node_cache else node_cache.setdefault(label, SymbolNode(*label))
new_item.node.add_family(new_item.s, item.rule, new_item.start, item.node, token) new_item.node.add_family(new_item.s, item.rule, new_item.start, item.node, token)
else: else:
new_item = item new_item = item
@@ -221,6 +306,7 @@ class Parser:


# Main loop starts # Main loop starts
columns.append(set()) columns.append(set())
transitives.append(dict())


## The scan buffer. 'Q' in E.Scott's paper. ## The scan buffer. 'Q' in E.Scott's paper.
to_scan = set() to_scan = set()
@@ -248,6 +334,7 @@ class Parser:
# step in the Earley pass. # step in the Earley pass.
node_cache.clear() node_cache.clear()
token_cache.clear() token_cache.clear()
node_cache.clear()
to_scan = scan(i, to_scan) to_scan = scan(i, to_scan)


if token == '\n': if token == '\n':


Loading…
Cancel
Save