@@ -16,7 +16,7 @@ from ..visitors import Transformer_InPlace, v_args | |||||
from ..exceptions import ParseError, UnexpectedToken | from ..exceptions import ParseError, UnexpectedToken | ||||
from .grammar_analysis import GrammarAnalyzer | from .grammar_analysis import GrammarAnalyzer | ||||
from ..grammar import NonTerminal | from ..grammar import NonTerminal | ||||
from .earley_common import Item | |||||
from .earley_common import Item, TransitiveItem | |||||
from .earley_forest import ForestToTreeVisitor, ForestSumVisitor, SymbolNode | from .earley_forest import ForestToTreeVisitor, ForestSumVisitor, SymbolNode | ||||
from collections import deque, defaultdict | from collections import deque, defaultdict | ||||
@@ -28,6 +28,7 @@ class Parser: | |||||
self.resolve_ambiguity = resolve_ambiguity | self.resolve_ambiguity = resolve_ambiguity | ||||
self.FIRST = analysis.FIRST | self.FIRST = analysis.FIRST | ||||
self.NULLABLE = analysis.NULLABLE | |||||
self.callbacks = {} | self.callbacks = {} | ||||
self.predictions = {} | self.predictions = {} | ||||
@@ -56,14 +57,68 @@ class Parser: | |||||
node_cache = {} | node_cache = {} | ||||
token_cache = {} | token_cache = {} | ||||
columns = [] | columns = [] | ||||
def make_symbol_node(s, start, end): | |||||
label = (s, start, end) | |||||
if label in node_cache: | |||||
node = node_cache[label] | |||||
transitives = [] | |||||
def is_quasi_complete(item):
    """Tell whether `item` is complete, or would be once nullable symbols are skipped.

    A complete item qualifies immediately. Otherwise the symbol the item
    currently expects is stepped over, and every symbol remaining after it
    must be in `self.NULLABLE` for the item to qualify.
    """
    if item.is_complete:
        return True

    remainder = item.advance()
    while not remainder.is_complete:
        expected = remainder.expect
        if expected not in self.NULLABLE:
            return False
        # A start rule that still expects the start symbol itself is rejected.
        if remainder.rule.origin == start_symbol and expected == start_symbol:
            return False
        remainder = remainder.advance()
    return True
def create_leo_transitives(item, trule, previous, visited = None):
    """Build and memoise the transitive item for `item.rule.origin`, if one applies.

    The result is stored in `transitives[item.start]` keyed by the rule origin.
    The function walks upwards recursively through the single item in
    `columns[item.start]` that expects the origin (the "originator").

    Returns the pair `(trule, previous)`. The incoming values are returned
    unchanged whenever no transitive item can be created: the origin has an
    empty FIRST set, there is not exactly one originator, the originator was
    already visited, or the originator is not quasi-complete.
    """
    if visited is None:
        visited = set()

    origin = item.rule.origin

    # Already memoised for this column: reuse it for both return slots.
    if origin in transitives[item.start]:
        cached = transitives[item.start][origin]
        return cached, cached

    # An empty FIRST set marks an empty rule; nothing to do for those.
    if not self.FIRST[origin]:
        return trule, previous

    # The originator must be unique within the column.
    expecting = [key for key in columns[item.start]
                 if key.expect is not None and key.expect == origin]
    if len(expecting) != 1:
        return trule, previous
    originator = expecting[0]

    if originator in visited:
        return trule, previous
    visited.add(originator)

    if not is_quasi_complete(originator):
        return trule, previous

    trule = originator.advance()
    # Moving to a different start column begins a fresh cycle check.
    if originator.start != item.start:
        visited.clear()

    trule, previous = create_leo_transitives(originator, trule, previous, visited)
    if trule is None:
        return trule, previous

    # Chain onto the previous transitive item when there is one, inheriting
    # its column; otherwise this item is anchored at item.start.
    if previous is None:
        titem = TransitiveItem(origin, trule, originator, item.start)
    else:
        titem = TransitiveItem(origin, trule, originator, previous.column)
        previous.next_titem = titem

    transitives[item.start][origin] = titem
    previous = titem
    return trule, previous
def predict_and_complete(i, to_scan): | def predict_and_complete(i, to_scan): | ||||
"""The core Earley Predictor and Completer. | """The core Earley Predictor and Completer. | ||||
@@ -84,23 +139,26 @@ class Parser: | |||||
### The Earley completer | ### The Earley completer | ||||
if item.is_complete: ### (item.s == string) | if item.is_complete: ### (item.s == string) | ||||
if item.node is None: | if item.node is None: | ||||
item.node = make_symbol_node(item.s, item.start, i) | |||||
label = (item.s, item.start, i) | |||||
item.node = node_cache[label] if label in node_cache else node_cache.setdefault(label, SymbolNode(*label)) | |||||
item.node.add_family(item.s, item.rule, item.start, None, None) | item.node.add_family(item.s, item.rule, item.start, None, None) | ||||
# Empty has 0 length. If we complete an empty symbol in a particular | |||||
# parse step, we need to be able to use that same empty symbol to complete | |||||
# any predictions that result, that themselves require empty. Avoids | |||||
# infinite recursion on empty symbols. | |||||
# held_completions is 'H' in E.Scott's paper. | |||||
is_empty_item = item.start == i | |||||
if is_empty_item: | |||||
held_completions[item.rule.origin] = item.node | |||||
originators = [originator for originator in columns[item.start] if originator.expect is not None and originator.expect == item.s] | |||||
for originator in originators: | |||||
new_item = originator.advance() | |||||
new_item.node = make_symbol_node(new_item.s, originator.start, i) | |||||
new_item.node.add_family(new_item.s, new_item.rule, new_item.start, originator.node, item.node) | |||||
create_leo_transitives(item, None, None) | |||||
###R Joop Leo right recursion Completer | |||||
if item.rule.origin in transitives[item.start]: | |||||
transitive = transitives[item.start][item.s] | |||||
if transitive.previous in transitives[transitive.column]: | |||||
root_transitive = transitives[transitive.column][transitive.previous] | |||||
else: | |||||
root_transitive = transitive | |||||
label = (root_transitive.s, root_transitive.start, i) | |||||
node = vn = node_cache[label] if label in node_cache else node_cache.setdefault(label, SymbolNode(*label)) | |||||
vn.add_path(root_transitive, item.node) | |||||
new_item = Item(transitive.rule, transitive.ptr, transitive.start) | |||||
new_item.node = vn | |||||
if new_item.expect in self.TERMINALS: | if new_item.expect in self.TERMINALS: | ||||
# Add (B :: aC.B, h, y) to Q | # Add (B :: aC.B, h, y) to Q | ||||
to_scan.add(new_item) | to_scan.add(new_item) | ||||
@@ -108,6 +166,30 @@ class Parser: | |||||
# Add (B :: aC.B, h, y) to Ei and R | # Add (B :: aC.B, h, y) to Ei and R | ||||
column.add(new_item) | column.add(new_item) | ||||
items.append(new_item) | items.append(new_item) | ||||
###R Regular Earley completer | |||||
else: | |||||
# Empty has 0 length. If we complete an empty symbol in a particular | |||||
# parse step, we need to be able to use that same empty symbol to complete | |||||
# any predictions that result, that themselves require empty. Avoids | |||||
# infinite recursion on empty symbols. | |||||
# held_completions is 'H' in E.Scott's paper. | |||||
is_empty_item = item.start == i | |||||
if is_empty_item: | |||||
held_completions[item.rule.origin] = item.node | |||||
originators = [originator for originator in columns[item.start] if originator.expect is not None and originator.expect == item.s] | |||||
for originator in originators: | |||||
new_item = originator.advance() | |||||
label = (new_item.s, originator.start, i) | |||||
new_item.node = node_cache[label] if label in node_cache else node_cache.setdefault(label, SymbolNode(*label)) | |||||
new_item.node.add_family(new_item.s, new_item.rule, i, originator.node, item.node) | |||||
if new_item.expect in self.TERMINALS: | |||||
# Add (B :: aC.B, h, y) to Q | |||||
to_scan.add(new_item) | |||||
elif new_item not in column: | |||||
# Add (B :: aC.B, h, y) to Ei and R | |||||
column.add(new_item) | |||||
items.append(new_item) | |||||
### The Earley predictor | ### The Earley predictor | ||||
elif item.expect in self.NON_TERMINALS: ### (item.s == lr0) | elif item.expect in self.NON_TERMINALS: ### (item.s == lr0) | ||||
@@ -119,7 +201,8 @@ class Parser: | |||||
# Process any held completions (H). | # Process any held completions (H). | ||||
if item.expect in held_completions: | if item.expect in held_completions: | ||||
new_item = item.advance() | new_item = item.advance() | ||||
new_item.node = make_symbol_node(new_item.s, item.start, i) | |||||
label = (new_item.s, item.start, i) | |||||
new_item.node = node_cache[label] if label in node_cache else node_cache.setdefault(label, SymbolNode(*label)) | |||||
new_item.node.add_family(new_item.s, new_item.rule, new_item.start, item.node, held_completions[item.expect]) | new_item.node.add_family(new_item.s, new_item.rule, new_item.start, item.node, held_completions[item.expect]) | ||||
new_items.append(new_item) | new_items.append(new_item) | ||||
@@ -141,11 +224,14 @@ class Parser: | |||||
next_to_scan = set() | next_to_scan = set() | ||||
next_set = set() | next_set = set() | ||||
columns.append(next_set) | columns.append(next_set) | ||||
next_transitives = dict() | |||||
transitives.append(next_transitives) | |||||
for item in set(to_scan): | for item in set(to_scan): | ||||
if match(item.expect, token): | if match(item.expect, token): | ||||
new_item = item.advance() | new_item = item.advance() | ||||
new_item.node = make_symbol_node(new_item.s, new_item.start, i) | |||||
label = (new_item.s, new_item.start, i) | |||||
new_item.node = node_cache[label] if label in node_cache else node_cache.setdefault(label, SymbolNode(*label)) | |||||
new_item.node.add_family(new_item.s, item.rule, new_item.start, item.node, token) | new_item.node.add_family(new_item.s, item.rule, new_item.start, item.node, token) | ||||
if new_item.expect in self.TERMINALS: | if new_item.expect in self.TERMINALS: | ||||
@@ -163,6 +249,7 @@ class Parser: | |||||
# Main loop starts | # Main loop starts | ||||
columns.append(set()) | columns.append(set()) | ||||
transitives.append(dict()) | |||||
## The scan buffer. 'Q' in E.Scott's paper. | ## The scan buffer. 'Q' in E.Scott's paper. | ||||
to_scan = set() | to_scan = set() | ||||
@@ -13,12 +13,13 @@ | |||||
# Author: Erez Shinan (2017) | # Author: Erez Shinan (2017) | ||||
# Email : erezshin@gmail.com | # Email : erezshin@gmail.com | ||||
from ..grammar import NonTerminal, Terminal | |||||
class Item(object): | class Item(object): | ||||
"An Earley Item, the atom of the algorithm." | "An Earley Item, the atom of the algorithm." | ||||
__slots__ = ('s', 'rule', 'ptr', 'start', 'is_complete', 'expect', 'node', '_hash') | |||||
__slots__ = ('s', 'rule', 'ptr', 'start', 'is_complete', 'expect', 'previous', 'node', '_hash') | |||||
def __init__(self, rule, ptr, start): | def __init__(self, rule, ptr, start): | ||||
assert isinstance(start, int), "start is not an int" | |||||
self.is_complete = len(rule.expansion) == ptr | self.is_complete = len(rule.expansion) == ptr | ||||
self.rule = rule # rule | self.rule = rule # rule | ||||
self.ptr = ptr # ptr | self.ptr = ptr # ptr | ||||
@@ -27,13 +28,15 @@ class Item(object): | |||||
if self.is_complete: | if self.is_complete: | ||||
self.s = rule.origin | self.s = rule.origin | ||||
self.expect = None | self.expect = None | ||||
self.previous = rule.expansion[ptr - 1] if ptr > 0 and len(rule.expansion) else None | |||||
else: | else: | ||||
self.s = (rule, ptr) | self.s = (rule, ptr) | ||||
self.expect = rule.expansion[ptr] | self.expect = rule.expansion[ptr] | ||||
self.previous = rule.expansion[ptr - 1] if ptr > 0 and len(rule.expansion) else None | |||||
self._hash = hash((self.s, self.start)) | self._hash = hash((self.s, self.start)) | ||||
def advance(self): | def advance(self): | ||||
return self.__class__(self.rule, self.ptr + 1, self.start) | |||||
return Item(self.rule, self.ptr + 1, self.start) | |||||
def __eq__(self, other): | def __eq__(self, other): | ||||
return self is other or (self.s == other.s and self.start == other.start) | return self is other or (self.s == other.s and self.start == other.start) | ||||
@@ -42,4 +45,31 @@ class Item(object): | |||||
return self._hash | return self._hash | ||||
def __repr__(self): | def __repr__(self): | ||||
return '%s (%d)' % (self.s if self.is_complete else self.rule.origin, self.start) | |||||
before = ( expansion.name for expansion in self.rule.expansion[:self.ptr] ) | |||||
after = ( expansion.name for expansion in self.rule.expansion[self.ptr:] ) | |||||
symbol = "{} ::= {}* {}".format(self.rule.origin.name, ' '.join(before), ' '.join(after)) | |||||
return '%s (%d)' % (symbol, self.start) | |||||
class TransitiveItem(Item):
    """An Earley item extended with the bookkeeping used by the Leo completer.

    On top of the rule/ptr/start copied from `trule`, it records:
      recognized -- the symbol this transitive item stands for
      reduction  -- the originator item it was derived from
      column     -- the column index passed in as `start`
      next_titem -- the following transitive item in the chain (None until linked)
    """
    __slots__ = ('recognized', 'reduction', 'column', 'next_titem')

    def __init__(self, recognized, trule, originator, start):
        super(TransitiveItem, self).__init__(trule.rule, trule.ptr, trule.start)
        self.recognized = recognized
        self.reduction = originator
        self.column = start
        self.next_titem = None
        # Replace the hash computed by Item so that `recognized` takes part in it.
        self._hash = hash((self.s, self.start, self.recognized))

    def __eq__(self, other):
        if self is other:
            return True
        if not isinstance(other, TransitiveItem):
            return False
        return (
            type(self.s) == type(other.s)
            and self.s == other.s
            and self.start == other.start
            and self.recognized == other.recognized
        )

    def __hash__(self):
        return self._hash

    def __repr__(self):
        expansion = self.rule.expansion
        seen = ' '.join(symbol.name for symbol in expansion[:self.ptr])
        pending = ' '.join(symbol.name for symbol in expansion[self.ptr:])
        return '{} : {} -> {}* {} ({}, {})'.format(self.recognized.name, self.rule.origin.name, seen, pending, self.column, self.start)
@@ -12,7 +12,7 @@ from ..tree import Tree | |||||
from ..exceptions import ParseError | from ..exceptions import ParseError | ||||
from ..lexer import Token | from ..lexer import Token | ||||
from ..utils import Str | from ..utils import Str | ||||
from ..grammar import NonTerminal, Terminal | |||||
from ..grammar import NonTerminal, Terminal, Symbol | |||||
from collections import deque | from collections import deque | ||||
from importlib import import_module | from importlib import import_module | ||||
@@ -34,42 +34,65 @@ class SymbolNode(ForestNode): | |||||
Hence a Symbol Node with a single child is unambiguous. | Hence a Symbol Node with a single child is unambiguous. | ||||
""" | """ | ||||
__slots__ = ('s', 'start', 'end', 'children', 'priority', 'is_intermediate') | |||||
__slots__ = ('s', 'start', 'end', '_children', 'paths', 'paths_loaded', 'priority', 'is_intermediate', '_hash') | |||||
def __init__(self, s, start, end): | def __init__(self, s, start, end): | ||||
self.s = s | self.s = s | ||||
self.start = start | self.start = start | ||||
self.end = end | self.end = end | ||||
self.children = set() | |||||
self._children = set() | |||||
self.paths = set() | |||||
self.paths_loaded = False | |||||
self.priority = None | self.priority = None | ||||
self.is_intermediate = isinstance(s, tuple) | self.is_intermediate = isinstance(s, tuple) | ||||
self._hash = hash((self.s, self.start, self.end)) | |||||
def add_family(self, lr0, rule, start, left, right): | def add_family(self, lr0, rule, start, left, right): | ||||
self.children.add(PackedNode(self, lr0, rule, start, left, right)) | |||||
self._children.add(PackedNode(self, lr0, rule, start, left, right)) | |||||
def add_path(self, transitive, node):
    """Record a (transitive item, node) pair in `self.paths`.

    Nothing is expanded here; `load_paths` later turns the recorded pairs
    into families.
    """
    pending = (transitive, node)
    self.paths.add(pending)
def load_paths(self):
    """Turn every recorded (transitive, node) path into a family of this node.

    When the transitive item has a successor (`next_titem`), a new SymbolNode
    ending at `self.end` is created for the successor, the path is recorded
    on it, and that node is used as the right child. Otherwise the recorded
    node is the right child. `paths_loaded` is set once all paths are handled.
    """
    for transitive, node in self.paths:
        reduction = transitive.reduction
        successor = transitive.next_titem
        if successor is None:
            right = node
        else:
            right = SymbolNode(successor.s, successor.start, self.end)
            right.add_path(successor, node)
        self.add_family(reduction.rule.origin, reduction.rule, reduction.start, reduction.node, right)
    self.paths_loaded = True
@property | @property | ||||
def is_ambiguous(self): | def is_ambiguous(self): | ||||
return len(self.children) > 1 | return len(self.children) > 1 | ||||
@property
def children(self):
    """The node's families; recorded paths are expanded on first access."""
    if self.paths_loaded:
        return self._children
    self.load_paths()
    return self._children
def __iter__(self): | def __iter__(self): | ||||
return iter(self.children) | |||||
return iter(self._children) | |||||
def __eq__(self, other):
    """Two symbol nodes are equal when they share the same label (s, start, end).

    Anything that is not a SymbolNode compares unequal.
    """
    if not isinstance(other, SymbolNode):
        return False
    # `end` is compared by value, not identity: `is` on integers is only
    # reliable for CPython's small cached ints, and __hash__ hashes
    # (s, start, end) by value, so equality must agree with it.
    return self is other or (type(self.s) == type(other.s) and self.s == other.s and self.start == other.start and self.end == other.end)
def __hash__(self): | def __hash__(self): | ||||
return hash((self.s, self.start, self.end)) | |||||
return self._hash | |||||
def __repr__(self): | def __repr__(self): | ||||
if self.is_intermediate: | if self.is_intermediate: | ||||
rule = self.s[0] | rule = self.s[0] | ||||
ptr = self.s[1] | ptr = self.s[1] | ||||
names = [ "{}*".format(expansion.name) if index == ptr else expansion.name for index, expansion in enumerate(rule.expansion) ] | |||||
symbol = "{} ::= {}".format(rule.origin.name, ' '.join(names)) | |||||
before = ( expansion.name for expansion in rule.expansion[:ptr] ) | |||||
after = ( expansion.name for expansion in rule.expansion[ptr:] ) | |||||
symbol = "{} ::= {}* {}".format(rule.origin.name, ' '.join(before), ' '.join(after)) | |||||
else: | else: | ||||
symbol = self.s.name | symbol = self.s.name | ||||
return "(%s, %d, %d, %d)" % (symbol, self.start, self.end, self.priority if self.priority is not None else 0) | |||||
return "({}, {}, {}, {})".format(symbol, self.start, self.end, self.priority if self.priority is not None else 0) | |||||
class PackedNode(ForestNode): | class PackedNode(ForestNode): | ||||
""" | """ | ||||
@@ -115,11 +138,12 @@ class PackedNode(ForestNode): | |||||
if isinstance(self.s, tuple): | if isinstance(self.s, tuple): | ||||
rule = self.s[0] | rule = self.s[0] | ||||
ptr = self.s[1] | ptr = self.s[1] | ||||
names = [ "{}*".format(expansion.name) if index == ptr else expansion.name for index, expansion in enumerate(rule.expansion) ] | |||||
symbol = "{} ::= {}".format(rule.origin.name, ' '.join(names)) | |||||
before = ( expansion.name for expansion in rule.expansion[:ptr] ) | |||||
after = ( expansion.name for expansion in rule.expansion[ptr:] ) | |||||
symbol = "{} ::= {}* {}".format(rule.origin.name, ' '.join(before), ' '.join(after)) | |||||
else: | else: | ||||
symbol = self.s.name | symbol = self.s.name | ||||
return "{%s, %d, %d}" % (symbol, self.start, self.priority if self.priority is not None else 0) | |||||
return "({}, {}, {})".format(symbol, self.start, self.priority) | |||||
class ForestVisitor(object): | class ForestVisitor(object): | ||||
""" | """ | ||||
@@ -182,8 +206,8 @@ class ForestVisitor(object): | |||||
current_id = id(current) | current_id = id(current) | ||||
if current_id in visiting: | if current_id in visiting: | ||||
if isinstance(current, PackedNode): vpno(current) | |||||
else: vsno(current) | |||||
if isinstance(current, PackedNode): vpno(current) | |||||
else: vsno(current) | |||||
input_stack.pop() | input_stack.pop() | ||||
visiting.remove(current_id) | visiting.remove(current_id) | ||||
continue | continue | ||||
@@ -226,7 +250,7 @@ class ForestSumVisitor(ForestVisitor): | |||||
def visit_symbol_node_out(self, node): | def visit_symbol_node_out(self, node): | ||||
node.priority = max(child.priority for child in node.children) | node.priority = max(child.priority for child in node.children) | ||||
node.children = sorted(node.children, reverse = True) | |||||
node._children = sorted(node.children, reverse = True) | |||||
class ForestAntiscoreSumVisitor(ForestSumVisitor): | class ForestAntiscoreSumVisitor(ForestSumVisitor): | ||||
""" | """ | ||||
@@ -240,7 +264,7 @@ class ForestAntiscoreSumVisitor(ForestSumVisitor): | |||||
""" | """ | ||||
def visit_symbol_node_out(self, node): | def visit_symbol_node_out(self, node): | ||||
node.priority = min(child.priority for child in node.children) | node.priority = min(child.priority for child in node.children) | ||||
node.children = sorted(node.children, key=AntiscoreSumComparator, reverse = True) | |||||
node._children = sorted(node.children, key=AntiscoreSumComparator, reverse = True) | |||||
class AntiscoreSumComparator(object): | class AntiscoreSumComparator(object): | ||||
""" | """ | ||||
@@ -342,7 +366,7 @@ class ForestToAmbiguousTreeVisitor(ForestVisitor): | |||||
return iter(node.children) | return iter(node.children) | ||||
def visit_symbol_node_out(self, node): | def visit_symbol_node_out(self, node): | ||||
if node.is_ambiguous: | |||||
if not node.is_intermediate and node.is_ambiguous: | |||||
result = self.output_stack.pop() | result = self.output_stack.pop() | ||||
if self.output_stack: | if self.output_stack: | ||||
self.output_stack[-1].children.append(result) | self.output_stack[-1].children.append(result) | ||||
@@ -386,8 +410,8 @@ class ForestToPyDotVisitor(ForestVisitor): | |||||
graph_node_id = str(id(node)) | graph_node_id = str(id(node)) | ||||
graph_node_label = "\"{}\"".format(node.value.replace('"', '\\"')) | graph_node_label = "\"{}\"".format(node.value.replace('"', '\\"')) | ||||
graph_node_color = 0x808080 | graph_node_color = 0x808080 | ||||
graph_node_style = "filled" | |||||
graph_node_shape = "polygon" | |||||
graph_node_style = "\"filled,rounded\"" | |||||
graph_node_shape = "diamond" | |||||
graph_node = self.pydot.Node(graph_node_id, style=graph_node_style, fillcolor="#{:06x}".format(graph_node_color), shape=graph_node_shape, label=graph_node_label) | graph_node = self.pydot.Node(graph_node_id, style=graph_node_style, fillcolor="#{:06x}".format(graph_node_color), shape=graph_node_shape, label=graph_node_label) | ||||
self.graph.add_node(graph_node) | self.graph.add_node(graph_node) | ||||
@@ -422,7 +446,7 @@ class ForestToPyDotVisitor(ForestVisitor): | |||||
graph_node_id = str(id(node)) | graph_node_id = str(id(node)) | ||||
graph_node_label = repr(node) | graph_node_label = repr(node) | ||||
graph_node_color = 0x808080 | graph_node_color = 0x808080 | ||||
graph_node_style = "filled" | |||||
graph_node_style = "\"filled\"" | |||||
if node.is_intermediate: | if node.is_intermediate: | ||||
graph_node_shape = "ellipse" | graph_node_shape = "ellipse" | ||||
else: | else: | ||||
@@ -438,4 +462,3 @@ class ForestToPyDotVisitor(ForestVisitor): | |||||
child_graph_node_id = str(id(child)) | child_graph_node_id = str(id(child)) | ||||
child_graph_node = self.graph.get_node(child_graph_node_id)[0] | child_graph_node = self.graph.get_node(child_graph_node_id)[0] | ||||
self.graph.add_edge(self.pydot.Edge(graph_node, child_graph_node)) | self.graph.add_edge(self.pydot.Edge(graph_node, child_graph_node)) | ||||
@@ -24,7 +24,7 @@ from ..tree import Tree | |||||
from .grammar_analysis import GrammarAnalyzer | from .grammar_analysis import GrammarAnalyzer | ||||
from ..grammar import NonTerminal, Terminal | from ..grammar import NonTerminal, Terminal | ||||
from .earley import ApplyCallbacks | from .earley import ApplyCallbacks | ||||
from .earley_common import Item | |||||
from .earley_common import Item, TransitiveItem | |||||
from .earley_forest import ForestToTreeVisitor, ForestSumVisitor, SymbolNode | from .earley_forest import ForestToTreeVisitor, ForestSumVisitor, SymbolNode | ||||
@@ -37,6 +37,7 @@ class Parser: | |||||
self.complete_lex = complete_lex | self.complete_lex = complete_lex | ||||
self.FIRST = analysis.FIRST | self.FIRST = analysis.FIRST | ||||
self.NULLABLE = analysis.NULLABLE | |||||
self.callbacks = {} | self.callbacks = {} | ||||
self.predictions = {} | self.predictions = {} | ||||
@@ -64,17 +65,71 @@ class Parser: | |||||
node_cache = {} | node_cache = {} | ||||
token_cache = {} | token_cache = {} | ||||
columns = [] | columns = [] | ||||
transitives = [] | |||||
text_line = 1 | text_line = 1 | ||||
text_column = 1 | text_column = 1 | ||||
def make_symbol_node(s, start, end): | |||||
label = (s, start, end) | |||||
if label in node_cache: | |||||
node = node_cache[label] | |||||
def is_quasi_complete(item):
    """Tell whether `item` is complete, or would be once nullable symbols are skipped.

    A complete item qualifies immediately. Otherwise the symbol the item
    currently expects is stepped over, and every symbol remaining after it
    must be in `self.NULLABLE` for the item to qualify.
    """
    if item.is_complete:
        return True

    remainder = item.advance()
    while not remainder.is_complete:
        expected = remainder.expect
        if expected not in self.NULLABLE:
            return False
        # A start rule that still expects the start symbol itself is rejected.
        if remainder.rule.origin == start_symbol and expected == start_symbol:
            return False
        remainder = remainder.advance()
    return True
def create_leo_transitives(item, trule, previous, visited = None):
    """Build and memoise the transitive item for `item.rule.origin`, if one applies.

    The result is stored in `transitives[item.start]` keyed by the rule origin.
    The function walks upwards recursively through the single item in
    `columns[item.start]` that expects the origin (the "originator").

    Returns the pair `(trule, previous)`. The incoming values are returned
    unchanged whenever no transitive item can be created: the origin has an
    empty FIRST set, there is not exactly one originator, the originator was
    already visited, or the originator is not quasi-complete.
    """
    if visited is None:
        visited = set()

    origin = item.rule.origin

    # Already memoised for this column: reuse it for both return slots.
    if origin in transitives[item.start]:
        cached = transitives[item.start][origin]
        return cached, cached

    # An empty FIRST set marks an empty rule; nothing to do for those.
    if not self.FIRST[origin]:
        return trule, previous

    # The originator must be unique within the column.
    expecting = [key for key in columns[item.start]
                 if key.expect is not None and key.expect == origin]
    if len(expecting) != 1:
        return trule, previous
    originator = expecting[0]

    if originator in visited:
        return trule, previous
    visited.add(originator)

    if not is_quasi_complete(originator):
        return trule, previous

    trule = originator.advance()
    # Moving to a different start column begins a fresh cycle check.
    if originator.start != item.start:
        visited.clear()

    trule, previous = create_leo_transitives(originator, trule, previous, visited)
    if trule is None:
        return trule, previous

    # Chain onto the previous transitive item when there is one, inheriting
    # its column; otherwise this item is anchored at item.start.
    if previous is None:
        titem = TransitiveItem(origin, trule, originator, item.start)
    else:
        titem = TransitiveItem(origin, trule, originator, previous.column)
        previous.next_titem = titem

    transitives[item.start][origin] = titem
    previous = titem
    return trule, previous
def predict_and_complete(i, to_scan): | def predict_and_complete(i, to_scan): | ||||
"""The core Earley Predictor and Completer. | """The core Earley Predictor and Completer. | ||||
@@ -95,23 +150,26 @@ class Parser: | |||||
### The Earley completer | ### The Earley completer | ||||
if item.is_complete: ### (item.s == string) | if item.is_complete: ### (item.s == string) | ||||
if item.node is None: | if item.node is None: | ||||
item.node = make_symbol_node(item.s, item.start, i) | |||||
label = (item.s, item.start, i) | |||||
item.node = node_cache[label] if label in node_cache else node_cache.setdefault(label, SymbolNode(*label)) | |||||
item.node.add_family(item.s, item.rule, item.start, None, None) | item.node.add_family(item.s, item.rule, item.start, None, None) | ||||
# Empty has 0 length. If we complete an empty symbol in a particular | |||||
# parse step, we need to be able to use that same empty symbol to complete | |||||
# any predictions that result, that themselves require empty. Avoids | |||||
# infinite recursion on empty symbols. | |||||
# held_completions is 'H' in E.Scott's paper. | |||||
is_empty_item = item.start == i | |||||
if is_empty_item: | |||||
held_completions[item.rule.origin] = item.node | |||||
originators = [originator for originator in columns[item.start] if originator.expect is not None and originator.expect == item.s] | |||||
for originator in originators: | |||||
new_item = originator.advance() | |||||
new_item.node = make_symbol_node(new_item.s, originator.start, i) | |||||
new_item.node.add_family(new_item.s, new_item.rule, new_item.start, originator.node, item.node) | |||||
create_leo_transitives(item, None, None) | |||||
###R Joop Leo right recursion Completer | |||||
if item.rule.origin in transitives[item.start]: | |||||
transitive = transitives[item.start][item.s] | |||||
if transitive.previous in transitives[transitive.column]: | |||||
root_transitive = transitives[transitive.column][transitive.previous] | |||||
else: | |||||
root_transitive = transitive | |||||
label = (root_transitive.s, root_transitive.start, i) | |||||
node = vn = node_cache[label] if label in node_cache else node_cache.setdefault(label, SymbolNode(*label)) | |||||
vn.add_path(root_transitive, item.node) | |||||
new_item = Item(transitive.rule, transitive.ptr, transitive.start) | |||||
new_item.node = vn | |||||
if new_item.expect in self.TERMINALS: | if new_item.expect in self.TERMINALS: | ||||
# Add (B :: aC.B, h, y) to Q | # Add (B :: aC.B, h, y) to Q | ||||
to_scan.add(new_item) | to_scan.add(new_item) | ||||
@@ -119,6 +177,30 @@ class Parser: | |||||
# Add (B :: aC.B, h, y) to Ei and R | # Add (B :: aC.B, h, y) to Ei and R | ||||
column.add(new_item) | column.add(new_item) | ||||
items.append(new_item) | items.append(new_item) | ||||
###R Regular Earley completer | |||||
else: | |||||
# Empty has 0 length. If we complete an empty symbol in a particular | |||||
# parse step, we need to be able to use that same empty symbol to complete | |||||
# any predictions that result, that themselves require empty. Avoids | |||||
# infinite recursion on empty symbols. | |||||
# held_completions is 'H' in E.Scott's paper. | |||||
is_empty_item = item.start == i | |||||
if is_empty_item: | |||||
held_completions[item.rule.origin] = item.node | |||||
originators = [originator for originator in columns[item.start] if originator.expect is not None and originator.expect == item.s] | |||||
for originator in originators: | |||||
new_item = originator.advance() | |||||
label = (new_item.s, originator.start, i) | |||||
new_item.node = node_cache[label] if label in node_cache else node_cache.setdefault(label, SymbolNode(*label)) | |||||
new_item.node.add_family(new_item.s, new_item.rule, i, originator.node, item.node) | |||||
if new_item.expect in self.TERMINALS: | |||||
# Add (B :: aC.B, h, y) to Q | |||||
to_scan.add(new_item) | |||||
elif new_item not in column: | |||||
# Add (B :: aC.B, h, y) to Ei and R | |||||
column.add(new_item) | |||||
items.append(new_item) | |||||
### The Earley predictor | ### The Earley predictor | ||||
elif item.expect in self.NON_TERMINALS: ### (item.s == lr0) | elif item.expect in self.NON_TERMINALS: ### (item.s == lr0) | ||||
@@ -130,7 +212,8 @@ class Parser: | |||||
# Process any held completions (H). | # Process any held completions (H). | ||||
if item.expect in held_completions: | if item.expect in held_completions: | ||||
new_item = item.advance() | new_item = item.advance() | ||||
new_item.node = make_symbol_node(new_item.s, item.start, i) | |||||
label = (new_item.s, item.start, i) | |||||
new_item.node = node_cache[label] if label in node_cache else node_cache.setdefault(label, SymbolNode(*label)) | |||||
new_item.node.add_family(new_item.s, new_item.rule, new_item.start, item.node, held_completions[item.expect]) | new_item.node.add_family(new_item.s, new_item.rule, new_item.start, item.node, held_completions[item.expect]) | ||||
new_items.append(new_item) | new_items.append(new_item) | ||||
@@ -190,6 +273,8 @@ class Parser: | |||||
next_to_scan = set() | next_to_scan = set() | ||||
next_set = set() | next_set = set() | ||||
columns.append(next_set) | columns.append(next_set) | ||||
next_transitives = dict() | |||||
transitives.append(next_transitives) | |||||
## 4) Process Tokens from delayed_matches. | ## 4) Process Tokens from delayed_matches. | ||||
# This is the core of the Earley scanner. Create an SPPF node for each Token, | # This is the core of the Earley scanner. Create an SPPF node for each Token, | ||||
@@ -199,8 +284,8 @@ class Parser: | |||||
for item, start, token in delayed_matches[i+1]: | for item, start, token in delayed_matches[i+1]: | ||||
if token is not None: | if token is not None: | ||||
new_item = item.advance() | new_item = item.advance() | ||||
# new_item.start = start # Should we update this to account for gaps due to ignores? | |||||
new_item.node = make_symbol_node(new_item.s, new_item.start, i) | |||||
label = (new_item.s, new_item.start, i) | |||||
new_item.node = node_cache[label] if label in node_cache else node_cache.setdefault(label, SymbolNode(*label)) | |||||
new_item.node.add_family(new_item.s, item.rule, new_item.start, item.node, token) | new_item.node.add_family(new_item.s, item.rule, new_item.start, item.node, token) | ||||
else: | else: | ||||
new_item = item | new_item = item | ||||
@@ -221,6 +306,7 @@ class Parser: | |||||
# Main loop starts | # Main loop starts | ||||
columns.append(set()) | columns.append(set()) | ||||
transitives.append(dict()) | |||||
## The scan buffer. 'Q' in E.Scott's paper. | ## The scan buffer. 'Q' in E.Scott's paper. | ||||
to_scan = set() | to_scan = set() | ||||
@@ -248,6 +334,7 @@ class Parser: | |||||
# step in the Earley pass. | # step in the Earley pass. | ||||
node_cache.clear() | node_cache.clear() | ||||
token_cache.clear() | token_cache.clear() | ||||
node_cache.clear() | |||||
to_scan = scan(i, to_scan) | to_scan = scan(i, to_scan) | ||||
if token == '\n': | if token == '\n': | ||||