
Added Chris' changes, Dec 2018

Merge remote-tracking branch 'origin/0.7b' into 0.7b
Erez Shinan · 6 years ago · commit c968e212ff
4 changed files, with 466 additions and 166 deletions:

  1. lark/parsers/earley.py         +144  -46
  2. lark/parsers/earley_common.py   +32  -37
  3. lark/parsers/earley_forest.py  +148  -34
  4. lark/parsers/xearley.py        +142  -49

lark/parsers/earley.py  (+144 -46)

@@ -16,17 +16,19 @@ from ..visitors import Transformer_InPlace, v_args
 from ..exceptions import ParseError, UnexpectedToken
 from .grammar_analysis import GrammarAnalyzer
 from ..grammar import NonTerminal
-from .earley_common import Column, Item
-from .earley_forest import ForestToTreeVisitor, ForestSumVisitor, SymbolNode, ForestToAmbiguousTreeVisitor
+from .earley_common import Item, TransitiveItem
+from .earley_forest import ForestToTreeVisitor, ForestSumVisitor, SymbolNode
 
+from collections import deque, defaultdict
 
 class Parser:
     def __init__(self, parser_conf, term_matcher, resolve_ambiguity=True, forest_sum_visitor = ForestSumVisitor):
         analysis = GrammarAnalyzer(parser_conf)
         self.parser_conf = parser_conf
         self.resolve_ambiguity = resolve_ambiguity
-        self.forest_sum_visitor = forest_sum_visitor
 
         self.FIRST = analysis.FIRST
+        self.NULLABLE = analysis.NULLABLE
         self.callbacks = {}
         self.predictions = {}
 
@@ -39,6 +41,7 @@ class Parser:
             self.callbacks[rule] = rule.alias if callable(rule.alias) else getattr(parser_conf.callback, rule.alias)
             self.predictions[rule.origin] = [x.rule for x in analysis.expand_rule(rule.origin)]
 
+        self.forest_tree_visitor = ForestToTreeVisitor(forest_sum_visitor, self.callbacks)
         self.term_matcher = term_matcher
 
@@ -46,19 +49,78 @@ class Parser:
         # Define parser functions
         start_symbol = NonTerminal(start_symbol or self.parser_conf.start)
         match = self.term_matcher
-        held_completions = defaultdict(list)
+
+        # Held Completions (H in E.Scotts paper).
+        held_completions = {}
+
+        # Cache for nodes & tokens created in a particular parse step.
         node_cache = {}
         token_cache = {}
 
-        def make_symbol_node(s, start, end):
-            label = (s, start.i, end.i)
-            if label in node_cache:
-                node = node_cache[label]
+        columns = []
+        transitives = []
+
+        def is_quasi_complete(item):
+            if item.is_complete:
+                return True
+
+            quasi = item.advance()
+            while not quasi.is_complete:
+                symbol = quasi.expect
+                if symbol not in self.NULLABLE:
+                    return False
+                if quasi.rule.origin == start_symbol and symbol == start_symbol:
+                    return False
+                quasi = quasi.advance()
+            return True
+
+        def create_leo_transitives(item, trule, previous, visited = None):
+            if visited is None:
+                visited = set()
+
+            if item.rule.origin in transitives[item.start]:
+                previous = trule = transitives[item.start][item.rule.origin]
+                return trule, previous
+
+            is_empty_rule = not self.FIRST[item.rule.origin]
+            if is_empty_rule:
+                return trule, previous
+
+            originator = None
+            for key in columns[item.start]:
+                if key.expect is not None and key.expect == item.rule.origin:
+                    if originator is not None:
+                        return trule, previous
+                    originator = key
+
+            if originator is None:
+                return trule, previous
+
+            if originator in visited:
+                return trule, previous
+
+            visited.add(originator)
+            if not is_quasi_complete(originator):
+                return trule, previous
+
+            trule = originator.advance()
+            if originator.start != item.start:
+                visited.clear()
+
+            trule, previous = create_leo_transitives(originator, trule, previous, visited)
+            if trule is None:
+                return trule, previous
+
+            titem = None
+            if previous is not None:
+                titem = TransitiveItem(item.rule.origin, trule, originator, previous.column)
+                previous.next_titem = titem
             else:
-                node = node_cache[label] = SymbolNode(s, start, end)
-            return node
+                titem = TransitiveItem(item.rule.origin, trule, originator, item.start)
+
+            previous = transitives[item.start][item.rule.origin] = titem
+            return trule, previous
 
-        def predict_and_complete(column, to_scan):
+        def predict_and_complete(i, to_scan):
             """The core Earley Predictor and Completer.
 
             At each stage of the input, we handling any completed items (things
@@ -68,61 +130,90 @@ class Parser:
             which can be added to the scan list for the next scanner cycle."""
             held_completions.clear()
 
+            column = columns[i]
             # R (items) = Ei (column.items)
-            items = deque(column.items)
+            items = deque(column)
             while items:
                 item = items.pop()    # remove an element, A say, from R
 
                 ### The Earley completer
                 if item.is_complete:   ### (item.s == string)
                     if item.node is None:
-                        item.node = make_symbol_node(item.s, item.start, column)
+                        label = (item.s, item.start, i)
+                        item.node = node_cache[label] if label in node_cache else node_cache.setdefault(label, SymbolNode(*label))
                         item.node.add_family(item.s, item.rule, item.start, None, None)
 
-                    # Empty has 0 length. If we complete an empty symbol in a particular
-                    # parse step, we need to be able to use that same empty symbol to complete
-                    # any predictions that result, that themselves require empty. Avoids
-                    # infinite recursion on empty symbols.
-                    # held_completions is 'H' in E.Scott's paper.
-                    is_empty_item = item.start.i == column.i
-                    if is_empty_item:
-                        held_completions[item.rule.origin] = item.node
-
-                    originators = [originator for originator in item.start.items if originator.expect is not None and originator.expect == item.s]
-                    for originator in originators:
-                        new_item = originator.advance()
-                        new_item.node = make_symbol_node(new_item.s, originator.start, column)
-                        new_item.node.add_family(new_item.s, new_item.rule, new_item.start, originator.node, item.node)
+                    create_leo_transitives(item, None, None)
+
+                    ###R Joop Leo right recursion Completer
+                    if item.rule.origin in transitives[item.start]:
+                        transitive = transitives[item.start][item.s]
+                        if transitive.previous in transitives[transitive.column]:
+                            root_transitive = transitives[transitive.column][transitive.previous]
+                        else:
+                            root_transitive = transitive
+
+                        label = (root_transitive.s, root_transitive.start, i)
+                        node = vn = node_cache[label] if label in node_cache else node_cache.setdefault(label, SymbolNode(*label))
+                        vn.add_path(root_transitive, item.node)
+
+                        new_item = Item(transitive.rule, transitive.ptr, transitive.start)
+                        new_item.node = vn
                         if new_item.expect in self.TERMINALS:
                             # Add (B :: aC.B, h, y) to Q
                             to_scan.add(new_item)
-                        elif new_item not in column.items:
+                        elif new_item not in column:
                             # Add (B :: aC.B, h, y) to Ei and R
                             column.add(new_item)
                             items.append(new_item)
+                    ###R Regular Earley completer
+                    else:
+                        # Empty has 0 length. If we complete an empty symbol in a particular
+                        # parse step, we need to be able to use that same empty symbol to complete
+                        # any predictions that result, that themselves require empty. Avoids
+                        # infinite recursion on empty symbols.
+                        # held_completions is 'H' in E.Scott's paper.
+                        is_empty_item = item.start == i
+                        if is_empty_item:
+                            held_completions[item.rule.origin] = item.node
+
+                        originators = [originator for originator in columns[item.start] if originator.expect is not None and originator.expect == item.s]
+                        for originator in originators:
+                            new_item = originator.advance()
+                            label = (new_item.s, originator.start, i)
+                            new_item.node = node_cache[label] if label in node_cache else node_cache.setdefault(label, SymbolNode(*label))
+                            new_item.node.add_family(new_item.s, new_item.rule, i, originator.node, item.node)
+                            if new_item.expect in self.TERMINALS:
+                                # Add (B :: aC.B, h, y) to Q
+                                to_scan.add(new_item)
+                            elif new_item not in column:
+                                # Add (B :: aC.B, h, y) to Ei and R
+                                column.add(new_item)
+                                items.append(new_item)
 
                 ### The Earley predictor
                 elif item.expect in self.NON_TERMINALS: ### (item.s == lr0)
                     new_items = []
                     for rule in self.predictions[item.expect]:
-                        new_item = Item(rule, 0, column)
+                        new_item = Item(rule, 0, i)
                         new_items.append(new_item)
 
                     # Process any held completions (H).
                     if item.expect in held_completions:
                         new_item = item.advance()
-                        new_item.node = make_symbol_node(new_item.s, item.start, column)
+                        label = (new_item.s, item.start, i)
+                        new_item.node = node_cache[label] if label in node_cache else node_cache.setdefault(label, SymbolNode(*label))
                         new_item.node.add_family(new_item.s, new_item.rule, new_item.start, item.node, held_completions[item.expect])
                         new_items.append(new_item)
 
                     for new_item in new_items:
                         if new_item.expect in self.TERMINALS:
                             to_scan.add(new_item)
-                        elif new_item not in column.items:
+                        elif new_item not in column:
                             column.add(new_item)
                             items.append(new_item)
 
-        def scan(i, token, column, to_scan):
+        def scan(i, token, to_scan):
             """The core Earley Scanner.
 
             This is a custom implementation of the scanner that uses the
@@ -130,12 +221,17 @@ class Parser:
            Earley predictor, based on the previously completed tokens.
            This ensures that at each phase of the parse we have a custom
            lexer context, allowing for more complex ambiguities."""
-            next_set = Column(i+1, self.FIRST)
             next_to_scan = set()
+            next_set = set()
+            columns.append(next_set)
+            next_transitives = dict()
+            transitives.append(next_transitives)
 
             for item in set(to_scan):
                 if match(item.expect, token):
                     new_item = item.advance()
-                    new_item.node = make_symbol_node(new_item.s, new_item.start, column)
+                    label = (new_item.s, new_item.start, i)
+                    new_item.node = node_cache[label] if label in node_cache else node_cache.setdefault(label, SymbolNode(*label))
                     new_item.node.add_family(new_item.s, item.rule, new_item.start, item.node, token)
 
                     if new_item.expect in self.TERMINALS:
@@ -149,11 +245,11 @@ class Parser:
                 expect = {i.expect.name for i in to_scan}
                 raise UnexpectedToken(token, expect, considered_rules = set(to_scan))
 
-            return next_set, next_to_scan
+            return next_to_scan
 
         # Main loop starts
-        column0 = Column(0, self.FIRST)
-        column = column0
+        columns.append(set())
+        transitives.append(dict())
 
         ## The scan buffer. 'Q' in E.Scott's paper.
         to_scan = set()
@@ -162,32 +258,34 @@ class Parser:
        # Add predicted items to the first Earley set (for the predictor) if they
        # result in a non-terminal, or the scanner if they result in a terminal.
        for rule in self.predictions[start_symbol]:
-            item = Item(rule, 0, column0)
+            item = Item(rule, 0, 0)
            if item.expect in self.TERMINALS:
                to_scan.add(item)
            else:
-                column.add(item)
+                columns[0].add(item)
 
        ## The main Earley loop.
        # Run the Prediction/Completion cycle for any Items in the current Earley set.
        # Completions will be added to the SPPF tree, and predictions will be recursively
        # processed down to terminals/empty nodes to be added to the scanner for the next
        # step.
-        for i, token in enumerate(stream):
-            predict_and_complete(column, to_scan)
+        i = 0
+        for token in stream:
+            predict_and_complete(i, to_scan)
 
            # Clear the node_cache and token_cache, which are only relevant for each
            # step in the Earley pass.
            node_cache.clear()
            token_cache.clear()
-            column, to_scan = scan(i, token, column, to_scan)
+            to_scan = scan(i, token, to_scan)
+            i += 1
 
-        predict_and_complete(column, to_scan)
+        predict_and_complete(i, to_scan)
 
        ## Column is now the final column in the parse. If the parse was successful, the start
        # symbol should have been completed in the last step of the Earley cycle, and will be in
        # this column. Find the item for the start_symbol, which is the root of the SPPF tree.
-        solutions = [n.node for n in column.items if n.is_complete and n.node is not None and n.s == start_symbol and n.start is column0]
+        solutions = [n.node for n in columns[i] if n.is_complete and n.node is not None and n.s == start_symbol and n.start == 0]
 
        if not solutions:
            raise ParseError('Incomplete parse: Could not find a solution to input')
@@ -201,7 +299,7 @@ class Parser:
 
        # ... otherwise, disambiguate and convert the SPPF to an AST, removing any ambiguities
        # according to the rules.
-        return ForestToTreeVisitor(solutions[0], self.forest_sum_visitor, self.callbacks).go()
+        return self.forest_tree_visitor.go(solutions[0])
 
 class ApplyCallbacks(Transformer_InPlace):
     def __init__(self, postprocess):
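
The recurring `node_cache[label] if label in node_cache else node_cache.setdefault(label, SymbolNode(*label))` pattern above is a get-or-create on the per-step SPPF node cache, keyed by (symbol, start, end). A minimal standalone sketch of the idiom, with an illustrative stub standing in for lark's SymbolNode:

    # Minimal sketch of the per-step SPPF node cache used throughout this diff.
    # SymbolNodeStub stands in for lark's earley_forest.SymbolNode.
    class SymbolNodeStub:
        def __init__(self, s, start, end):
            self.s, self.start, self.end = s, start, end

    node_cache = {}

    def get_or_create(s, start, end):
        label = (s, start, end)
        # The membership test avoids constructing a node on a cache hit;
        # setdefault only allocates on a miss.
        if label in node_cache:
            return node_cache[label]
        return node_cache.setdefault(label, SymbolNodeStub(*label))

    a = get_or_create('sum', 0, 3)
    b = get_or_create('sum', 0, 3)
    assert a is b   # one shared node per (symbol, start, end) span

Sharing one node per span is what lets ambiguous derivations of the same substring merge into a single symbol node with multiple packed children.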


lark/parsers/earley_common.py  (+32 -37)

@@ -13,27 +13,12 @@
 # Author: Erez Shinan (2017)
 # Email : erezshin@gmail.com
 
-## for recursive repr
-from ..tree import Tree
-
-class Derivation(Tree):
-    def __init__(self, rule, children = None):
-        Tree.__init__(self, 'drv', children if children is not None else [])
-        self.meta.rule = rule
-        self._hash = None
-
-    def __repr__(self, indent = 0):
-        return 'Derivation(%s, %s, %s)' % (self.data, self.rule.origin, '...')
-
-    def __hash__(self):
-        if self._hash is None:
-            self._hash = Tree.__hash__(self)
-        return self._hash
+from ..grammar import NonTerminal, Terminal
 
 class Item(object):
     "An Earley Item, the atom of the algorithm."
 
-    __slots__ = ('s', 'rule', 'ptr', 'start', 'is_complete', 'expect', 'node', '_hash')
+    __slots__ = ('s', 'rule', 'ptr', 'start', 'is_complete', 'expect', 'previous', 'node', '_hash')
     def __init__(self, rule, ptr, start):
         self.is_complete = len(rule.expansion) == ptr
         self.rule = rule    # rule
@@ -43,38 +28,48 @@ class Item(object):
         if self.is_complete:
             self.s = rule.origin
             self.expect = None
+            self.previous = rule.expansion[ptr - 1] if ptr > 0 and len(rule.expansion) else None
         else:
             self.s = (rule, ptr)
             self.expect = rule.expansion[ptr]
-        self._hash = hash((self.s, self.start.i))
+            self.previous = rule.expansion[ptr - 1] if ptr > 0 and len(rule.expansion) else None
+        self._hash = hash((self.s, self.start))
 
     def advance(self):
-        return self.__class__(self.rule, self.ptr + 1, self.start)
+        return Item(self.rule, self.ptr + 1, self.start)
 
     def __eq__(self, other):
-        return self is other or (self.s == other.s and self.start.i == other.start.i)
+        return self is other or (self.s == other.s and self.start == other.start)
 
     def __hash__(self):
         return self._hash
 
     def __repr__(self):
-        return '%s (%d)' % (self.s if self.is_complete else self.rule.origin, self.start.i)
-
-class Column:
-    "An entry in the table, aka Earley Chart. Contains lists of items."
-    def __init__(self, i, FIRST):
-        self.i = i
-        self.items = set()
-        self.FIRST = FIRST
+        before = ( expansion.name for expansion in self.rule.expansion[:self.ptr] )
+        after = ( expansion.name for expansion in self.rule.expansion[self.ptr:] )
+        symbol = "{} ::= {}* {}".format(self.rule.origin.name, ' '.join(before), ' '.join(after))
+        return '%s (%d)' % (symbol, self.start)
+
+class TransitiveItem(Item):
+    __slots__ = ('recognized', 'reduction', 'column', 'next_titem')
+    def __init__(self, recognized, trule, originator, start):
+        super(TransitiveItem, self).__init__(trule.rule, trule.ptr, trule.start)
+        self.recognized = recognized
+        self.reduction = originator
+        self.column = start
+        self.next_titem = None
+        self._hash = hash((self.s, self.start, self.recognized))
 
-    def add(self, item):
-        """Sort items into scan/predict/reduce newslists
-
-        Makes sure only unique items are added.
-        """
-        self.items.add(item)
+    def __eq__(self, other):
+        if not isinstance(other, TransitiveItem):
+            return False
+        return self is other or (type(self.s) == type(other.s) and self.s == other.s and self.start == other.start and self.recognized == other.recognized)
 
-    def __bool__(self):
-        return bool(self.items)
+    def __hash__(self):
+        return self._hash
 
-    __nonzero__ = __bool__  # Py2 backwards-compatibility
+    def __repr__(self):
+        before = ( expansion.name for expansion in self.rule.expansion[:self.ptr] )
+        after = ( expansion.name for expansion in self.rule.expansion[self.ptr:] )
+        return '{} : {} -> {}* {} ({}, {})'.format(self.recognized.name, self.rule.origin.name, ' '.join(before), ' '.join(after), self.column, self.start)
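
With the Column class gone, an Item's identity is just its dotted rule plus an integer origin, so the deduplication the parser relies on falls out of hashing (s, start). A toy demonstration under that assumption (RuleStub and ItemStub are illustrative stand-ins, not lark's classes):

    # Toy demonstration: items hashed on (s, start) deduplicate in plain sets.
    class RuleStub:
        def __init__(self, origin, expansion):
            self.origin, self.expansion = origin, expansion

    class ItemStub:
        __slots__ = ('s', 'rule', 'ptr', 'start', 'is_complete', 'expect', '_hash')
        def __init__(self, rule, ptr, start):
            self.rule, self.ptr, self.start = rule, ptr, start
            self.is_complete = len(rule.expansion) == ptr
            if self.is_complete:
                self.s, self.expect = rule.origin, None
            else:
                self.s, self.expect = (rule, ptr), rule.expansion[ptr]
            self._hash = hash((self.s, self.start))
        def __eq__(self, other):
            return self is other or (self.s == other.s and self.start == other.start)
        def __hash__(self):
            return self._hash

    r = RuleStub('sum', ['sum', 'PLUS', 'product'])
    column = {ItemStub(r, 1, 0)}
    column.add(ItemStub(r, 1, 0))   # same dotted rule and origin: ignored
    assert len(column) == 1

TransitiveItem extends the same identity with the recognized symbol, which is why it re-derives _hash as (s, start, recognized).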

lark/parsers/earley_forest.py  (+148 -34)

@@ -7,14 +7,15 @@ Full reference and more details is here:
 http://www.bramvandersanden.com/post/2014/06/shared-packed-parse-forest/
 """
 
+from random import randint
 from ..tree import Tree
 from ..exceptions import ParseError
 from ..lexer import Token
 from ..utils import Str
-from ..grammar import NonTerminal, Terminal
-from .earley_common import Column, Derivation
+from ..grammar import NonTerminal, Terminal, Symbol
 
 from collections import deque
+from importlib import import_module
 
 class ForestNode(object):
     pass
@@ -33,36 +34,65 @@ class SymbolNode(ForestNode):
 
     Hence a Symbol Node with a single child is unambiguous.
     """
-    __slots__ = ('s', 'start', 'end', 'children', 'priority', 'is_intermediate')
+    __slots__ = ('s', 'start', 'end', '_children', 'paths', 'paths_loaded', 'priority', 'is_intermediate', '_hash')
     def __init__(self, s, start, end):
         self.s = s
         self.start = start
         self.end = end
-        self.children = set()
+        self._children = set()
+        self.paths = set()
+        self.paths_loaded = False
         self.priority = None
         self.is_intermediate = isinstance(s, tuple)
+        self._hash = hash((self.s, self.start, self.end))
 
     def add_family(self, lr0, rule, start, left, right):
-        self.children.add(PackedNode(self, lr0, rule, start, left, right))
+        self._children.add(PackedNode(self, lr0, rule, start, left, right))
+
+    def add_path(self, transitive, node):
+        self.paths.add((transitive, node))
+
+    def load_paths(self):
+        for transitive, node in self.paths:
+            if transitive.next_titem is not None:
+                vn = SymbolNode(transitive.next_titem.s, transitive.next_titem.start, self.end)
+                vn.add_path(transitive.next_titem, node)
+                self.add_family(transitive.reduction.rule.origin, transitive.reduction.rule, transitive.reduction.start, transitive.reduction.node, vn)
+            else:
+                self.add_family(transitive.reduction.rule.origin, transitive.reduction.rule, transitive.reduction.start, transitive.reduction.node, node)
+        self.paths_loaded = True
 
     @property
     def is_ambiguous(self):
         return len(self.children) > 1
 
+    @property
+    def children(self):
+        if not self.paths_loaded:
+            self.load_paths()
+        return self._children
+
     def __iter__(self):
-        return iter(self.children)
+        return iter(self._children)
 
     def __eq__(self, other):
         if not isinstance(other, SymbolNode):
             return False
-        return self is other or (self.s == other.s and self.start == other.start and self.end is other.end)
+        return self is other or (type(self.s) == type(other.s) and self.s == other.s and self.start == other.start and self.end is other.end)
 
     def __hash__(self):
-        return hash((self.s, self.start.i, self.end.i))
+        return self._hash
 
     def __repr__(self):
-        symbol = self.s.name if isinstance(self.s, (NonTerminal, Terminal)) else self.s[0].origin.name
-        return "(%s, %d, %d, %d)" % (symbol, self.start.i, self.end.i, self.priority if self.priority is not None else 0)
+        if self.is_intermediate:
+            rule = self.s[0]
+            ptr = self.s[1]
+            before = ( expansion.name for expansion in rule.expansion[:ptr] )
+            after = ( expansion.name for expansion in rule.expansion[ptr:] )
+            symbol = "{} ::= {}* {}".format(rule.origin.name, ' '.join(before), ' '.join(after))
+        else:
+            symbol = self.s.name
+        return "({}, {}, {}, {})".format(symbol, self.start, self.end, self.priority if self.priority is not None else 0)
 
 class PackedNode(ForestNode):
     """
@@ -77,7 +107,7 @@ class PackedNode(ForestNode):
         self.left = left
         self.right = right
         self.priority = None
-        self._hash = hash((self.s, self.start.i, self.left, self.right))
+        self._hash = hash((self.s, self.start, self.left, self.right))
 
     @property
     def is_empty(self):
@@ -105,8 +135,15 @@ class PackedNode(ForestNode):
         return self._hash
 
     def __repr__(self):
-        symbol = self.s.name if isinstance(self.s, (NonTerminal, Terminal)) else self.s[0].origin.name
-        return "{%s, %d, %s, %s, %s}" % (symbol, self.start.i, self.left, self.right, self.priority if self.priority is not None else 0)
+        if isinstance(self.s, tuple):
+            rule = self.s[0]
+            ptr = self.s[1]
+            before = ( expansion.name for expansion in rule.expansion[:ptr] )
+            after = ( expansion.name for expansion in rule.expansion[ptr:] )
+            symbol = "{} ::= {}* {}".format(rule.origin.name, ' '.join(before), ' '.join(after))
+        else:
+            symbol = self.s.name
+        return "({}, {}, {})".format(symbol, self.start, self.priority)
 
 class ForestVisitor(object):
     """
@@ -114,9 +151,7 @@ class ForestVisitor(object):
 
     Use this as a base when you need to walk the forest.
     """
-    def __init__(self, root):
-        self.root = root
-        self.result = None
+    __slots__ = ['result']
 
     def visit_token_node(self, node): pass
     def visit_symbol_node_in(self, node): pass
@@ -124,7 +159,8 @@ class ForestVisitor(object):
     def visit_packed_node_in(self, node): pass
     def visit_packed_node_out(self, node): pass
 
-    def go(self):
+    def go(self, root):
+        self.result = None
         # Visiting is a list of IDs of all symbol/intermediate nodes currently in
         # the stack. It serves two purposes: to detect when we 'recurse' in and out
         # of a symbol/intermediate so that we can process both up and down. Also,
@@ -134,7 +170,7 @@ class ForestVisitor(object):
 
         # We do not use recursion here to walk the Forest due to the limited
         # stack size in python. Therefore input_stack is essentially our stack.
-        input_stack = deque([self.root])
+        input_stack = deque([root])
 
         # It is much faster to cache these as locals since they are called
         # many times in large parses.
@@ -170,8 +206,8 @@ class ForestVisitor(object):
 
             current_id = id(current)
             if current_id in visiting:
-                if isinstance(current, PackedNode): vpno(current)
-                else: vsno(current)
+                if isinstance(current, PackedNode): vpno(current)
+                else: vsno(current)
                 input_stack.pop()
                 visiting.remove(current_id)
                 continue
@@ -214,7 +250,7 @@ class ForestSumVisitor(ForestVisitor):
 
     def visit_symbol_node_out(self, node):
         node.priority = max(child.priority for child in node.children)
-        node.children = sorted(node.children, reverse = True)
+        node._children = sorted(node.children, reverse = True)
 
 class ForestAntiscoreSumVisitor(ForestSumVisitor):
     """
@@ -228,7 +264,7 @@ class ForestAntiscoreSumVisitor(ForestSumVisitor):
     """
     def visit_symbol_node_out(self, node):
         node.priority = min(child.priority for child in node.children)
-        node.children = sorted(node.children, key=AntiscoreSumComparator, reverse = True)
+        node._children = sorted(node.children, key=AntiscoreSumComparator, reverse = True)
 
 class AntiscoreSumComparator(object):
     """
@@ -263,19 +299,21 @@ class ForestToTreeVisitor(ForestVisitor):
     implementation should be another ForestVisitor which sorts the children
     according to some priority mechanism.
     """
-    def __init__(self, root, forest_sum_visitor = ForestSumVisitor, callbacks = None):
-        super(ForestToTreeVisitor, self).__init__(root)
-        self.forest_sum_visitor = forest_sum_visitor
-        self.output_stack = deque()
+    __slots__ = ['forest_sum_visitor', 'output_stack', 'callbacks']
+    def __init__(self, forest_sum_visitor = ForestSumVisitor, callbacks = None):
+        self.forest_sum_visitor = forest_sum_visitor()
         self.callbacks = callbacks
-        self.result = None
+
+    def go(self, root):
+        self.output_stack = deque()
+        return super(ForestToTreeVisitor, self).go(root)
 
     def visit_token_node(self, node):
         self.output_stack[-1].append(node)
 
     def visit_symbol_node_in(self, node):
         if node.is_ambiguous and node.priority is None:
-            self.forest_sum_visitor(node).go()
+            self.forest_sum_visitor.go(node)
         return next(iter(node.children))
 
     def visit_packed_node_in(self, node):
@@ -311,11 +349,13 @@ class ForestToAmbiguousTreeVisitor(ForestVisitor):
     This is mainly used by the test framework, to make it simpler to write
     tests ensuring the SPPF contains the right results.
     """
-    def __init__(self, root, callbacks):
-        super(ForestToAmbiguousTreeVisitor, self).__init__(root)
-        self.output_stack = deque()
+    __slots__ = ['output_stack', 'callbacks']
+    def __init__(self, callbacks):
         self.callbacks = callbacks
-        self.result = None
+
+    def go(self, root):
+        self.output_stack = deque([])
+        return super(ForestToAmbiguousTreeVisitor, self).go(root)
 
     def visit_token_node(self, node):
         self.output_stack[-1].children.append(node)
@@ -326,7 +366,7 @@ class ForestToAmbiguousTreeVisitor(ForestVisitor):
         return iter(node.children)
 
     def visit_symbol_node_out(self, node):
-        if node.is_ambiguous:
+        if not node.is_intermediate and node.is_ambiguous:
             result = self.output_stack.pop()
             if self.output_stack:
                 self.output_stack[-1].children.append(result)
@@ -347,4 +387,78 @@ class ForestToAmbiguousTreeVisitor(ForestVisitor):
             if self.output_stack:
                 self.output_stack[-1].children.append(result)
             else:
-                self.result = result
+                self.result = result
+
+class ForestToPyDotVisitor(ForestVisitor):
+    """
+    A Forest visitor which writes the SPPF to a PNG.
+
+    The SPPF can get really large, really quickly because
+    of the amount of meta-data it stores, so this is probably
+    only useful for trivial trees and learning how the SPPF
+    is structured.
+    """
+    def __init__(self, rankdir="TB"):
+        self.pydot = import_module('pydot')
+        self.graph = self.pydot.Dot(graph_type='digraph', rankdir=rankdir)
+
+    def go(self, root, filename):
+        super(ForestToPyDotVisitor, self).go(root)
+        self.graph.write_png(filename)
+
+    def visit_token_node(self, node):
+        graph_node_id = str(id(node))
+        graph_node_label = "\"{}\"".format(node.value.replace('"', '\\"'))
+        graph_node_color = 0x808080
+        graph_node_style = "\"filled,rounded\""
+        graph_node_shape = "diamond"
+        graph_node = self.pydot.Node(graph_node_id, style=graph_node_style, fillcolor="#{:06x}".format(graph_node_color), shape=graph_node_shape, label=graph_node_label)
+        self.graph.add_node(graph_node)
+
+    def visit_packed_node_in(self, node):
+        graph_node_id = str(id(node))
+        graph_node_label = repr(node)
+        graph_node_color = 0x808080
+        graph_node_style = "filled"
+        graph_node_shape = "diamond"
+        graph_node = self.pydot.Node(graph_node_id, style=graph_node_style, fillcolor="#{:06x}".format(graph_node_color), shape=graph_node_shape, label=graph_node_label)
+        self.graph.add_node(graph_node)
+        return iter([node.left, node.right])
+
+    def visit_packed_node_out(self, node):
+        graph_node_id = str(id(node))
+        graph_node = self.graph.get_node(graph_node_id)[0]
+        for child in [node.left, node.right]:
+            if child is not None:
+                child_graph_node_id = str(id(child))
+                child_graph_node = self.graph.get_node(child_graph_node_id)[0]
+                self.graph.add_edge(self.pydot.Edge(graph_node, child_graph_node))
+            else:
+                #### Try and be above the Python object ID range; probably impl. specific, but maybe this is okay.
+                child_graph_node_id = str(randint(100000000000000000000000000000,123456789012345678901234567890))
+                child_graph_node_style = "invis"
+                child_graph_node = self.pydot.Node(child_graph_node_id, style=child_graph_node_style, label="None")
+                child_edge_style = "invis"
+                self.graph.add_node(child_graph_node)
+                self.graph.add_edge(self.pydot.Edge(graph_node, child_graph_node, style=child_edge_style))
+
+    def visit_symbol_node_in(self, node):
+        graph_node_id = str(id(node))
+        graph_node_label = repr(node)
+        graph_node_color = 0x808080
+        graph_node_style = "\"filled\""
+        if node.is_intermediate:
+            graph_node_shape = "ellipse"
+        else:
+            graph_node_shape = "rectangle"
+        graph_node = self.pydot.Node(graph_node_id, style=graph_node_style, fillcolor="#{:06x}".format(graph_node_color), shape=graph_node_shape, label=graph_node_label)
+        self.graph.add_node(graph_node)
+        return iter(node.children)
+
+    def visit_symbol_node_out(self, node):
+        graph_node_id = str(id(node))
+        graph_node = self.graph.get_node(graph_node_id)[0]
+        for child in node.children:
+            child_graph_node_id = str(id(child))
+            child_graph_node = self.graph.get_node(child_graph_node_id)[0]
+            self.graph.add_edge(self.pydot.Edge(graph_node, child_graph_node))
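
ForestVisitor.go() walks the SPPF with an explicit stack instead of recursion, since a deep parse would exceed Python's recursion limit: each node is visited "in" on the way down and "out" once all of its children have been popped. A stripped-down version of that in/out pattern over a plain tree (hypothetical Node class, single visit per node, no SPPF specifics):

    from collections import deque

    class Node:
        def __init__(self, name, children=()):
            self.name, self.children = name, list(children)

    def walk(root, visit_in, visit_out):
        input_stack = deque([root])
        visiting = set()            # ids of nodes currently on the stack
        while input_stack:
            current = input_stack[-1]
            if id(current) in visiting:
                # Second encounter: every child has been processed.
                visit_out(current)
                input_stack.pop()
                visiting.remove(id(current))
                continue
            # First encounter: go 'in', then queue the children.
            visiting.add(id(current))
            visit_in(current)
            for child in reversed(current.children):
                input_stack.append(child)

    tree = Node('root', [Node('a'), Node('b', [Node('c')])])
    walk(tree, lambda n: print('in ', n.name), lambda n: print('out', n.name))

The new ForestToPyDotVisitor rides on the same walk; it imports pydot only when constructed, so rendering an SPPF to PNG with visitor.go(root, "forest.png") presumably requires the pydot package to be installed.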

lark/parsers/xearley.py  (+142 -49)

@@ -22,7 +22,8 @@ from ..exceptions import ParseError, UnexpectedCharacters
 from ..lexer import Token
 from .grammar_analysis import GrammarAnalyzer
 from ..grammar import NonTerminal, Terminal
-from .earley_common import Column, Item
+from .earley import ApplyCallbacks
+from .earley_common import Item, TransitiveItem
 from .earley_forest import ForestToTreeVisitor, ForestSumVisitor, SymbolNode, ForestToAmbiguousTreeVisitor
 
@@ -31,11 +32,11 @@ class Parser:
         analysis = GrammarAnalyzer(parser_conf)
         self.parser_conf = parser_conf
         self.resolve_ambiguity = resolve_ambiguity
-        self.forest_sum_visitor = forest_sum_visitor
         self.ignore = [Terminal(t) for t in ignore]
         self.complete_lex = complete_lex
 
         self.FIRST = analysis.FIRST
+        self.NULLABLE = analysis.NULLABLE
         self.callbacks = {}
         self.predictions = {}
 
@@ -43,10 +44,12 @@ class Parser:
         #  the slow 'isupper' in is_terminal.
         self.TERMINALS = { sym for r in parser_conf.rules for sym in r.expansion if sym.is_term }
         self.NON_TERMINALS = { sym for r in parser_conf.rules for sym in r.expansion if not sym.is_term }
+
         for rule in parser_conf.rules:
             self.callbacks[rule] = getattr(parser_conf.callback, rule.alias or rule.origin, None)
             self.predictions[rule.origin] = [x.rule for x in analysis.expand_rule(rule.origin)]
 
+        self.forest_tree_visitor = ForestToTreeVisitor(forest_sum_visitor, self.callbacks)
         self.term_matcher = term_matcher
 
     def parse(self, stream, start_symbol=None):
@@ -60,19 +63,74 @@ class Parser:
         # Cache for nodes & tokens created in a particular parse step.
         node_cache = {}
         token_cache = {}
+        columns = []
+        transitives = []
 
         text_line = 1
         text_column = 1
 
-        def make_symbol_node(s, start, end):
-            label = (s, start.i, end.i)
-            if label in node_cache:
-                node = node_cache[label]
+        def is_quasi_complete(item):
+            if item.is_complete:
+                return True
+
+            quasi = item.advance()
+            while not quasi.is_complete:
+                symbol = quasi.expect
+                if symbol not in self.NULLABLE:
+                    return False
+                if quasi.rule.origin == start_symbol and symbol == start_symbol:
+                    return False
+                quasi = quasi.advance()
+            return True
+
+        def create_leo_transitives(item, trule, previous, visited = None):
+            if visited is None:
+                visited = set()
+
+            if item.rule.origin in transitives[item.start]:
+                previous = trule = transitives[item.start][item.rule.origin]
+                return trule, previous
+
+            is_empty_rule = not self.FIRST[item.rule.origin]
+            if is_empty_rule:
+                return trule, previous
+
+            originator = None
+            for key in columns[item.start]:
+                if key.expect is not None and key.expect == item.rule.origin:
+                    if originator is not None:
+                        return trule, previous
+                    originator = key
+
+            if originator is None:
+                return trule, previous
+
+            if originator in visited:
+                return trule, previous
+
+            visited.add(originator)
+            if not is_quasi_complete(originator):
+                return trule, previous
+
+            trule = originator.advance()
+            if originator.start != item.start:
+                visited.clear()
+
+            trule, previous = create_leo_transitives(originator, trule, previous, visited)
+            if trule is None:
+                return trule, previous
+
+            titem = None
+            if previous is not None:
+                titem = TransitiveItem(item.rule.origin, trule, originator, previous.column)
+                previous.next_titem = titem
             else:
-                node = node_cache[label] = SymbolNode(s, start, end)
-            return node
+                titem = TransitiveItem(item.rule.origin, trule, originator, item.start)
+
+            previous = transitives[item.start][item.rule.origin] = titem
+            return trule, previous
 
-        def predict_and_complete(column, to_scan):
+        def predict_and_complete(i, to_scan):
             """The core Earley Predictor and Completer.
 
             At each stage of the input, we handling any completed items (things
@@ -82,61 +140,90 @@ class Parser:
             which can be added to the scan list for the next scanner cycle."""
             held_completions.clear()
 
+            column = columns[i]
             # R (items) = Ei (column.items)
-            items = deque(column.items)
+            items = deque(column)
             while items:
                 item = items.pop()    # remove an element, A say, from R
 
                 ### The Earley completer
                 if item.is_complete:   ### (item.s == string)
                     if item.node is None:
-                        item.node = make_symbol_node(item.s, item.start, column)
+                        label = (item.s, item.start, i)
+                        item.node = node_cache[label] if label in node_cache else node_cache.setdefault(label, SymbolNode(*label))
                         item.node.add_family(item.s, item.rule, item.start, None, None)
 
-                    # Empty has 0 length. If we complete an empty symbol in a particular
-                    # parse step, we need to be able to use that same empty symbol to complete
-                    # any predictions that result, that themselves require empty. Avoids
-                    # infinite recursion on empty symbols.
-                    # held_completions is 'H' in E.Scott's paper.
-                    is_empty_item = item.start.i == column.i
-                    if is_empty_item:
-                        held_completions[item.rule.origin] = item.node
-
-                    originators = [originator for originator in item.start.items if originator.expect is not None and originator.expect == item.s]
-                    for originator in originators:
-                        new_item = originator.advance()
-                        new_item.node = make_symbol_node(new_item.s, originator.start, column)
-                        new_item.node.add_family(new_item.s, new_item.rule, new_item.start, originator.node, item.node)
+                    create_leo_transitives(item, None, None)
+
+                    ###R Joop Leo right recursion Completer
+                    if item.rule.origin in transitives[item.start]:
+                        transitive = transitives[item.start][item.s]
+                        if transitive.previous in transitives[transitive.column]:
+                            root_transitive = transitives[transitive.column][transitive.previous]
+                        else:
+                            root_transitive = transitive
+
+                        label = (root_transitive.s, root_transitive.start, i)
+                        node = vn = node_cache[label] if label in node_cache else node_cache.setdefault(label, SymbolNode(*label))
+                        vn.add_path(root_transitive, item.node)
+
+                        new_item = Item(transitive.rule, transitive.ptr, transitive.start)
+                        new_item.node = vn
                         if new_item.expect in self.TERMINALS:
                             # Add (B :: aC.B, h, y) to Q
                             to_scan.add(new_item)
-                        elif new_item not in column.items:
+                        elif new_item not in column:
                             # Add (B :: aC.B, h, y) to Ei and R
                             column.add(new_item)
                             items.append(new_item)
+                    ###R Regular Earley completer
+                    else:
+                        # Empty has 0 length. If we complete an empty symbol in a particular
+                        # parse step, we need to be able to use that same empty symbol to complete
+                        # any predictions that result, that themselves require empty. Avoids
+                        # infinite recursion on empty symbols.
+                        # held_completions is 'H' in E.Scott's paper.
+                        is_empty_item = item.start == i
+                        if is_empty_item:
+                            held_completions[item.rule.origin] = item.node
+
+                        originators = [originator for originator in columns[item.start] if originator.expect is not None and originator.expect == item.s]
+                        for originator in originators:
+                            new_item = originator.advance()
+                            label = (new_item.s, originator.start, i)
+                            new_item.node = node_cache[label] if label in node_cache else node_cache.setdefault(label, SymbolNode(*label))
+                            new_item.node.add_family(new_item.s, new_item.rule, i, originator.node, item.node)
+                            if new_item.expect in self.TERMINALS:
+                                # Add (B :: aC.B, h, y) to Q
+                                to_scan.add(new_item)
+                            elif new_item not in column:
+                                # Add (B :: aC.B, h, y) to Ei and R
+                                column.add(new_item)
+                                items.append(new_item)
 
                 ### The Earley predictor
                 elif item.expect in self.NON_TERMINALS: ### (item.s == lr0)
                     new_items = []
                     for rule in self.predictions[item.expect]:
-                        new_item = Item(rule, 0, column)
+                        new_item = Item(rule, 0, i)
                         new_items.append(new_item)
 
                     # Process any held completions (H).
                     if item.expect in held_completions:
                         new_item = item.advance()
-                        new_item.node = make_symbol_node(new_item.s, item.start, column)
+                        label = (new_item.s, item.start, i)
+                        new_item.node = node_cache[label] if label in node_cache else node_cache.setdefault(label, SymbolNode(*label))
                        new_item.node.add_family(new_item.s, new_item.rule, new_item.start, item.node, held_completions[item.expect])
                        new_items.append(new_item)
 
                     for new_item in new_items:
                         if new_item.expect in self.TERMINALS:
                             to_scan.add(new_item)
-                        elif new_item not in column.items:
+                        elif new_item not in column:
                             column.add(new_item)
                             items.append(new_item)
 
-        def scan(i, column, to_scan):
+        def scan(i, to_scan):
             """The core Earley Scanner.
 
             This is a custom implementation of the scanner that uses the
@@ -155,7 +242,7 @@ class Parser:
                 m = match(item.expect, stream, i)
                 if m:
                     t = Token(item.expect.name, m.group(0), i, text_line, text_column)
-                    delayed_matches[m.end()].append( (item, column, t) )
+                    delayed_matches[m.end()].append( (item, i, t) )
 
                     if self.complete_lex:
                         s = m.group(0)
@@ -163,7 +250,7 @@ class Parser:
                             m = match(item.expect, s[:-j])
                             if m:
                                 t = Token(item.expect.name, m.group(0), i, text_line, text_column)
-                                delayed_matches[i+m.end()].append( (item, column, t) )
+                                delayed_matches[i+m.end()].append( (item, i, t) )
 
             # Remove any items that successfully matched in this pass from the to_scan buffer.
             # This ensures we don't carry over tokens that already matched, if we're ignoring below.
@@ -177,13 +264,16 @@ class Parser:
                 m = match(x, stream, i)
                 if m:
                     # Carry over any items still in the scan buffer, to past the end of the ignored items.
-                    delayed_matches[m.end()].extend([(item, column, None) for item in to_scan ])
+                    delayed_matches[m.end()].extend([(item, i, None) for item in to_scan ])
 
                     # If we're ignoring up to the end of the file, # carry over the start symbol if it already completed.
-                    delayed_matches[m.end()].extend([(item, column, None) for item in column.items if item.is_complete and item.s == start_symbol])
+                    delayed_matches[m.end()].extend([(item, i, None) for item in columns[i] if item.is_complete and item.s == start_symbol])
 
-            next_set = Column(i + 1, self.FIRST)    # Ei+1
             next_to_scan = set()
+            next_set = set()
+            columns.append(next_set)
+            next_transitives = dict()
+            transitives.append(next_transitives)
 
             ## 4) Process Tokens from delayed_matches.
             # This is the core of the Earley scanner. Create an SPPF node for each Token,
@@ -193,7 +283,8 @@ class Parser:
             for item, start, token in delayed_matches[i+1]:
                 if token is not None:
                     new_item = item.advance()
-                    new_item.node = make_symbol_node(new_item.s, new_item.start, column)
+                    label = (new_item.s, new_item.start, i)
+                    new_item.node = node_cache[label] if label in node_cache else node_cache.setdefault(label, SymbolNode(*label))
                     new_item.node.add_family(new_item.s, item.rule, new_item.start, item.node, token)
                 else:
                     new_item = item
@@ -210,11 +301,11 @@ class Parser:
             if not next_set and not delayed_matches and not next_to_scan:
                 raise UnexpectedCharacters(stream, i, text_line, text_column, {item.expect for item in to_scan}, set(to_scan))
 
-            return next_set, next_to_scan
+            return next_to_scan
 
         # Main loop starts
-        column0 = Column(0, self.FIRST)
-        column = column0
+        columns.append(set())
+        transitives.append(dict())
 
         ## The scan buffer. 'Q' in E.Scott's paper.
         to_scan = set()
@@ -223,38 +314,41 @@ class Parser:
        # Add predicted items to the first Earley set (for the predictor) if they
        # result in a non-terminal, or the scanner if they result in a terminal.
        for rule in self.predictions[start_symbol]:
-            item = Item(rule, 0, column0)
+            item = Item(rule, 0, 0)
            if item.expect in self.TERMINALS:
                to_scan.add(item)
            else:
-                column.add(item)
+                columns[0].add(item)
 
        ## The main Earley loop.
        # Run the Prediction/Completion cycle for any Items in the current Earley set.
        # Completions will be added to the SPPF tree, and predictions will be recursively
        # processed down to terminals/empty nodes to be added to the scanner for the next
        # step.
-        for i, token in enumerate(stream):
-            predict_and_complete(column, to_scan)
+        i = 0
+        for token in stream:
+            predict_and_complete(i, to_scan)
 
            # Clear the node_cache and token_cache, which are only relevant for each
            # step in the Earley pass.
            node_cache.clear()
            token_cache.clear()
-            column, to_scan = scan(i, column, to_scan)
+            node_cache.clear()
+            to_scan = scan(i, to_scan)
 
            if token == '\n':
                text_line += 1
                text_column = 1
            else:
                text_column += 1
+            i += 1
 
-        predict_and_complete(column, to_scan)
+        predict_and_complete(i, to_scan)
 
        ## Column is now the final column in the parse. If the parse was successful, the start
        # symbol should have been completed in the last step of the Earley cycle, and will be in
        # this column. Find the item for the start_symbol, which is the root of the SPPF tree.
-        solutions = [n.node for n in column.items if n.is_complete and n.node is not None and n.s == start_symbol and n.start is column0]
+        solutions = [n.node for n in columns[i] if n.is_complete and n.node is not None and n.s == start_symbol and n.start == 0]
 
        if not solutions:
            expected_tokens = [t.expect for t in to_scan]
@@ -265,9 +359,8 @@ class Parser:
        ## If we're not resolving ambiguity, we just return the root of the SPPF tree to the caller.
        # This means the caller can work directly with the SPPF tree.
        if not self.resolve_ambiguity:
-            return ForestToAmbiguousTreeVisitor(solutions[0], self.callbacks).go()
+            return ForestToAmbiguousTreeVisitor(self.callbacks).go(solutions[0])
 
        # ... otherwise, disambiguate and convert the SPPF to an AST, removing any ambiguities
        # according to the rules.
-        return ForestToTreeVisitor(solutions[0], self.forest_sum_visitor, self.callbacks).go()
+        return self.forest_tree_visitor.go(solutions[0])
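
Across all four files the visitor API shifts from binding the root at construction (ForestToTreeVisitor(root, ...).go()) to passing it at call time (visitor.go(root)), so both parsers can build one visitor in __init__ and reuse it for every parse. A minimal sketch of that pattern; the class and names here are illustrative, not lark's:

    # Sketch: per-parse state moves out of __init__ and into go(),
    # so one visitor instance can be stored on the Parser and reused.
    class ReusableVisitor:
        __slots__ = ('callbacks', 'result', 'output_stack')

        def __init__(self, callbacks=None):
            self.callbacks = callbacks      # configuration only

        def go(self, root):
            self.result = None              # reset per-parse state on every call
            self.output_stack = []
            self.result = root              # stand-in for the real SPPF walk
            return self.result

    visitor = ReusableVisitor()
    assert visitor.go('sppf-root-1') == 'sppf-root-1'
    assert visitor.go('sppf-root-2') == 'sppf-root-2'   # same instance, fresh state

This trades per-call construction cost for shared mutable state on the Parser, which matters only if one Parser instance were used from multiple threads at once.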
