| @@ -66,7 +66,7 @@ class UnexpectedCharacters(LexError, UnexpectedInput): | |||
| if allowed: | |||
| message += '\nExpecting: %s\n' % allowed | |||
| super(UnexpectedCharacters, self).__init__(message) | |||
| super(UnexpectedCharacters, self).__init__(message.encode('utf-8')) | |||
| @@ -84,6 +84,6 @@ class UnexpectedToken(ParseError, UnexpectedInput): | |||
| "Expected one of: \n\t* %s\n" | |||
| % (token, self.line, self.column, '\n\t* '.join(self.expected))) | |||
| super(UnexpectedToken, self).__init__(message) | |||
| super(UnexpectedToken, self).__init__(message.encode('utf-8')) | |||
| ###} | |||
| @@ -62,14 +62,13 @@ class LarkOptions(object): | |||
| self.profile = o.pop('profile', False) | |||
| self.ambiguity = o.pop('ambiguity', 'auto') | |||
| self.propagate_positions = o.pop('propagate_positions', False) | |||
| self.earley__predict_all = o.pop('earley__predict_all', False) | |||
| self.lexer_callbacks = o.pop('lexer_callbacks', {}) | |||
| assert self.parser in ('earley', 'lalr', 'cyk', None) | |||
| if self.parser == 'earley' and self.transformer: | |||
| raise ValueError('Cannot specify an embedded transformer when using the Earley algorithm. ' | |||
| 'Please use your transformer on the resulting parse tree, or use a different algorithm (i.e. lalr)') | |||
| if self.ambiguity == 'explicit' and self.transformer: | |||
| raise ValueError('Cannot specify an embedded transformer when using the Earley algorithm for explicit ambiguity. ' | |||
| 'Please use your transformer on the resulting Forest, or use a different algorithm (i.e. LALR)') | |||
| if o: | |||
| raise ValueError("Unknown options: %s" % o.keys()) | |||
| @@ -176,7 +175,7 @@ class Lark: | |||
| def _build_parser(self): | |||
| self.parser_class = get_frontend(self.options.parser, self.options.lexer) | |||
| self._parse_tree_builder = ParseTreeBuilder(self.rules, self.options.tree_class, self.options.propagate_positions, self.options.keep_all_tokens, self.options.parser!='lalr') | |||
| self._parse_tree_builder = ParseTreeBuilder(self.rules, self.options.tree_class, self.options.propagate_positions, self.options.keep_all_tokens, self.options.parser!='lalr' and self.options.ambiguity=='explicit') | |||
| callback = self._parse_tree_builder.create_callback(self.options.transformer) | |||
| if self.profiler: | |||
| for f in dir(callback): | |||
| @@ -7,6 +7,7 @@ from .visitors import InlineTransformer # XXX Deprecated | |||
| ###{standalone | |||
| from functools import partial, wraps | |||
| from itertools import repeat, product | |||
| class ExpandSingleChild: | |||
| @@ -62,23 +63,11 @@ class PropagatePositions: | |||
| class ChildFilter: | |||
| "Optimized childfilter (assumes no duplication in parse tree, so it's safe to change it)" | |||
| def __init__(self, to_include, node_builder): | |||
| self.node_builder = node_builder | |||
| self.to_include = to_include | |||
| def __call__(self, children): | |||
| filtered = [] | |||
| for i, to_expand in self.to_include: | |||
| if to_expand: | |||
| filtered += children[i].children | |||
| else: | |||
| filtered.append(children[i]) | |||
| return self.node_builder(filtered) | |||
| class ChildFilterLALR(ChildFilter): | |||
| "Optimized childfilter for LALR (assumes no duplication in parse tree, so it's safe to change it)" | |||
| def __call__(self, children): | |||
| filtered = [] | |||
| for i, to_expand in self.to_include: | |||
| @@ -89,19 +78,43 @@ class ChildFilterLALR(ChildFilter): | |||
| filtered = children[i].children | |||
| else: | |||
| filtered.append(children[i]) | |||
| return self.node_builder(filtered) | |||
| def _should_expand(sym): | |||
| return not sym.is_term and sym.name.startswith('_') | |||
| def maybe_create_child_filter(expansion, keep_all_tokens, ambiguous): | |||
| def maybe_create_child_filter(expansion, keep_all_tokens): | |||
| to_include = [(i, _should_expand(sym)) for i, sym in enumerate(expansion) | |||
| if keep_all_tokens or not (sym.is_term and sym.filter_out)] | |||
| if len(to_include) < len(expansion) or any(to_expand for i, to_expand in to_include): | |||
| return partial(ChildFilter if ambiguous else ChildFilterLALR, to_include) | |||
| return partial(ChildFilter, to_include) | |||
| class AmbiguousExpander: | |||
| """Deal with the case where we're expanding children ('_rule') into a parent but the children | |||
| are ambiguous. i.e. (parent->_ambig->_expand_this_rule). In this case, make the parent itself | |||
| ambiguous with as many copies as their are ambiguous children, and then copy the ambiguous children | |||
| into the right parents in the right places, essentially shifting the ambiguiuty up the tree.""" | |||
| def __init__(self, to_expand, tree_class, node_builder): | |||
| self.node_builder = node_builder | |||
| self.tree_class = tree_class | |||
| self.to_expand = to_expand | |||
| def __call__(self, children): | |||
| def _is_ambig_tree(child): | |||
| return hasattr(child, 'data') and child.data == '_ambig' | |||
| ambiguous = [i for i in self.to_expand if _is_ambig_tree(children[i])] | |||
| if ambiguous: | |||
| expand = [iter(child.children) if i in ambiguous else repeat(child) for i, child in enumerate(children)] | |||
| return self.tree_class('_ambig', [self.node_builder(list(f[0])) for f in product(zip(*expand))]) | |||
| return self.node_builder(children) | |||
| def maybe_create_ambiguous_expander(tree_class, expansion, keep_all_tokens): | |||
| to_expand = [i for i, sym in enumerate(expansion) | |||
| if keep_all_tokens or ((not (sym.is_term and sym.filter_out)) and _should_expand(sym))] | |||
| if to_expand: | |||
| return partial(AmbiguousExpander, to_expand, tree_class) | |||
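To make the AmbiguousExpander docstring above concrete, here is a hand-built before/after illustration (not part of the diff) of the tree shapes it produces, using lark's Tree:

```python
from lark import Tree

# Before: the parent holds an ambiguous '_expand_this_rule' child.
before = Tree('parent', [
    Tree('_ambig', [Tree('_expand_this_rule', ['a']),
                    Tree('_expand_this_rule', ['b'])]),
])

# After expansion: the parent itself becomes ambiguous, one copy per alternative,
# with the underscore-rule's children spliced into each copy.
after = Tree('_ambig', [
    Tree('parent', ['a']),
    Tree('parent', ['b']),
])
```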
| class Callback(object): | |||
| pass | |||
| @@ -113,8 +126,6 @@ def ptb_inline_args(func): | |||
| return func(*children) | |||
| return f | |||
| class ParseTreeBuilder: | |||
| def __init__(self, rules, tree_class, propagate_positions=False, keep_all_tokens=False, ambiguous=False): | |||
| self.tree_class = tree_class | |||
| @@ -133,9 +144,10 @@ class ParseTreeBuilder: | |||
| expand_single_child = options.expand1 if options else False | |||
| wrapper_chain = filter(None, [ | |||
| (expand_single_child and not rule.alias) and ExpandSingleChild, | |||
| maybe_create_child_filter(rule.expansion, keep_all_tokens, self.ambiguous), | |||
| self.propagate_positions and PropagatePositions, | |||
| (expand_single_child and not rule.alias) and ExpandSingleChild, | |||
| maybe_create_child_filter(rule.expansion, keep_all_tokens), | |||
| self.ambiguous and maybe_create_ambiguous_expander(self.tree_class, rule.expansion, keep_all_tokens), | |||
| ]) | |||
| yield rule, wrapper_chain | |||
| @@ -4,8 +4,7 @@ from functools import partial | |||
| from .utils import get_regexp_width | |||
| from .parsers.grammar_analysis import GrammarAnalyzer | |||
| from .lexer import TraditionalLexer, ContextualLexer, Lexer, Token | |||
| from .parsers import lalr_parser, earley, xearley, resolve_ambig, cyk | |||
| from .parsers import lalr_parser, earley, earley_forest, xearley, cyk | |||
| from .tree import Tree | |||
| class WithLexer: | |||
| @@ -56,13 +55,13 @@ class LALR_CustomLexer(WithLexer): | |||
| self.lexer = lexer_cls(lexer_conf) | |||
| def get_ambiguity_resolver(options): | |||
| def get_ambiguity_options(options): | |||
| if not options or options.ambiguity == 'resolve': | |||
| return resolve_ambig.standard_resolve_ambig | |||
| return {} | |||
| elif options.ambiguity == 'resolve__antiscore_sum': | |||
| return resolve_ambig.antiscore_sum_resolve_ambig | |||
| return {'forest_sum_visitor': earley_forest.ForestAntiscoreSumVisitor} | |||
| elif options.ambiguity == 'explicit': | |||
| return None | |||
| return {'resolve_ambiguity': False} | |||
| raise ValueError(options) | |||
| def tokenize_text(text): | |||
| @@ -78,8 +77,7 @@ class Earley(WithLexer): | |||
| def __init__(self, lexer_conf, parser_conf, options=None): | |||
| self.init_traditional_lexer(lexer_conf) | |||
| self.parser = earley.Parser(parser_conf, self.match, | |||
| resolve_ambiguity=get_ambiguity_resolver(options)) | |||
| self.parser = earley.Parser(parser_conf, self.match, **get_ambiguity_options(options)) | |||
| def match(self, term, token): | |||
| return term.name == token.type | |||
| @@ -91,11 +89,10 @@ class XEarley: | |||
| self._prepare_match(lexer_conf) | |||
| kw.update(get_ambiguity_options(options)) | |||
| self.parser = xearley.Parser(parser_conf, | |||
| self.match, | |||
| resolve_ambiguity=get_ambiguity_resolver(options), | |||
| ignore=lexer_conf.ignore, | |||
| predict_all=options.earley__predict_all, | |||
| **kw | |||
| ) | |||
| @@ -1,160 +1,44 @@ | |||
| "This module implements an Earley Parser" | |||
| # The parser uses a parse-forest to keep track of derivations and ambiguations. | |||
| # When the parse ends successfully, a disambiguation stage resolves all ambiguity | |||
| # (right now ambiguity resolution is not developed beyond the needs of lark) | |||
| # Afterwards the parse tree is reduced (transformed) according to user callbacks. | |||
| # I use the no-recursion version of Transformer, because the tree might be | |||
| # deeper than Python's recursion limit (a bit absurd, but that's life) | |||
| # | |||
| # The algorithm keeps track of each state set, using a corresponding Column instance. | |||
| # Column keeps track of new items using NewsList instances. | |||
| # | |||
| """This module implements an scanerless Earley parser. | |||
| The core Earley algorithm used here is based on Elizabeth Scott's implementation, here: | |||
| https://www.sciencedirect.com/science/article/pii/S1571066108001497 | |||
| That is probably the best reference for understanding the algorithm here. | |||
| The Earley parser outputs an SPPF-tree as per that document. The SPPF tree format | |||
| is better documented here: | |||
| http://www.bramvandersanden.com/post/2014/06/shared-packed-parse-forest/ | |||
| """ | |||
| # Author: Erez Shinan (2017) | |||
| # Email : erezshin@gmail.com | |||
| from ..tree import Tree | |||
| from ..visitors import Transformer_InPlace, v_args | |||
| from ..exceptions import ParseError, UnexpectedToken | |||
| from .grammar_analysis import GrammarAnalyzer | |||
| from ..grammar import NonTerminal | |||
| from .earley_common import Column, Item | |||
| from .earley_forest import ForestToTreeVisitor, ForestSumVisitor, SymbolNode | |||
| class Derivation(Tree): | |||
| def __init__(self, rule, items=None): | |||
| Tree.__init__(self, 'drv', items or []) | |||
| self.meta.rule = rule | |||
| self._hash = None | |||
| def _pretty_label(self): # Nicer pretty for debugging the parser | |||
| return self.meta.rule.origin.name if self.meta.rule else self.data | |||
| def __hash__(self): | |||
| if self._hash is None: | |||
| self._hash = Tree.__hash__(self) | |||
| return self._hash | |||
| class Item(object): | |||
| "An Earley Item, the atom of the algorithm." | |||
| def __init__(self, rule, ptr, start, tree): | |||
| self.rule = rule | |||
| self.ptr = ptr | |||
| self.start = start | |||
| self.tree = tree if tree is not None else Derivation(self.rule) | |||
| @property | |||
| def expect(self): | |||
| return self.rule.expansion[self.ptr] | |||
| @property | |||
| def is_complete(self): | |||
| return self.ptr == len(self.rule.expansion) | |||
| def advance(self, tree): | |||
| assert self.tree.data == 'drv' | |||
| new_tree = Derivation(self.rule, self.tree.children + [tree]) | |||
| return self.__class__(self.rule, self.ptr+1, self.start, new_tree) | |||
| def __eq__(self, other): | |||
| return self.start is other.start and self.ptr == other.ptr and self.rule == other.rule | |||
| def __hash__(self): | |||
| return hash((self.rule, self.ptr, id(self.start))) # Always runs Derivation.__hash__ | |||
| def __repr__(self): | |||
| before = list(map(str, self.rule.expansion[:self.ptr])) | |||
| after = list(map(str, self.rule.expansion[self.ptr:])) | |||
| return '<(%d) %s : %s * %s>' % (id(self.start), self.rule.origin, ' '.join(before), ' '.join(after)) | |||
| class NewsList(list): | |||
| "Keeps track of newly added items (append-only)" | |||
| def __init__(self, initial=None): | |||
| list.__init__(self, initial or []) | |||
| self.last_iter = 0 | |||
| def get_news(self): | |||
| i = self.last_iter | |||
| self.last_iter = len(self) | |||
| return self[i:] | |||
| class Column: | |||
| "An entry in the table, aka Earley Chart. Contains lists of items." | |||
| def __init__(self, i, FIRST, predict_all=False): | |||
| self.i = i | |||
| self.to_reduce = NewsList() | |||
| self.to_predict = NewsList() | |||
| self.to_scan = [] | |||
| self.item_count = 0 | |||
| self.FIRST = FIRST | |||
| self.predicted = set() | |||
| self.completed = {} | |||
| self.predict_all = predict_all | |||
| def add(self, items): | |||
| """Sort items into scan/predict/reduce newslists | |||
| Makes sure only unique items are added. | |||
| """ | |||
| for item in items: | |||
| item_key = item, item.tree # Elsewhere, tree is not part of the comparison | |||
| if item.is_complete: | |||
| # XXX Potential bug: What happens if there's ambiguity in an empty rule? | |||
| if item.rule.expansion and item_key in self.completed: | |||
| old_tree = self.completed[item_key].tree | |||
| if old_tree == item.tree: | |||
| is_empty = not self.FIRST[item.rule.origin] | |||
| if not is_empty: | |||
| continue | |||
| if old_tree.data != '_ambig': | |||
| new_tree = old_tree.copy() | |||
| new_tree.meta.rule = old_tree.meta.rule | |||
| old_tree.set('_ambig', [new_tree]) | |||
| old_tree.meta.rule = None # No longer a 'drv' node | |||
| if item.tree.children[0] is old_tree: # XXX a little hacky! | |||
| raise ParseError("Infinite recursion in grammar! (Rule %s)" % item.rule) | |||
| if item.tree not in old_tree.children: | |||
| old_tree.children.append(item.tree) | |||
| # old_tree.children.append(item.tree) | |||
| else: | |||
| self.completed[item_key] = item | |||
| self.to_reduce.append(item) | |||
| else: | |||
| if item.expect.is_term: | |||
| self.to_scan.append(item) | |||
| else: | |||
| k = item_key if self.predict_all else item | |||
| if k in self.predicted: | |||
| continue | |||
| self.predicted.add(k) | |||
| self.to_predict.append(item) | |||
| self.item_count += 1 # Only count if actually added | |||
| def __bool__(self): | |||
| return bool(self.item_count) | |||
| __nonzero__ = __bool__ # Py2 backwards-compatibility | |||
| from collections import deque, defaultdict | |||
| class Parser: | |||
| def __init__(self, parser_conf, term_matcher, resolve_ambiguity=None): | |||
| def __init__(self, parser_conf, term_matcher, resolve_ambiguity=True, forest_sum_visitor = ForestSumVisitor): | |||
| analysis = GrammarAnalyzer(parser_conf) | |||
| self.parser_conf = parser_conf | |||
| self.resolve_ambiguity = resolve_ambiguity | |||
| self.forest_sum_visitor = forest_sum_visitor | |||
| self.FIRST = analysis.FIRST | |||
| self.postprocess = {} | |||
| self.callbacks = {} | |||
| self.predictions = {} | |||
| ## These could be moved to the grammar analyzer. Pre-computing these is *much* faster than | |||
| # the slow 'isupper' in is_terminal. | |||
| self.TERMINALS = { sym for r in parser_conf.rules for sym in r.expansion if sym.is_term } | |||
| self.NON_TERMINALS = { sym for r in parser_conf.rules for sym in r.expansion if not sym.is_term } | |||
| for rule in parser_conf.rules: | |||
| self.postprocess[rule] = rule.alias if callable(rule.alias) else getattr(parser_conf.callback, rule.alias) | |||
| self.callbacks[rule] = rule.alias if callable(rule.alias) else getattr(parser_conf.callback, rule.alias) | |||
| self.predictions[rule.origin] = [x.rule for x in analysis.expand_rule(rule.origin)] | |||
| self.term_matcher = term_matcher | |||
| @@ -163,72 +47,163 @@ class Parser: | |||
| def parse(self, stream, start_symbol=None): | |||
| # Define parser functions | |||
| start_symbol = NonTerminal(start_symbol or self.parser_conf.start) | |||
| _Item = Item | |||
| match = self.term_matcher | |||
| def predict(nonterm, column): | |||
| assert not nonterm.is_term, nonterm | |||
| return [_Item(rule, 0, column, None) for rule in self.predictions[nonterm]] | |||
| def complete(item): | |||
| name = item.rule.origin | |||
| return [i.advance(item.tree) for i in item.start.to_predict if i.expect == name] | |||
| def predict_and_complete(column): | |||
| while True: | |||
| to_predict = {x.expect for x in column.to_predict.get_news() | |||
| if x.ptr} # if not part of an already predicted batch | |||
| to_reduce = set(column.to_reduce.get_news()) | |||
| if not (to_predict or to_reduce): | |||
| break | |||
| for nonterm in to_predict: | |||
| column.add( predict(nonterm, column) ) | |||
| for item in to_reduce: | |||
| new_items = list(complete(item)) | |||
| if item in new_items: | |||
| raise ParseError('Infinite recursion detected! (rule %s)' % item.rule) | |||
| column.add(new_items) | |||
| def scan(i, token, column): | |||
| next_set = Column(i, self.FIRST) | |||
| next_set.add(item.advance(token) for item in column.to_scan if match(item.expect, token)) | |||
| if not next_set: | |||
| expect = {i.expect.name for i in column.to_scan} | |||
| raise UnexpectedToken(token, expect, considered_rules=set(column.to_scan)) | |||
| return next_set | |||
| held_completions = defaultdict(list) | |||
| node_cache = {} | |||
| token_cache = {} | |||
| def make_symbol_node(s, start, end): | |||
| label = (s, start.i, end.i) | |||
| if label in node_cache: | |||
| node = node_cache[label] | |||
| else: | |||
| node = node_cache[label] = SymbolNode(s, start, end) | |||
| return node | |||
| def predict_and_complete(column, to_scan): | |||
| """The core Earley Predictor and Completer. | |||
| At each stage of the input, we handle any completed items (things | |||
| that matched on the last cycle) and use those to predict what should | |||
| come next in the input stream. The completions and any predicted | |||
| non-terminals are recursively processed until we reach a set of items | |||
| expecting terminals, which can be added to the scan list for the next scanner cycle.""" | |||
| held_completions.clear() | |||
| # R (items) = Ei (column.items) | |||
| items = deque(column.items) | |||
| while items: | |||
| item = items.pop() # remove an element, A say, from R | |||
| ### The Earley completer | |||
| if item.is_complete: ### (item.s == string) | |||
| if item.node is None: | |||
| item.node = make_symbol_node(item.s, item.start, column) | |||
| item.node.add_family(item.s, item.rule, item.start, None, None) | |||
| # Empty has 0 length. If we complete an empty symbol in a particular | |||
| # parse step, we need to be able to use that same empty symbol to complete | |||
| # any predictions that result, that themselves require empty. Avoids | |||
| # infinite recursion on empty symbols. | |||
| # held_completions is 'H' in E.Scott's paper. | |||
| is_empty_item = item.start.i == column.i | |||
| if is_empty_item: | |||
| held_completions[item.rule.origin] = item.node | |||
| originators = [originator for originator in item.start.items if originator.expect is not None and originator.expect == item.s] | |||
| for originator in originators: | |||
| new_item = originator.advance() | |||
| new_item.node = make_symbol_node(new_item.s, originator.start, column) | |||
| new_item.node.add_family(new_item.s, new_item.rule, new_item.start, originator.node, item.node) | |||
| if new_item.expect in self.TERMINALS: | |||
| # Add (B :: aC.B, h, y) to Q | |||
| to_scan.add(new_item) | |||
| elif new_item not in column.items: | |||
| # Add (B :: aC.B, h, y) to Ei and R | |||
| column.add(new_item) | |||
| items.append(new_item) | |||
| ### The Earley predictor | |||
| elif item.expect in self.NON_TERMINALS: ### (item.s == lr0) | |||
| new_items = [] | |||
| for rule in self.predictions[item.expect]: | |||
| new_item = Item(rule, 0, column) | |||
| new_items.append(new_item) | |||
| # Process any held completions (H). | |||
| if item.expect in held_completions: | |||
| new_item = item.advance() | |||
| new_item.node = make_symbol_node(new_item.s, item.start, column) | |||
| new_item.node.add_family(new_item.s, new_item.rule, new_item.start, item.node, held_completions[item.expect]) | |||
| new_items.append(new_item) | |||
| for new_item in new_items: | |||
| if new_item.expect in self.TERMINALS: | |||
| to_scan.add(new_item) | |||
| elif new_item not in column.items: | |||
| column.add(new_item) | |||
| items.append(new_item) | |||
| def scan(i, token, column, to_scan): | |||
| """The core Earley Scanner. | |||
| This is a custom implementation of the scanner that uses the | |||
| Lark lexer to match tokens. The scan list is built by the | |||
| Earley predictor, based on the previously completed tokens. | |||
| This ensures that at each phase of the parse we have a custom | |||
| lexer context, allowing for more complex ambiguities.""" | |||
| next_set = Column(i+1, self.FIRST) | |||
| next_to_scan = set() | |||
| for item in set(to_scan): | |||
| if match(item.expect, token): | |||
| new_item = item.advance() | |||
| new_item.node = make_symbol_node(new_item.s, new_item.start, column) | |||
| new_item.node.add_family(new_item.s, item.rule, new_item.start, item.node, token) | |||
| if new_item.expect in self.TERMINALS: | |||
| # add (B ::= Aai+1.B, h, y) to Q' | |||
| next_to_scan.add(new_item) | |||
| else: | |||
| # add (B ::= Aa+1.B, h, y) to Ei+1 | |||
| next_set.add(new_item) | |||
| if not next_set and not next_to_scan: | |||
| expect = {i.expect.name for i in to_scan} | |||
| raise UnexpectedToken(token, expect, considered_rules = set(to_scan)) | |||
| return next_set, next_to_scan | |||
| # Main loop starts | |||
| column0 = Column(0, self.FIRST) | |||
| column0.add(predict(start_symbol, column0)) | |||
| column = column0 | |||
| ## The scan buffer. 'Q' in E.Scott's paper. | |||
| to_scan = set() | |||
| ## Predict for the start_symbol. | |||
| # Add predicted items to the first Earley set (for the predictor) if they | |||
| # result in a non-terminal, or the scanner if they result in a terminal. | |||
| for rule in self.predictions[start_symbol]: | |||
| item = Item(rule, 0, column0) | |||
| if item.expect in self.TERMINALS: | |||
| to_scan.add(item) | |||
| else: | |||
| column.add(item) | |||
| ## The main Earley loop. | |||
| # Run the Prediction/Completion cycle for any Items in the current Earley set. | |||
| # Completions will be added to the SPPF tree, and predictions will be recursively | |||
| # processed down to terminals/empty nodes to be added to the scanner for the next | |||
| # step. | |||
| for i, token in enumerate(stream): | |||
| predict_and_complete(column) | |||
| column = scan(i, token, column) | |||
| predict_and_complete(column, to_scan) | |||
| # Clear the node_cache and token_cache, which are only relevant for each | |||
| # step in the Earley pass. | |||
| node_cache.clear() | |||
| token_cache.clear() | |||
| column, to_scan = scan(i, token, column, to_scan) | |||
| predict_and_complete(column) | |||
| predict_and_complete(column, to_scan) | |||
| # Parse ended. Now build a parse tree | |||
| solutions = [n.tree for n in column.to_reduce | |||
| if n.rule.origin==start_symbol and n.start is column0] | |||
| ## Column is now the final column in the parse. If the parse was successful, the start | |||
| # symbol should have been completed in the last step of the Earley cycle, and will be in | |||
| # this column. Find the item for the start_symbol, which is the root of the SPPF tree. | |||
| solutions = [n.node for n in column.items if n.is_complete and n.node is not None and n.s == start_symbol and n.start is column0] | |||
| if not solutions: | |||
| raise ParseError('Incomplete parse: Could not find a solution to input') | |||
| elif len(solutions) == 1: | |||
| tree = solutions[0] | |||
| else: | |||
| tree = Tree('_ambig', solutions) | |||
| if self.resolve_ambiguity: | |||
| tree = self.resolve_ambiguity(tree) | |||
| elif len(solutions) > 1: | |||
| raise ParseError('Earley should not generate multiple start symbol items!') | |||
| return ApplyCallbacks(self.postprocess).transform(tree) | |||
| ## If we're not resolving ambiguity, we just return the root of the SPPF tree to the caller. | |||
| # This means the caller can work directly with the SPPF tree. | |||
| if not self.resolve_ambiguity: | |||
| return solutions[0] | |||
| # ... otherwise, disambiguate and convert the SPPF to an AST, removing any ambiguities | |||
| # according to the rules. | |||
| return ForestToTreeVisitor(solutions[0], self.forest_sum_visitor, self.callbacks).go() | |||
| class ApplyCallbacks(Transformer_InPlace): | |||
| def __init__(self, postprocess): | |||
| @@ -0,0 +1,80 @@ | |||
| "This module implements an Earley Parser" | |||
| # The parser uses a parse-forest to keep track of derivations and ambiguations. | |||
| # When the parse ends successfully, a disambiguation stage resolves all ambiguity | |||
| # (right now ambiguity resolution is not developed beyond the needs of lark) | |||
| # Afterwards the parse tree is reduced (transformed) according to user callbacks. | |||
| # I use the no-recursion version of Transformer, because the tree might be | |||
| # deeper than Python's recursion limit (a bit absurd, but that's life) | |||
| # | |||
| # The algorithm keeps track of each state set, using a corresponding Column instance. | |||
| # Column keeps track of new items using NewsList instances. | |||
| # | |||
| # Author: Erez Shinan (2017) | |||
| # Email : erezshin@gmail.com | |||
| ## for recursive repr | |||
| from ..tree import Tree | |||
| class Derivation(Tree): | |||
| def __init__(self, rule, children = None): | |||
| Tree.__init__(self, 'drv', children if children is not None else []) | |||
| self.meta.rule = rule | |||
| self._hash = None | |||
| def __repr__(self, indent = 0): | |||
| return 'Derivation(%s, %s, %s)' % (self.data, self.meta.rule.origin, '...') | |||
| def __hash__(self): | |||
| if self._hash is None: | |||
| self._hash = Tree.__hash__(self) | |||
| return self._hash | |||
| class Item(object): | |||
| "An Earley Item, the atom of the algorithm." | |||
| __slots__ = ('s', 'rule', 'ptr', 'start', 'is_complete', 'expect', 'node', '_hash') | |||
| def __init__(self, rule, ptr, start): | |||
| self.is_complete = len(rule.expansion) == ptr | |||
| self.rule = rule # rule | |||
| self.ptr = ptr # ptr | |||
| self.start = start # j | |||
| self.node = None # w | |||
| if self.is_complete: | |||
| self.s = rule.origin | |||
| self.expect = None | |||
| else: | |||
| self.s = (rule, ptr) | |||
| self.expect = rule.expansion[ptr] | |||
| self._hash = hash((self.s, self.start.i)) | |||
| def advance(self): | |||
| return self.__class__(self.rule, self.ptr + 1, self.start) | |||
| def __eq__(self, other): | |||
| return self is other or (self.s == other.s and self.start.i == other.start.i) | |||
| def __hash__(self): | |||
| return self._hash | |||
| def __repr__(self): | |||
| return '%s (%d)' % (self.s if self.is_complete else self.rule.origin, self.start.i) | |||
| class Column: | |||
| "An entry in the table, aka Earley Chart. Contains lists of items." | |||
| def __init__(self, i, FIRST): | |||
| self.i = i | |||
| self.items = set() | |||
| self.FIRST = FIRST | |||
| def add(self, item): | |||
| """Sort items into scan/predict/reduce newslists | |||
| Makes sure only unique items are added. | |||
| """ | |||
| self.items.add(item) | |||
| def __bool__(self): | |||
| return bool(self.items) | |||
| __nonzero__ = __bool__ # Py2 backwards-compatibility | |||
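A small sketch of the Item/Column identity semantics defined above. The import path is assumed from this diff (`lark/parsers/earley_common.py`), and the `Rule` namedtuple is a hypothetical stand-in for lark's real Rule:

```python
from collections import namedtuple
from lark.parsers.earley_common import Item, Column   # path assumed from this diff

Rule = namedtuple('Rule', 'origin expansion')          # stand-in for illustration

r = Rule('expr', ('NUMBER',))
col = Column(0, FIRST={})

a = Item(r, 0, col)
b = Item(r, 0, col)
assert a == b and hash(a) == hash(b)   # keyed by (s, start.i), not by object identity

col.add(a)
col.add(b)
assert len(col.items) == 1             # the set keeps a single copy
```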
| @@ -0,0 +1,347 @@ | |||
| """"This module implements an SPPF implementation | |||
| This is used as the primary output mechanism for the Earley parser | |||
| in order to store complex ambiguities. | |||
| Full reference and more details is here: | |||
| http://www.bramvandersanden.com/post/2014/06/shared-packed-parse-forest/ | |||
| """ | |||
| from ..tree import Tree | |||
| from ..exceptions import ParseError | |||
| from ..lexer import Token | |||
| from ..utils import Str | |||
| from ..grammar import NonTerminal, Terminal | |||
| from .earley_common import Column, Derivation | |||
| from collections import deque | |||
| class SymbolNode(object): | |||
| """ | |||
| A Symbol Node represents a symbol (or Intermediate LR0). | |||
| Symbol nodes are keyed by the symbol (s). For intermediate nodes | |||
| s will be an LR0, stored as a tuple of (rule, ptr). For completed symbol | |||
| nodes, s will be a string representing the non-terminal origin (i.e. | |||
| the left hand side of the rule). | |||
| The children of a Symbol or Intermediate Node will always be Packed Nodes; | |||
| with each Packed Node child representing a single derivation of a production. | |||
| Hence a Symbol Node with a single child is unambiguous. | |||
| """ | |||
| __slots__ = ('s', 'start', 'end', 'children', 'priority', 'is_intermediate') | |||
| def __init__(self, s, start, end): | |||
| self.s = s | |||
| self.start = start | |||
| self.end = end | |||
| self.children = set() | |||
| self.priority = None | |||
| self.is_intermediate = isinstance(s, tuple) | |||
| def add_family(self, lr0, rule, start, left, right): | |||
| self.children.add(PackedNode(self, lr0, rule, start, left, right)) | |||
| @property | |||
| def is_ambiguous(self): | |||
| return len(self.children) > 1 | |||
| def __iter__(self): | |||
| return iter(self.children) | |||
| def __eq__(self, other): | |||
| if not isinstance(other, SymbolNode): | |||
| return False | |||
| return self is other or (self.s == other.s and self.start == other.start and self.end is other.end) | |||
| def __hash__(self): | |||
| return hash((self.s, self.start.i, self.end.i)) | |||
| def __repr__(self): | |||
| symbol = self.s.name if isinstance(self.s, (NonTerminal, Terminal)) else self.s[0].origin.name | |||
| return "(%s, %d, %d, %d)" % (symbol, self.start.i, self.end.i, self.priority if self.priority is not None else 0) | |||
| class PackedNode(object): | |||
| """ | |||
| A Packed Node represents a single derivation in a symbol node. | |||
| """ | |||
| __slots__ = ('parent', 's', 'rule', 'start', 'left', 'right', 'priority', '_hash') | |||
| def __init__(self, parent, s, rule, start, left, right): | |||
| self.parent = parent | |||
| self.s = s | |||
| self.start = start | |||
| self.rule = rule | |||
| self.left = left | |||
| self.right = right | |||
| self.priority = None | |||
| self._hash = hash((self.s, self.start.i, self.left, self.right)) | |||
| @property | |||
| def is_empty(self): | |||
| return self.left is None and self.right is None | |||
| def __iter__(self): | |||
| return iter([self.left, self.right]) | |||
| def __lt__(self, other): | |||
| if self.is_empty and not other.is_empty: return True | |||
| if self.priority < other.priority: return True | |||
| return False | |||
| def __gt__(self, other): | |||
| if self.is_empty and not other.is_empty: return True | |||
| if self.priority > other.priority: return True | |||
| return False | |||
| def __eq__(self, other): | |||
| if not isinstance(other, PackedNode): | |||
| return False | |||
| return self is other or (self.s == other.s and self.start == other.start and self.left == other.left and self.right == other.right) | |||
| def __hash__(self): | |||
| return self._hash | |||
| def __repr__(self): | |||
| symbol = self.s.name if isinstance(self.s, (NonTerminal, Terminal)) else self.s[0].origin.name | |||
| return "{%s, %d, %s, %s, %s}" % (symbol, self.start.i, self.left, self.right, self.priority if self.priority is not None else 0) | |||
| class ForestVisitor(object): | |||
| """ | |||
| An abstract base class for building forest visitors. | |||
| Use this as a base when you need to walk the forest. | |||
| """ | |||
| def __init__(self, root): | |||
| self.root = root | |||
| self.result = None | |||
| def visit_token_node(self, node): pass | |||
| def visit_symbol_node_in(self, node): pass | |||
| def visit_symbol_node_out(self, node): pass | |||
| def visit_packed_node_in(self, node): pass | |||
| def visit_packed_node_out(self, node): pass | |||
| def go(self): | |||
| # 'visiting' holds the IDs of all symbol/intermediate nodes currently on | |||
| # the stack. It serves two purposes: to detect when we 'recurse' in and out | |||
| # of a symbol/intermediate node, so that we can process it both on the way | |||
| # down and on the way up; and, since the SPPF can have cycles, to detect if | |||
| # we're trying to recurse into a node that's already on the stack (infinite recursion). | |||
| visiting = set() | |||
| # We do not use recursion here to walk the Forest, due to Python's limited | |||
| # stack size. input_stack therefore serves as our explicit stack. | |||
| input_stack = deque([self.root]) | |||
| # It is much faster to cache these as locals since they are called | |||
| # many times in large parses. | |||
| vpno = getattr(self, 'visit_packed_node_out') | |||
| vpni = getattr(self, 'visit_packed_node_in') | |||
| vsno = getattr(self, 'visit_symbol_node_out') | |||
| vsni = getattr(self, 'visit_symbol_node_in') | |||
| vtn = getattr(self, 'visit_token_node') | |||
| while input_stack: | |||
| current = next(reversed(input_stack)) | |||
| try: | |||
| next_node = next(current) | |||
| except StopIteration: | |||
| input_stack.pop() | |||
| continue | |||
| except TypeError: | |||
| ### If the current object is not an iterator, pass through to Token/SymbolNode | |||
| pass | |||
| else: | |||
| if next_node is None: | |||
| continue | |||
| if id(next_node) in visiting: | |||
| raise ParseError("Infinite recursion in grammar!") | |||
| input_stack.append(next_node) | |||
| continue | |||
| if isinstance(current, Str): | |||
| vtn(current) | |||
| input_stack.pop() | |||
| continue | |||
| current_id = id(current) | |||
| if current_id in visiting: | |||
| if isinstance(current, PackedNode): vpno(current) | |||
| else: vsno(current) | |||
| input_stack.pop() | |||
| visiting.remove(current_id) | |||
| continue | |||
| else: | |||
| visiting.add(current_id) | |||
| if isinstance(current, PackedNode): next_node = vpni(current) | |||
| else: next_node = vsni(current) | |||
| if next_node is None: | |||
| continue | |||
| if id(next_node) in visiting: | |||
| raise ParseError("Infinite recursion in grammar!") | |||
| input_stack.append(next_node) | |||
| continue | |||
| return self.result | |||
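As a concrete (hypothetical) use of the visitor protocol above: a subclass only needs to return iterators for the children it wants to descend into, and go() walks the forest iteratively. A minimal sketch:

```python
class NodeCounter(ForestVisitor):
    """Counts symbol/packed node visits while walking an SPPF (illustrative only)."""
    def __init__(self, root):
        super(NodeCounter, self).__init__(root)
        self.result = 0

    def visit_symbol_node_in(self, node):
        self.result += 1
        return iter(node.children)            # descend into the packed children

    def visit_packed_node_in(self, node):
        self.result += 1
        return iter([node.left, node.right])  # descend into left/right sub-nodes

# count = NodeCounter(sppf_root).go()         # sppf_root: a SymbolNode returned by the parser
```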
| class ForestSumVisitor(ForestVisitor): | |||
| """ | |||
| A visitor for prioritizing ambiguous parts of the Forest. | |||
| This visitor is the default when resolving ambiguity. It pushes the priorities | |||
| from the rules into the SPPF nodes, and then sorts the packed-node children | |||
| of each ambiguous symbol or intermediate node according to those priorities. | |||
| This relies on the custom sort function provided in PackedNode.__lt__, which | |||
| uses these priorities (and other factors) to order the ambiguous packed nodes. | |||
| """ | |||
| def visit_packed_node_in(self, node): | |||
| return iter([node.left, node.right]) | |||
| def visit_symbol_node_in(self, node): | |||
| return iter(node.children) | |||
| def visit_packed_node_out(self, node): | |||
| node.priority = 0 | |||
| if node.rule.options and node.rule.options.priority: node.priority += node.rule.options.priority | |||
| if node.right is not None and hasattr(node.right, 'priority'): node.priority += node.right.priority | |||
| if node.left is not None and hasattr(node.left, 'priority'): node.priority += node.left.priority | |||
| def visit_symbol_node_out(self, node): | |||
| node.priority = max(child.priority for child in node.children) | |||
| node.children = sorted(node.children, reverse = True) | |||
| class ForestAntiscoreSumVisitor(ForestSumVisitor): | |||
| """ | |||
| A visitor for prioritizing ambiguous parts of the Forest. | |||
| This visitor is used when the ambiguity option is 'resolve__antiscore_sum'. | |||
| It pushes the priorities from the rules into the SPPF nodes, and implements | |||
| a 'least cost' mechanism for resolving ambiguity (reverse of the default | |||
| priority mechanism). It uses a custom __lt__ comparator key for sorting | |||
| the packed node children. | |||
| """ | |||
| def visit_symbol_node_out(self, node): | |||
| node.priority = min(child.priority for child in node.children) | |||
| node.children = sorted(node.children, key=AntiscoreSumComparator, reverse = True) | |||
| class AntiscoreSumComparator(object): | |||
| """ | |||
| An antiscore-sum comparator for PackedNode objects. | |||
| This allows 'sorting' an iterable of PackedNode objects so that they | |||
| are arranged lowest priority first. | |||
| """ | |||
| __slots__ = ['obj'] | |||
| def __init__(self, obj, *args): | |||
| self.obj = obj | |||
| def __lt__(self, other): | |||
| if self.obj.is_empty and not other.obj.is_empty: return True | |||
| if self.obj.priority > other.obj.priority: return True | |||
| return False | |||
| def __gt__(self, other): | |||
| if self.obj.is_empty and not other.obj.is_empty: return True | |||
| if self.obj.priority < other.obj.priority: return True | |||
| return False | |||
| class ForestToTreeVisitor(ForestVisitor): | |||
| """ | |||
| A Forest visitor which converts an SPPF forest to an unambiguous AST. | |||
| The implementation in this visitor walks only the first child | |||
| of each symbol node. When it finds an ambiguous symbol node it first | |||
| calls the forest_sum_visitor implementation to sort the children | |||
| into preference order using the algorithms defined there, so the first | |||
| child should always be the highest preference. The forest_sum_visitor | |||
| implementation should be another ForestVisitor which sorts the children | |||
| according to some priority mechanism. | |||
| """ | |||
| def __init__(self, root, forest_sum_visitor = ForestSumVisitor, callbacks = None): | |||
| super(ForestToTreeVisitor, self).__init__(root) | |||
| self.forest_sum_visitor = forest_sum_visitor | |||
| self.output_stack = deque() | |||
| self.callbacks = callbacks | |||
| self.result = None | |||
| def visit_token_node(self, node): | |||
| self.output_stack[-1].append(node) | |||
| def visit_symbol_node_in(self, node): | |||
| if node.is_ambiguous and node.priority is None: | |||
| self.forest_sum_visitor(node).go() | |||
| return next(iter(node.children)) | |||
| def visit_packed_node_in(self, node): | |||
| if not node.parent.is_intermediate: | |||
| self.output_stack.append([]) | |||
| return iter([node.left, node.right]) | |||
| def visit_packed_node_out(self, node): | |||
| if not node.parent.is_intermediate: | |||
| result = self.callbacks[node.rule](self.output_stack.pop()) | |||
| if self.output_stack: | |||
| self.output_stack[-1].append(result) | |||
| else: | |||
| self.result = result | |||
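For reference, this visitor is driven exactly as in the earley.py hunk above; a caller holding an SPPF root and the rule callbacks would invoke it the same way:

```python
# 'root' is the completed start-symbol SymbolNode; 'callbacks' maps each Rule to a tree-building callable.
tree = ForestToTreeVisitor(root, ForestSumVisitor, callbacks).go()
```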
| class ForestToAmbiguousTreeVisitor(ForestVisitor): | |||
| """ | |||
| A Forest visitor which converts an SPPF forest to an ambiguous AST. | |||
| Because of the fundamental disparity between what can be stored in | |||
| an SPPF and what can be stored in a Tree, this implementation is not | |||
| complete. It correctly deals with ambiguities that occur on symbol nodes only, | |||
| and cannot deal with ambiguities that occur on intermediate nodes. | |||
| Usually, most parsers can be rewritten to avoid intermediate node | |||
| ambiguities. This implementation could also be fixed, but the code to | |||
| handle intermediate node ambiguities is messy and | |||
| would not be performant. It is much better not to use this and | |||
| instead to correctly disambiguate the forest and only store unambiguous | |||
| parses in Trees. It is here just to provide some parity with the | |||
| old ambiguity='explicit'. | |||
| This is mainly used by the test framework, to make it simpler to write | |||
| tests ensuring the SPPF contains the right results. | |||
| """ | |||
| def __init__(self, root, callbacks): | |||
| super(ForestToAmbiguousTreeVisitor, self).__init__(root) | |||
| self.output_stack = deque() | |||
| self.callbacks = callbacks | |||
| self.result = None | |||
| def visit_token_node(self, node): | |||
| self.output_stack[-1].children.append(node) | |||
| def visit_symbol_node_in(self, node): | |||
| if not node.is_intermediate and node.is_ambiguous: | |||
| self.output_stack.append(Tree('_ambig', [])) | |||
| return iter(node.children) | |||
| def visit_symbol_node_out(self, node): | |||
| if node.is_ambiguous: | |||
| result = self.output_stack.pop() | |||
| if self.output_stack: | |||
| self.output_stack[-1].children.append(result) | |||
| else: | |||
| self.result = result | |||
| def visit_packed_node_in(self, node): | |||
| #### NOTE: | |||
| ## When an intermediate node (node.parent.s == tuple) has ambiguous children this | |||
| ## forest visitor will break. | |||
| if not node.parent.is_intermediate: | |||
| self.output_stack.append(Tree('drv', [])) | |||
| return iter([node.left, node.right]) | |||
| def visit_packed_node_out(self, node): | |||
| if not node.parent.is_intermediate: | |||
| result = self.callbacks[node.rule](self.output_stack.pop().children) | |||
| if self.output_stack: | |||
| self.output_stack[-1].children.append(result) | |||
| else: | |||
| self.result = result | |||
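As the docstring notes, this ambiguous variant is intended mainly for tests; with `resolve_ambiguity=False` the parser hands back the SPPF root, which can then be turned into an `_ambig`-bearing tree like so (sketch, names assumed):

```python
# 'root' is the SPPF root returned when ambiguity is kept; 'callbacks' as above.
ambig_tree = ForestToAmbiguousTreeVisitor(root, callbacks).go()
```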
| @@ -1,109 +0,0 @@ | |||
| from ..utils import compare | |||
| from functools import cmp_to_key | |||
| from ..tree import Tree | |||
| # Standard ambiguity resolver (uses comparison) | |||
| # | |||
| # Author: Erez Sh | |||
| def _compare_rules(rule1, rule2): | |||
| return -compare( len(rule1.expansion), len(rule2.expansion)) | |||
| def _sum_priority(tree): | |||
| p = 0 | |||
| for n in tree.iter_subtrees(): | |||
| try: | |||
| p += n.meta.rule.options.priority or 0 | |||
| except AttributeError: | |||
| pass | |||
| return p | |||
| def _compare_priority(tree1, tree2): | |||
| tree1.iter_subtrees() | |||
| def _compare_drv(tree1, tree2): | |||
| try: | |||
| rule1 = tree1.meta.rule | |||
| except AttributeError: | |||
| rule1 = None | |||
| try: | |||
| rule2 = tree2.meta.rule | |||
| except AttributeError: | |||
| rule2 = None | |||
| if None == rule1 == rule2: | |||
| return compare(tree1, tree2) | |||
| elif rule1 is None: | |||
| return -1 | |||
| elif rule2 is None: | |||
| return 1 | |||
| assert tree1.data != '_ambig' | |||
| assert tree2.data != '_ambig' | |||
| p1 = _sum_priority(tree1) | |||
| p2 = _sum_priority(tree2) | |||
| c = (p1 or p2) and compare(p1, p2) | |||
| if c: | |||
| return c | |||
| c = _compare_rules(tree1.meta.rule, tree2.meta.rule) | |||
| if c: | |||
| return c | |||
| # rules are "equal", so compare trees | |||
| if len(tree1.children) == len(tree2.children): | |||
| for t1, t2 in zip(tree1.children, tree2.children): | |||
| c = _compare_drv(t1, t2) | |||
| if c: | |||
| return c | |||
| return compare(len(tree1.children), len(tree2.children)) | |||
| def _standard_resolve_ambig(tree): | |||
| assert tree.data == '_ambig' | |||
| key_f = cmp_to_key(_compare_drv) | |||
| best = max(tree.children, key=key_f) | |||
| assert best.data == 'drv' | |||
| tree.set('drv', best.children) | |||
| tree.meta.rule = best.meta.rule # needed for applying callbacks | |||
| def standard_resolve_ambig(tree): | |||
| for ambig in tree.find_data('_ambig'): | |||
| _standard_resolve_ambig(ambig) | |||
| return tree | |||
| # Anti-score Sum | |||
| # | |||
| # Author: Uriva (https://github.com/uriva) | |||
| def _antiscore_sum_drv(tree): | |||
| if not isinstance(tree, Tree): | |||
| return 0 | |||
| assert tree.data != '_ambig' | |||
| return _sum_priority(tree) | |||
| def _antiscore_sum_resolve_ambig(tree): | |||
| assert tree.data == '_ambig' | |||
| best = min(tree.children, key=_antiscore_sum_drv) | |||
| assert best.data == 'drv' | |||
| tree.set('drv', best.children) | |||
| tree.meta.rule = best.meta.rule # needed for applying callbacks | |||
| def antiscore_sum_resolve_ambig(tree): | |||
| for ambig in tree.find_data('_ambig'): | |||
| _antiscore_sum_resolve_ambig(ambig) | |||
| return tree | |||
| @@ -1,107 +1,163 @@ | |||
| "This module implements an experimental Earley Parser with a dynamic lexer" | |||
| # The parser uses a parse-forest to keep track of derivations and ambiguations. | |||
| # When the parse ends successfully, a disambiguation stage resolves all ambiguity | |||
| # (right now ambiguity resolution is not developed beyond the needs of lark) | |||
| # Afterwards the parse tree is reduced (transformed) according to user callbacks. | |||
| # I use the no-recursion version of Transformer and Visitor, because the tree might be | |||
| # deeper than Python's recursion limit (a bit absurd, but that's life) | |||
| # | |||
| # The algorithm keeps track of each state set, using a corresponding Column instance. | |||
| # Column keeps track of new items using NewsList instances. | |||
| # | |||
| # Instead of running a lexer beforehand, or using a costy char-by-char method, this parser | |||
| # uses regular expressions by necessity, achieving high-performance while maintaining all of | |||
| # Earley's power in parsing any CFG. | |||
| # | |||
| # | |||
| """This module implements an experimental Earley parser with a dynamic lexer | |||
| The core Earley algorithm used here is based on Elizabeth Scott's implementation, here: | |||
| https://www.sciencedirect.com/science/article/pii/S1571066108001497 | |||
| That is probably the best reference for understanding the algorithm here. | |||
| The Earley parser outputs an SPPF-tree as per that document. The SPPF tree format | |||
| is better documented here: | |||
| http://www.bramvandersanden.com/post/2014/06/shared-packed-parse-forest/ | |||
| Instead of running a lexer beforehand, or using a costly char-by-char method, this parser | |||
| uses regular expressions by necessity, achieving high-performance while maintaining all of | |||
| Earley's power in parsing any CFG. | |||
| """ | |||
| # Author: Erez Shinan (2017) | |||
| # Email : erezshin@gmail.com | |||
| from collections import defaultdict | |||
| from collections import defaultdict, deque | |||
| from ..exceptions import ParseError, UnexpectedCharacters | |||
| from ..lexer import Token | |||
| from ..tree import Tree | |||
| from .grammar_analysis import GrammarAnalyzer | |||
| from ..grammar import NonTerminal, Terminal | |||
| from .earley import ApplyCallbacks, Item, Column | |||
| from .earley import ApplyCallbacks | |||
| from .earley_common import Column, Item | |||
| from .earley_forest import ForestToTreeVisitor, ForestSumVisitor, SymbolNode | |||
| class Parser: | |||
| def __init__(self, parser_conf, term_matcher, resolve_ambiguity=None, ignore=(), predict_all=False, complete_lex=False): | |||
| self.analysis = GrammarAnalyzer(parser_conf) | |||
| def __init__(self, parser_conf, term_matcher, resolve_ambiguity=True, forest_sum_visitor = ForestSumVisitor, ignore = (), complete_lex = False): | |||
| analysis = GrammarAnalyzer(parser_conf) | |||
| self.parser_conf = parser_conf | |||
| self.resolve_ambiguity = resolve_ambiguity | |||
| self.forest_sum_visitor = forest_sum_visitor | |||
| self.ignore = [Terminal(t) for t in ignore] | |||
| self.predict_all = predict_all | |||
| self.complete_lex = complete_lex | |||
| self.FIRST = self.analysis.FIRST | |||
| self.postprocess = {} | |||
| self.FIRST = analysis.FIRST | |||
| self.callbacks = {} | |||
| self.predictions = {} | |||
| ## These could be moved to the grammar analyzer. Pre-computing these is *much* faster than | |||
| # the slow 'isupper' in is_terminal. | |||
| self.TERMINALS = { sym for r in parser_conf.rules for sym in r.expansion if sym.is_term } | |||
| self.NON_TERMINALS = { sym for r in parser_conf.rules for sym in r.expansion if not sym.is_term } | |||
| for rule in parser_conf.rules: | |||
| self.postprocess[rule] = getattr(parser_conf.callback, rule.alias) | |||
| self.predictions[rule.origin] = [x.rule for x in self.analysis.expand_rule(rule.origin)] | |||
| self.callbacks[rule] = getattr(parser_conf.callback, rule.alias or rule.origin, None) | |||
| self.predictions[rule.origin] = [x.rule for x in analysis.expand_rule(rule.origin)] | |||
| self.term_matcher = term_matcher | |||
| def parse(self, stream, start_symbol=None): | |||
| # Define parser functions | |||
| start_symbol = NonTerminal(start_symbol or self.parser_conf.start) | |||
| delayed_matches = defaultdict(list) | |||
| match = self.term_matcher | |||
| text_line = 1 | |||
| text_column = 1 | |||
| # Held Completions ('H' in E. Scott's paper). | |||
| held_completions = {} | |||
| def predict(nonterm, column): | |||
| assert not nonterm.is_term, nonterm | |||
| return [Item(rule, 0, column, None) for rule in self.predictions[nonterm]] | |||
| def complete(item): | |||
| name = item.rule.origin | |||
| return [i.advance(item.tree) for i in item.start.to_predict if i.expect == name] | |||
| def predict_and_complete(column): | |||
| while True: | |||
| to_predict = {x.expect for x in column.to_predict.get_news() | |||
| if x.ptr} # if not part of an already predicted batch | |||
| to_reduce = column.to_reduce.get_news() | |||
| if not (to_predict or to_reduce): | |||
| break | |||
| for nonterm in to_predict: | |||
| column.add( predict(nonterm, column) ) | |||
| for item in to_reduce: | |||
| new_items = list(complete(item)) | |||
| if item in new_items: | |||
| raise ParseError('Infinite recursion detected! (rule %s)' % item.rule) | |||
| column.add(new_items) | |||
| def scan(i, column): | |||
| to_scan = column.to_scan | |||
| # Cache for nodes & tokens created in a particular parse step. | |||
| node_cache = {} | |||
| token_cache = {} | |||
| for x in self.ignore: | |||
| m = match(x, stream, i) | |||
| if m: | |||
| delayed_matches[m.end()] += set(to_scan) | |||
| delayed_matches[m.end()] += set(column.to_reduce) | |||
| # TODO add partial matches for ignore too? | |||
| # s = m.group(0) | |||
| # for j in range(1, len(s)): | |||
| # m = x.match(s[:-j]) | |||
| # if m: | |||
| # delayed_matches[m.end()] += to_scan | |||
| text_line = 1 | |||
| text_column = 1 | |||
| for item in to_scan: | |||
| def make_symbol_node(s, start, end): | |||
| label = (s, start.i, end.i) | |||
| if label in node_cache: | |||
| node = node_cache[label] | |||
| else: | |||
| node = node_cache[label] = SymbolNode(s, start, end) | |||
| return node | |||
| def predict_and_complete(column, to_scan): | |||
| """The core Earley Predictor and Completer. | |||
| At each stage of the input, we handle any completed items (things | |||
| that matched on the last cycle) and use those to predict what should | |||
| come next in the input stream. The completions and any predicted | |||
| non-terminals are recursively processed until we reach a set of items | |||
| expecting terminals, which can be added to the scan list for the next scanner cycle.""" | |||
| held_completions.clear() | |||
| # R (items) = Ei (column.items) | |||
| items = deque(column.items) | |||
| while items: | |||
| item = items.pop() # remove an element, A say, from R | |||
| ### The Earley completer | |||
| if item.is_complete: ### (item.s == string) | |||
| if item.node is None: | |||
| item.node = make_symbol_node(item.s, item.start, column) | |||
| item.node.add_family(item.s, item.rule, item.start, None, None) | |||
| # Empty has 0 length. If we complete an empty symbol in a particular | |||
| # parse step, we need to be able to use that same empty symbol to complete | |||
| # any predictions that result, that themselves require empty. Avoids | |||
| # infinite recursion on empty symbols. | |||
| # held_completions is 'H' in E.Scott's paper. | |||
| is_empty_item = item.start.i == column.i | |||
| if is_empty_item: | |||
| held_completions[item.rule.origin] = item.node | |||
| originators = [originator for originator in item.start.items if originator.expect is not None and originator.expect == item.s] | |||
| for originator in originators: | |||
| new_item = originator.advance() | |||
| new_item.node = make_symbol_node(new_item.s, originator.start, column) | |||
| new_item.node.add_family(new_item.s, new_item.rule, new_item.start, originator.node, item.node) | |||
| if new_item.expect in self.TERMINALS: | |||
| # Add (B :: aC.B, h, y) to Q | |||
| to_scan.add(new_item) | |||
| elif new_item not in column.items: | |||
| # Add (B :: aC.B, h, y) to Ei and R | |||
| column.add(new_item) | |||
| items.append(new_item) | |||
| ### The Earley predictor | |||
| elif item.expect in self.NON_TERMINALS: ### (item.s == lr0) | |||
| new_items = [] | |||
| for rule in self.predictions[item.expect]: | |||
| new_item = Item(rule, 0, column) | |||
| new_items.append(new_item) | |||
| # Process any held completions (H). | |||
| if item.expect in held_completions: | |||
| new_item = item.advance() | |||
| new_item.node = make_symbol_node(new_item.s, item.start, column) | |||
| new_item.node.add_family(new_item.s, new_item.rule, new_item.start, item.node, held_completions[item.expect]) | |||
| new_items.append(new_item) | |||
| for new_item in new_items: | |||
| if new_item.expect in self.TERMINALS: | |||
| to_scan.add(new_item) | |||
| elif new_item not in column.items: | |||
| column.add(new_item) | |||
| items.append(new_item) | |||
| def scan(i, column, to_scan): | |||
| """The core Earley Scanner. | |||
| This is a custom implementation of the scanner that uses the | |||
| Lark lexer to match tokens. The scan list is built by the | |||
| Earley predictor, based on the previously completed tokens. | |||
| This ensures that at each phase of the parse we have a custom | |||
| lexer context, allowing for more complex ambiguities.""" | |||
| # 1) Loop the expectations and ask the lexer to match. | |||
| # Since regexp is forward looking on the input stream, and we only | |||
| # want to process tokens when we hit the point in the stream at which | |||
| # they complete, we push all tokens into a buffer (delayed_matches), to | |||
| # be held possibly for a later parse step when we reach the point in the | |||
| # input stream at which they complete. | |||
| for item in set(to_scan): | |||
| m = match(item.expect, stream, i) | |||
| if m: | |||
| t = Token(item.expect.name, m.group(0), i, text_line, text_column) | |||
| delayed_matches[m.end()].append(item.advance(t)) | |||
| delayed_matches[m.end()].append( (item, column, t) ) | |||
| if self.complete_lex: | |||
| s = m.group(0) | |||
| @@ -109,25 +165,85 @@ class Parser: | |||
| m = match(item.expect, s[:-j]) | |||
| if m: | |||
| t = Token(item.expect.name, m.group(0), i, text_line, text_column) | |||
| delayed_matches[i+m.end()].append(item.advance(t)) | |||
| delayed_matches[i+m.end()].append( (item, column, t) ) | |||
| # Remove any items that successfully matched in this pass from the to_scan buffer. | |||
| # This ensures we don't carry over tokens that already matched, if we're ignoring below. | |||
| to_scan.remove(item) | |||
| # 3) Process any ignores. This is typically used for e.g. whitespace. | |||
| # We carry over any unmatched items from the to_scan buffer to be matched again after | |||
| # the ignore. This should allow us to use ignored symbols in non-terminals to implement | |||
| # e.g. mandatory spacing. | |||
| for x in self.ignore: | |||
| m = match(x, stream, i) | |||
| if m: | |||
| # Carry over any items still in the scan buffer, to past the end of the ignored items. | |||
| delayed_matches[m.end()].extend([(item, column, None) for item in to_scan ]) | |||
| # If we're ignoring up to the end of the file, carry over the start symbol if it already completed. | |||
| delayed_matches[m.end()].extend([(item, column, None) for item in column.items if item.is_complete and item.s == start_symbol]) | |||
| next_set = Column(i + 1, self.FIRST) # Ei+1 | |||
| next_to_scan = set() | |||
| ## 4) Process Tokens from delayed_matches. | |||
| # This is the core of the Earley scanner. Create an SPPF node for each Token, | |||
| # and create the symbol node in the SPPF tree. Advance the item that completed, | |||
| # and add the resulting new item to either the Earley set (for processing by the | |||
| # completer/predictor) or the to_scan buffer for the next parse step. | |||
| for item, start, token in delayed_matches[i+1]: | |||
| if token is not None: | |||
| new_item = item.advance() | |||
| new_item.node = make_symbol_node(new_item.s, new_item.start, column) | |||
| new_item.node.add_family(new_item.s, item.rule, new_item.start, item.node, token) | |||
| else: | |||
| new_item = item | |||
| if new_item.expect in self.TERMINALS: | |||
| # add (B ::= Aai+1.B, h, y) to Q' | |||
| next_to_scan.add(new_item) | |||
| else: | |||
| # add (B ::= Aa+1.B, h, y) to Ei+1 | |||
| next_set.add(new_item) | |||
| next_set = Column(i+1, self.FIRST, predict_all=self.predict_all) | |||
| next_set.add(delayed_matches[i+1]) | |||
| del delayed_matches[i+1] # No longer needed, so unburden memory | |||
| if not next_set and not delayed_matches: | |||
| if not next_set and not delayed_matches and not next_to_scan: | |||
| raise UnexpectedCharacters(stream, i, text_line, text_column, {item.expect for item in to_scan}, set(to_scan)) | |||
| return next_set | |||
| return next_set, next_to_scan | |||
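As a brief aside on the mechanism the scanner comments above describe: every regex match is keyed by the stream position at which it ends, and is only consumed once the main loop reaches that position. Below is a minimal, self-contained sketch of that buffering idea; the terminal names, patterns, and input string are illustrative and are not taken from this patch.

```python
import re
from collections import defaultdict

# Illustrative terminals and input; not part of the patch.
patterns = {'WORD': re.compile(r'[a-z]+'), 'NUM': re.compile(r'[0-9]+')}
stream = 'abc123'

delayed_matches = defaultdict(list)  # end position in the stream -> matches that complete there

pos = 0
while pos < len(stream):
    for name, pattern in patterns.items():
        m = pattern.match(stream, pos)
        if m:
            # Held until the scan step for position m.end() runs, i.e. the point
            # in the input stream at which the token completes.
            delayed_matches[m.end()].append((name, m.group(0)))
    # The real scanner advances one character per Earley step; mimic that here.
    pos += 1

print(dict(delayed_matches))
# e.g. {3: [('WORD', 'abc'), ...], 6: [('NUM', '123'), ...]}
```

Keying on the end position is what lets a match made at step i be consumed several steps later, which is how the dynamic lexer can cope with overlapping and ignored tokens.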
| # Main loop starts | |||
| column0 = Column(0, self.FIRST, predict_all=self.predict_all) | |||
| column0.add(predict(start_symbol, column0)) | |||
| column0 = Column(0, self.FIRST) | |||
| column = column0 | |||
| ## The scan buffer. 'Q' in E.Scott's paper. | |||
| to_scan = set() | |||
| ## Predict for the start_symbol. | |||
| # Add predicted items to the first Earley set (for the predictor) if they | |||
| # result in a non-terminal, or the scanner if they result in a terminal. | |||
| for rule in self.predictions[start_symbol]: | |||
| item = Item(rule, 0, column0) | |||
| if item.expect in self.TERMINALS: | |||
| to_scan.add(item) | |||
| else: | |||
| column.add(item) | |||
| ## The main Earley loop. | |||
| # Run the Prediction/Completion cycle for any Items in the current Earley set. | |||
| # Completions will be added to the SPPF tree, and predictions will be recursively | |||
| # processed down to terminals/empty nodes to be added to the scanner for the next | |||
| # step. | |||
| for i, token in enumerate(stream): | |||
| predict_and_complete(column) | |||
| column = scan(i, column) | |||
| predict_and_complete(column, to_scan) | |||
| # Clear the node_cache and token_cache, which are only relevant for each | |||
| # step in the Earley pass. | |||
| node_cache.clear() | |||
| token_cache.clear() | |||
| column, to_scan = scan(i, column, to_scan) | |||
| if token == '\n': | |||
| text_line += 1 | |||
| @@ -135,24 +251,24 @@ class Parser: | |||
| else: | |||
| text_column += 1 | |||
| predict_and_complete(column) | |||
| predict_and_complete(column, to_scan) | |||
| # Parse ended. Now build a parse tree | |||
| solutions = [n.tree for n in column.to_reduce | |||
| if n.rule.origin==start_symbol and n.start is column0] | |||
| ## Column is now the final column in the parse. If the parse was successful, the start | |||
| # symbol should have been completed in the last step of the Earley cycle, and will be in | |||
| # this column. Find the item for the start_symbol, which is the root of the SPPF tree. | |||
| solutions = [n.node for n in column.items if n.is_complete and n.node is not None and n.s == start_symbol and n.start is column0] | |||
| if not solutions: | |||
| expected_tokens = [t.expect for t in column.to_scan] | |||
| expected_tokens = [t.expect for t in to_scan] | |||
| raise ParseError('Unexpected end of input! Expecting a terminal of: %s' % expected_tokens) | |||
| elif len(solutions) > 1: | |||
| raise Exception('Earley should not generate more than one start symbol - bug') | |||
| elif len(solutions) == 1: | |||
| tree = solutions[0] | |||
| else: | |||
| tree = Tree('_ambig', solutions) | |||
| if self.resolve_ambiguity: | |||
| tree = self.resolve_ambiguity(tree) | |||
| return ApplyCallbacks(self.postprocess).transform(tree) | |||
| ## If we're not resolving ambiguity, we just return the root of the SPPF tree to the caller. | |||
| # This means the caller can work directly with the SPPF tree. | |||
| if not self.resolve_ambiguity: | |||
| return solutions[0] | |||
| # ... otherwise, disambiguate and convert the SPPF to an AST, removing any ambiguities | |||
| # according to the rules. | |||
| return ForestToTreeVisitor(solutions[0], self.forest_sum_visitor, self.callbacks).go() | |||
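With this change, parse() has two exits: when resolve_ambiguity is set, the SPPF is collapsed into a single Tree via ForestToTreeVisitor; otherwise the caller receives the root SPPF symbol node and works with the forest directly. A rough caller-side sketch of the second path, using the same visitor and attribute chain as the updated tests further down; the toy grammar is illustrative and not from the patch:

```python
from lark import Lark
from lark.parsers.earley_forest import ForestToAmbiguousTreeVisitor

# A deliberately ambiguous toy grammar (illustrative only): 'aa' can be
# parsed as two single-'a' x's or as one double-'a' x.
grammar = r"""
start: x+
x: "a"
 | "a" "a"
"""

parser = Lark(grammar, parser='earley', ambiguity='explicit')

# With ambiguity='explicit', parse() now hands back the root SPPF symbol node...
root_symbol = parser.parse('aa')

# ...which can be walked directly, or turned into a Tree in which ambiguities
# appear as '_ambig' nodes, the same way the updated tests below do it.
ambig_tree = ForestToAmbiguousTreeVisitor(root_symbol, parser.parser.parser.callbacks).go()
print(ambig_tree.pretty())
```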
| @@ -4,7 +4,7 @@ from .tree import Tree | |||
| from .visitors import Transformer_InPlace | |||
| from .common import ParserConf | |||
| from .lexer import Token, PatternStr | |||
| from .parsers import earley, resolve_ambig | |||
| from .parsers import earley | |||
| from .grammar import Rule, Terminal, NonTerminal | |||
| @@ -114,7 +114,7 @@ class Reconstructor: | |||
| def _reconstruct(self, tree): | |||
| # TODO: ambiguity? | |||
| parser = earley.Parser(ParserConf(self.rules, None, tree.data), self._match, resolve_ambiguity=resolve_ambig.standard_resolve_ambig) | |||
| parser = earley.Parser(ParserConf(self.rules, None, tree.data), self._match, resolve_ambiguity=True) | |||
| unreduced_tree = parser.parse(tree.children) # find a full derivation | |||
| assert unreduced_tree.data == tree.data | |||
| res = self.write_tokens.transform(unreduced_tree) | |||
| @@ -21,6 +21,8 @@ from lark.lark import Lark | |||
| from lark.exceptions import GrammarError, ParseError, UnexpectedToken, UnexpectedInput | |||
| from lark.tree import Tree | |||
| from lark.visitors import Transformer | |||
| from lark.parsers.earley_forest import ForestToAmbiguousTreeVisitor | |||
| from lark.parsers.earley import ApplyCallbacks | |||
| __path__ = os.path.dirname(__file__) | |||
| def _read(n, *args): | |||
| @@ -236,10 +238,11 @@ def _make_full_earley_test(LEXER): | |||
| """ | |||
| parser = Lark(grammar, parser='earley', lexer=LEXER, ambiguity='explicit') | |||
| res = parser.parse('ab') | |||
| self.assertEqual( res.data, '_ambig') | |||
| self.assertEqual( len(res.children), 2) | |||
| root_symbol = parser.parse('ab') | |||
| ambig_tree = ForestToAmbiguousTreeVisitor(root_symbol, parser.parser.parser.callbacks).go() | |||
| # print(ambig_tree.pretty()) | |||
| self.assertEqual( ambig_tree.data, '_ambig') | |||
| self.assertEqual( len(ambig_tree.children), 2) | |||
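For contrast with the explicit-ambiguity path exercised above, here is a small sketch (not part of the test suite) of the default path: without ambiguity='explicit', parse() still resolves ambiguity internally and returns a plain Tree, so existing callers do not need the forest visitor. The grammar is illustrative only.

```python
from lark import Lark

# Hypothetical minimal grammar, used only to show the default (non-explicit) path.
g = '''
start: "a" "b"
'''
tree = Lark(g, parser='earley').parse('ab')
print(tree.data)  # 'start' -- a plain Tree, no forest handling required
```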
| def test_ambiguity1(self): | |||
| grammar = """ | |||
| @@ -251,9 +254,35 @@ def _make_full_earley_test(LEXER): | |||
| """ | |||
| l = Lark(grammar, parser='earley', ambiguity='explicit', lexer=LEXER) | |||
| x = l.parse('cde') | |||
| assert x.data == '_ambig', x | |||
| assert len(x.children) == 2 | |||
| root_symbol = l.parse('cde') | |||
| ambig_tree = ForestToAmbiguousTreeVisitor(root_symbol, l.parser.parser.callbacks).go() | |||
| # print(ambig_tree.pretty()) | |||
| # tree = ApplyCallbacks(l.parser.parser.postprocess).transform(ambig_tree) | |||
| assert ambig_tree.data == '_ambig', ambig_tree | |||
| assert len(ambig_tree.children) == 2 | |||
| @unittest.skipIf(LEXER==None, "Scanless doesn't support regular expressions") | |||
| def test_ambiguity2(self): | |||
| grammar = """ | |||
| ANY: /[a-zA-Z0-9 ]+/ | |||
| a.2: "A" b+ | |||
| b.2: "B" | |||
| c: ANY | |||
| start: (a|c)* | |||
| """ | |||
| l = Lark(grammar, parser='earley', lexer=LEXER) | |||
| res = l.parse('ABX') | |||
| expected = Tree('start', [ | |||
| Tree('a', [ | |||
| Tree('b', []) | |||
| ]), | |||
| Tree('c', [ | |||
| 'X' | |||
| ]) | |||
| ]) | |||
| self.assertEqual(res, expected) | |||
| def test_fruitflies_ambig(self): | |||
| grammar = """ | |||
| @@ -272,7 +301,9 @@ def _make_full_earley_test(LEXER): | |||
| %ignore WS | |||
| """ | |||
| parser = Lark(grammar, ambiguity='explicit', lexer=LEXER) | |||
| res = parser.parse('fruit flies like bananas') | |||
| root_symbol = parser.parse('fruit flies like bananas') | |||
| tree = ForestToAmbiguousTreeVisitor(root_symbol, parser.parser.parser.callbacks).go() | |||
| # tree = ApplyCallbacks(parser.parser.parser.postprocess).transform(ambig_tree) | |||
| expected = Tree('_ambig', [ | |||
| Tree('comparative', [ | |||
| @@ -290,7 +321,9 @@ def _make_full_earley_test(LEXER): | |||
| # print res.pretty() | |||
| # print expected.pretty() | |||
| self.assertEqual(res, expected) | |||
| # self.assertEqual(tree, expected) | |||
| self.assertEqual(tree.data, expected.data) | |||
| self.assertEqual(set(tree.children), set(expected.children)) | |||
| @unittest.skipIf(LEXER=='dynamic', "Only relevant for the dynamic_complete parser") | |||
| @@ -303,7 +336,9 @@ def _make_full_earley_test(LEXER): | |||
| text = """cat""" | |||
| parser = Lark(grammar, start='start', ambiguity='explicit') | |||
| tree = parser.parse(text) | |||
| root_symbol = parser.parse(text) | |||
| ambig_tree = ForestToAmbiguousTreeVisitor(root_symbol).go() | |||
| tree = ApplyCallbacks(parser.parser.parser.postprocess).transform(ambig_tree) | |||
| self.assertEqual(tree.data, '_ambig') | |||
| combinations = {tuple(str(s) for s in t.children) for t in tree.children} | |||