Browse Source

Rebuild the way Earley prioritizes ambiguities

- Makes rule ordering the default ambiguity tie breaker.
    E.g.
    start: a | b
    a: "A"
    b: "A"

    will return:
    start
        a

    start: b | a
    a: "A"
    b: "A"

    will return
    start
        b

- Replaces ambiguity='resolve__antiscore_sum' with a separate option: 'priority'.
    The priority option has 4 values: 'auto', 'none', 'normal', 'invert'.
    'Auto' maps to 'Normal' for CYK and Earley and 'None' for LALR.
    'None' filters your priorities and ignores them. This saves some extra tree walking on Earley.
    'Normal' uses your priorities untouched, mimicking the old behaviour.
    'Invert' negates your priorities, emulating the old 'resolve__antiscore_sum' behaviour.

    This allows you to use priority logic even when ambiguity=='explicit', to get a better idea
    of the shape of your tree; and to easily disable priorities without removing them from the
    grammar for testing (or performance).

- ambiguity='explicit' now correctly returns an ambiguous tree again, as 0.6 did.
tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.6.6
night199uk 6 years ago
parent
commit
80a09697fe
11 changed files with 135 additions and 148 deletions
  1. +4
    -2
      lark/grammar.py
  2. +25
    -1
      lark/lark.py
  3. +7
    -6
      lark/load_grammar.py
  4. +4
    -13
      lark/parser_frontends.py
  5. +15
    -10
      lark/parsers/earley.py
  6. +47
    -96
      lark/parsers/earley_forest.py
  7. +16
    -11
      lark/parsers/xearley.py
  8. +3
    -3
      lark/reconstruct.py
  9. +1
    -1
      lark/tools/standalone.py
  10. +6
    -0
      lark/utils.py
  11. +7
    -5
      tests/test_parser.py

+ 4
- 2
lark/grammar.py View File

@@ -38,12 +38,14 @@ class Rule(object):
"""
origin : a symbol
expansion : a list of symbols
order : index of this expansion amongst all rules of the same name
"""
__slots__ = ('origin', 'expansion', 'alias', 'options', '_hash')
def __init__(self, origin, expansion, alias=None, options=None):
__slots__ = ('origin', 'expansion', 'alias', 'options', 'order', '_hash')
def __init__(self, origin, expansion, order=0, alias=None, options=None):
self.origin = origin
self.expansion = expansion
self.alias = alias
self.order = order
self.options = options
self._hash = hash((self.origin, tuple(self.expansion)))



+ 25
- 1
lark/lark.py View File

@@ -43,6 +43,7 @@ class LarkOptions(object):
postlex - Lexer post-processing (Default: None) Only works with the standard and contextual lexers.
start - The start symbol (Default: start)
profile - Measure run-time usage in Lark. Read results from the profiler proprety (Default: False)
priority - How priorities should be evaluated - auto, none, normal, invert (Default: auto)
propagate_positions - Propagates [line, column, end_line, end_column] attributes into all tree branches.
lexer_callbacks - Dictionary of callbacks for the lexer. May alter tokens during lexing. Use with caution.
maybe_placeholders - Experimental feature. Instead of omitting optional rules (i.e. rule?), replace them with None
@@ -63,6 +64,7 @@ class LarkOptions(object):
self.transformer = o.pop('transformer', None)
self.start = o.pop('start', 'start')
self.profile = o.pop('profile', False)
self.priority = o.pop('priority', 'auto')
self.ambiguity = o.pop('ambiguity', 'auto')
self.propagate_positions = o.pop('propagate_positions', False)
self.lexer_callbacks = o.pop('lexer_callbacks', {})
@@ -154,7 +156,16 @@ class Lark:
disambig_parsers = ['earley', 'cyk']
assert self.options.parser in disambig_parsers, (
'Only %s supports disambiguation right now') % ', '.join(disambig_parsers)
assert self.options.ambiguity in ('resolve', 'explicit', 'auto', 'resolve__antiscore_sum')
assert self.options.priority in ('auto', 'none', 'normal', 'invert'), 'invalid priority option specified: {}. options are auto, none, normal, invert.'.format(self.options.priority)
if self.options.priority == 'auto':
if self.options.parser in ('earley', 'cyk', ):
self.options.priority = 'normal'
elif self.options.parser in ('lalr', ):
self.options.priority = 'none'
if self.options.priority in ('invert', 'normal'):
assert self.options.parser in ('earley', 'cyk'), "priorities are not supported for LALR at this time"
assert self.options.ambiguity not in ('resolve__antiscore_sum', ), 'resolve__antiscore_sum has been replaced with the option priority="invert"'
assert self.options.ambiguity in ('resolve', 'explicit', 'auto', )

# Parse the grammar file and compose the grammars (TODO)
self.grammar = load_grammar(grammar, self.source)
@@ -162,6 +173,19 @@ class Lark:
# Compile the EBNF grammar into BNF
self.terminals, self.rules, self.ignore_tokens = self.grammar.compile()

# If the user asked to invert the priorities, negate them all here.
# This replaces the old 'resolve__antiscore_sum' option.
if self.options.priority == 'invert':
for rule in self.rules:
if rule.options and rule.options.priority is not None:
rule.options.priority = -rule.options.priority
# Else, if the user asked to disable priorities, strip them from the
# rules. This allows the Earley parsers to skip an extra forest walk
# for improved performance, if you don't need them (or didn't specify any).
elif self.options.priority == 'none':
for rule in self.rules:
if rule.options and rule.options.priority is not None:
rule.options.priority = None
self.lexer_conf = LexerConf(self.terminals, self.ignore_tokens, self.options.postlex, self.options.lexer_callbacks)

if self.options.parser:


+ 7
- 6
lark/load_grammar.py View File

@@ -12,7 +12,7 @@ from .parse_tree_builder import ParseTreeBuilder
from .parser_frontends import LALR_TraditionalLexer
from .common import LexerConf, ParserConf
from .grammar import RuleOptions, Rule, Terminal, NonTerminal, Symbol
from .utils import classify, suppress
from .utils import classify, suppress, dedup_list
from .exceptions import GrammarError, UnexpectedCharacters, UnexpectedToken

from .tree import Tree, SlottedTree as ST
@@ -237,7 +237,7 @@ class SimplifyRule_Visitor(Visitor):
tree.data = 'expansions'
tree.children = [self.visit(ST('expansion', [option if i==j else other
for j, other in enumerate(tree.children)]))
for option in set(child.children)]
for option in dedup_list(child.children)]
self._flatten(tree)
break

@@ -252,7 +252,7 @@ class SimplifyRule_Visitor(Visitor):

def expansions(self, tree):
self._flatten(tree)
tree.children = list(set(tree.children))
tree.children = dedup_list(tree.children)


class RuleTreeToText(Transformer):
@@ -500,7 +500,8 @@ class Grammar:

simplify_rule = SimplifyRule_Visitor()
compiled_rules = []
for name, tree, options in rules:
for i, rule_content in enumerate(rules):
name, tree, options = rule_content
simplify_rule.visit(tree)
expansions = rule_tree_to_text.transform(tree)

@@ -517,7 +518,7 @@ class Grammar:
exp_options = options

assert all(isinstance(x, Symbol) for x in expansion), expansion
rule = Rule(NonTerminal(name), expansion, alias, exp_options)
rule = Rule(NonTerminal(name), expansion, i, alias, exp_options)
compiled_rules.append(rule)

return terminals, compiled_rules, self.ignore
@@ -639,7 +640,7 @@ class GrammarLoader:
terminals = [TerminalDef(name, PatternRE(value)) for name, value in TERMINALS.items()]

rules = [options_from_rule(name, x) for name, x in RULES.items()]
rules = [Rule(NonTerminal(r), symbols_from_strcase(x.split()), None, o) for r, xs, o in rules for x in xs]
rules = [Rule(NonTerminal(r), symbols_from_strcase(x.split()), i, None, o) for r, xs, o in rules for i, x in enumerate(xs)]
callback = ParseTreeBuilder(rules, ST).create_callback()
lexer_conf = LexerConf(terminals, ['WS', 'COMMENT'])



+ 4
- 13
lark/parser_frontends.py View File

@@ -54,16 +54,6 @@ class LALR_CustomLexer(WithLexer):
self.lexer_conf = lexer_conf
self.lexer = lexer_cls(lexer_conf)


def get_ambiguity_options(options):
if not options or options.ambiguity == 'resolve':
return {}
elif options.ambiguity == 'resolve__antiscore_sum':
return {'forest_sum_visitor': earley_forest.ForestAntiscoreSumVisitor}
elif options.ambiguity == 'explicit':
return {'resolve_ambiguity': False}
raise ValueError(options)

def tokenize_text(text):
line = 1
col_start_pos = 0
@@ -77,7 +67,8 @@ class Earley(WithLexer):
def __init__(self, lexer_conf, parser_conf, options=None):
self.init_traditional_lexer(lexer_conf)

self.parser = earley.Parser(parser_conf, self.match, **get_ambiguity_options(options))
resolve_ambiguity = options.ambiguity == 'resolve'
self.parser = earley.Parser(parser_conf, self.match, resolve_ambiguity=resolve_ambiguity)

def match(self, term, token):
return term.name == token.type
@@ -88,11 +79,11 @@ class XEarley:
self.token_by_name = {t.name:t for t in lexer_conf.tokens}

self._prepare_match(lexer_conf)

kw.update(get_ambiguity_options(options))
resolve_ambiguity = options.ambiguity == 'resolve'
self.parser = xearley.Parser(parser_conf,
self.match,
ignore=lexer_conf.ignore,
resolve_ambiguity=resolve_ambiguity,
**kw
)



+ 15
- 10
lark/parsers/earley.py View File

@@ -17,10 +17,10 @@ from ..exceptions import ParseError, UnexpectedToken
from .grammar_analysis import GrammarAnalyzer
from ..grammar import NonTerminal
from .earley_common import Item, TransitiveItem
from .earley_forest import ForestToTreeVisitor, ForestSumVisitor, SymbolNode, Forest
from .earley_forest import ForestToTreeVisitor, ForestSumVisitor, SymbolNode

class Parser:
def __init__(self, parser_conf, term_matcher, resolve_ambiguity=True, forest_sum_visitor = ForestSumVisitor):
def __init__(self, parser_conf, term_matcher, resolve_ambiguity=True):
analysis = GrammarAnalyzer(parser_conf)
self.parser_conf = parser_conf
self.resolve_ambiguity = resolve_ambiguity
@@ -35,11 +35,22 @@ class Parser:
self.TERMINALS = { sym for r in parser_conf.rules for sym in r.expansion if sym.is_term }
self.NON_TERMINALS = { sym for r in parser_conf.rules for sym in r.expansion if not sym.is_term }

self.forest_sum_visitor = None
for rule in parser_conf.rules:
self.callbacks[rule] = rule.alias if callable(rule.alias) else getattr(parser_conf.callback, rule.alias)
self.predictions[rule.origin] = [x.rule for x in analysis.expand_rule(rule.origin)]

self.forest_tree_visitor = ForestToTreeVisitor(forest_sum_visitor, self.callbacks)
## Detect if any rules have priorities set. If the user specified priority = "none" then
# the priorities will be stripped from all rules before they reach us, allowing us to
# skip the extra tree walk. We'll also skip this if the user just didn't specify priorities
# on any rules.
if self.forest_sum_visitor is None and rule.options and rule.options.priority is not None:
self.forest_sum_visitor = ForestSumVisitor()

if resolve_ambiguity:
self.forest_tree_visitor = ForestToTreeVisitor(self.callbacks, self.forest_sum_visitor)
else:
self.forest_tree_visitor = ForestToAmbiguousTreeVisitor(self.callbacks, self.forest_sum_visitor)
self.term_matcher = term_matcher


@@ -278,7 +289,6 @@ class Parser:
# Clear the node_cache and token_cache, which are only relevant for each
# step in the Earley pass.
node_cache.clear()
token_cache.clear()
to_scan = scan(i, token, to_scan)
i += 1

@@ -294,13 +304,8 @@ class Parser:
elif len(solutions) > 1:
raise ParseError('Earley should not generate multiple start symbol items!')

## If we're not resolving ambiguity, we just return the root of the SPPF tree to the caller.
# This means the caller can work directly with the SPPF tree.
if not self.resolve_ambiguity:
return Forest(solutions[0], self.callbacks)

# ... otherwise, disambiguate and convert the SPPF to an AST, removing any ambiguities
# according to the rules.
# Perform our SPPF -> AST conversion using the right ForestVisitor.
return self.forest_tree_visitor.go(solutions[0])

class ApplyCallbacks(Transformer_InPlace):


+ 47
- 96
lark/parsers/earley_forest.py View File

@@ -14,7 +14,9 @@ from ..lexer import Token
from ..utils import Str
from ..grammar import NonTerminal, Terminal, Symbol

from math import isinf
from collections import deque
from operator import attrgetter
from importlib import import_module

class ForestNode(object):
@@ -42,7 +44,10 @@ class SymbolNode(ForestNode):
self._children = set()
self.paths = set()
self.paths_loaded = False
self.priority = None

### We use inf here as it can be safely negated without resorting to conditionals,
# unlike None or float('NaN'), and sorts appropriately.
self.priority = float('-inf')
self.is_intermediate = isinstance(s, tuple)
self._hash = hash((self.s, self.start, self.end))

@@ -68,9 +73,8 @@ class SymbolNode(ForestNode):

@property
def children(self):
if not self.paths_loaded:
self.load_paths()
return self._children
if not self.paths_loaded: self.load_paths()
return sorted(self._children, key=attrgetter('sort_key'))

def __iter__(self):
return iter(self._children)
@@ -92,7 +96,7 @@ class SymbolNode(ForestNode):
symbol = "{} ::= {}* {}".format(rule.origin.name, ' '.join(before), ' '.join(after))
else:
symbol = self.s.name
return "({}, {}, {}, {})".format(symbol, self.start, self.end, self.priority if self.priority is not None else 0)
return "({}, {}, {}, {})".format(symbol, self.start, self.end, self.priority)

class PackedNode(ForestNode):
"""
@@ -106,30 +110,30 @@ class PackedNode(ForestNode):
self.rule = rule
self.left = left
self.right = right
self.priority = None
self._hash = hash((self.s, self.start, self.left, self.right))
self.priority = float('-inf')
self._hash = hash((self.left, self.right))

@property
def is_empty(self):
return self.left is None and self.right is None

@property
def sort_key(self):
"""
Used to sort PackedNode children of SymbolNodes.
A SymbolNode has multiple PackedNodes if it matched
ambiguously. Hence, we use the sort order to identify
the order in which ambiguous children should be considered.
"""
return self.is_empty, -self.priority, -self.rule.order

def __iter__(self):
return iter([self.left, self.right])

def __lt__(self, other):
if self.is_empty and not other.is_empty: return True
if self.priority < other.priority: return True
return False

def __gt__(self, other):
if self.is_empty and not other.is_empty: return True
if self.priority > other.priority: return True
return False

def __eq__(self, other):
if not isinstance(other, PackedNode):
return False
return self is other or (self.s == other.s and self.start == other.start and self.left == other.left and self.right == other.right)
return self is other or (self.left == other.left and self.right == other.right)

def __hash__(self):
return self._hash
@@ -143,7 +147,7 @@ class PackedNode(ForestNode):
symbol = "{} ::= {}* {}".format(rule.origin.name, ' '.join(before), ' '.join(after))
else:
symbol = self.s.name
return "({}, {}, {})".format(symbol, self.start, self.priority)
return "({}, {}, {}, {})".format(symbol, self.start, self.priority, self.rule.order)

class ForestVisitor(object):
"""
@@ -230,11 +234,17 @@ class ForestSumVisitor(ForestVisitor):
"""
A visitor for prioritizing ambiguous parts of the Forest.

This visitor is the default when resolving ambiguity. It pushes the priorities
from the rules into the SPPF nodes; and then sorts the packed node children
of ambiguous symbol or intermediate node according to the priorities.
This relies on the custom sort function provided in PackedNode.__lt__; which
uses these properties (and other factors) to sort the ambiguous packed nodes.
This visitor is used when support for explicit priorities on
rules is requested (whether normal, or invert). It walks the
forest (or subsets thereof) and cascades properties upwards
from the leaves.

It would be ideal to do this during parsing, however this would
require processing each Earley item multiple times. That's
a big performance drawback; so running a forest walk is the
lesser of two evils: there can be significantly more Earley
items created during parsing than there are SPPF nodes in the
final tree.
"""
def visit_packed_node_in(self, node):
return iter([node.left, node.right])
@@ -243,49 +253,13 @@ class ForestSumVisitor(ForestVisitor):
return iter(node.children)

def visit_packed_node_out(self, node):
node.priority = 0
if node.rule.options and node.rule.options.priority: node.priority += node.rule.options.priority
if node.right is not None and hasattr(node.right, 'priority'): node.priority += node.right.priority
if node.left is not None and hasattr(node.left, 'priority'): node.priority += node.left.priority
priority = node.rule.options.priority if not node.parent.is_intermediate and node.rule.options and node.rule.options.priority else 0
priority += getattr(node.right, 'priority', 0)
priority += getattr(node.left, 'priority', 0)
node.priority = priority

def visit_symbol_node_out(self, node):
node.priority = max(child.priority for child in node.children)
node._children = sorted(node.children, reverse = True)

class ForestAntiscoreSumVisitor(ForestSumVisitor):
"""
A visitor for prioritizing ambiguous parts of the Forest.

This visitor is used when resolve_ambiguity == 'resolve__antiscore_sum'.
It pushes the priorities from the rules into the SPPF nodes, and implements
a 'least cost' mechanism for resolving ambiguity (reverse of the default
priority mechanism). It uses a custom __lt__ comparator key for sorting
the packed node children.
"""
def visit_symbol_node_out(self, node):
node.priority = min(child.priority for child in node.children)
node._children = sorted(node.children, key=AntiscoreSumComparator, reverse = True)

class AntiscoreSumComparator(object):
"""
An antiscore-sum comparator for PackedNode objects.

This allows 'sorting' an iterable of PackedNode objects so that they
are arranged lowest priority first.
"""
__slots__ = ['obj']
def __init__(self, obj, *args):
self.obj = obj

def __lt__(self, other):
if self.obj.is_empty and not other.obj.is_empty: return True
if self.obj.priority > other.obj.priority: return True
return False

def __gt__(self, other):
if self.obj.is_empty and not other.obj.is_empty: return True
if self.obj.priority < other.obj.priority: return True
return False

class ForestToTreeVisitor(ForestVisitor):
"""
@@ -299,9 +273,9 @@ class ForestToTreeVisitor(ForestVisitor):
implementation should be another ForestVisitor which sorts the children
according to some priority mechanism.
"""
__slots__ = ['forest_sum_visitor', 'output_stack', 'callbacks']
def __init__(self, forest_sum_visitor = ForestSumVisitor, callbacks = None):
self.forest_sum_visitor = forest_sum_visitor()
__slots__ = ['forest_sum_visitor', 'callbacks', 'output_stack']
def __init__(self, callbacks = None, forest_sum_visitor = None):
self.forest_sum_visitor = forest_sum_visitor
self.callbacks = callbacks

def go(self, root):
@@ -312,7 +286,7 @@ class ForestToTreeVisitor(ForestVisitor):
self.output_stack[-1].append(node)

def visit_symbol_node_in(self, node):
if node.is_ambiguous and node.priority is None:
if self.forest_sum_visitor and node.is_ambiguous and isinf(node.priority):
self.forest_sum_visitor.go(node)
return next(iter(node.children))

@@ -329,7 +303,7 @@ class ForestToTreeVisitor(ForestVisitor):
else:
self.result = result

class ForestToAmbiguousTreeVisitor(ForestVisitor):
class ForestToAmbiguousTreeVisitor(ForestToTreeVisitor):
"""
A Forest visitor which converts an SPPF forest to an ambiguous AST.

@@ -349,18 +323,15 @@ class ForestToAmbiguousTreeVisitor(ForestVisitor):
This is mainly used by the test framework, to make it simpler to write
tests ensuring the SPPF contains the right results.
"""
__slots__ = ['output_stack', 'callbacks']
def __init__(self, callbacks):
self.callbacks = callbacks

def go(self, root):
self.output_stack = deque([])
return super(ForestToAmbiguousTreeVisitor, self).go(root)
def __init__(self, callbacks, forest_sum_visitor = ForestSumVisitor):
super(ForestToAmbiguousTreeVisitor, self).__init__(callbacks, forest_sum_visitor)

def visit_token_node(self, node):
self.output_stack[-1].children.append(node)

def visit_symbol_node_in(self, node):
if self.forest_sum_visitor and node.is_ambiguous and isinf(node.priority):
self.forest_sum_visitor.go(node)
if not node.is_intermediate and node.is_ambiguous:
self.output_stack.append(Tree('_ambig', []))
return iter(node.children)
@@ -374,9 +345,6 @@ class ForestToAmbiguousTreeVisitor(ForestVisitor):
self.result = result

def visit_packed_node_in(self, node):
#### NOTE:
## When an intermediate node (node.parent.s == tuple) has ambiguous children this
## forest visitor will break.
if not node.parent.is_intermediate:
self.output_stack.append(Tree('drv', []))
return iter([node.left, node.right])
@@ -462,20 +430,3 @@ class ForestToPyDotVisitor(ForestVisitor):
child_graph_node_id = str(id(child))
child_graph_node = self.graph.get_node(child_graph_node_id)[0]
self.graph.add_edge(self.pydot.Edge(graph_node, child_graph_node))

class Forest(Tree):
def __init__(self, root, callbacks):
self.root = root
self.callbacks = callbacks
self.data = '_ambig'
self._children = None

@property
def children(self):
if self._children is None:
t = ForestToAmbiguousTreeVisitor(self.callbacks).go(self.root)
self._children = t.children
return self._children

def to_pydot(self, filename):
ForestToPyDotVisitor().go(self.root, filename)

+ 16
- 11
lark/parsers/xearley.py View File

@@ -24,11 +24,11 @@ from .grammar_analysis import GrammarAnalyzer
from ..grammar import NonTerminal, Terminal
from .earley import ApplyCallbacks
from .earley_common import Item, TransitiveItem
from .earley_forest import ForestToTreeVisitor, ForestSumVisitor, SymbolNode, Forest
from .earley_forest import ForestToTreeVisitor, ForestToAmbiguousTreeVisitor, ForestSumVisitor, ForestToPyDotVisitor, SymbolNode


class Parser:
def __init__(self, parser_conf, term_matcher, resolve_ambiguity=True, forest_sum_visitor = ForestSumVisitor, ignore = (), complete_lex = False):
def __init__(self, parser_conf, term_matcher, resolve_ambiguity=True, ignore = (), complete_lex = False):
analysis = GrammarAnalyzer(parser_conf)
self.parser_conf = parser_conf
self.resolve_ambiguity = resolve_ambiguity
@@ -41,15 +41,25 @@ class Parser:
self.predictions = {}

## These could be moved to the grammar analyzer. Pre-computing these is *much* faster than
# the slow 'isupper' in is_terminal.
# the slow 'isupper' in is_terminal; or even called sym.is_term directly.
self.TERMINALS = { sym for r in parser_conf.rules for sym in r.expansion if sym.is_term }
self.NON_TERMINALS = { sym for r in parser_conf.rules for sym in r.expansion if not sym.is_term }

self.forest_sum_visitor = None
for rule in parser_conf.rules:
self.callbacks[rule] = getattr(parser_conf.callback, rule.alias or rule.origin, None)
self.predictions[rule.origin] = [x.rule for x in analysis.expand_rule(rule.origin)]

self.forest_tree_visitor = ForestToTreeVisitor(forest_sum_visitor, self.callbacks)
## Detect if any rules have priorities set. If the user specified priority = "none" then
# the priorities will be stripped from all rules before they reach us, allowing us to
# skip the extra tree walk.
if self.forest_sum_visitor is None and rule.options and rule.options.priority is not None:
self.forest_sum_visitor = ForestSumVisitor()

if resolve_ambiguity:
self.forest_tree_visitor = ForestToTreeVisitor(self.callbacks, self.forest_sum_visitor)
else:
self.forest_tree_visitor = ForestToAmbiguousTreeVisitor(self.callbacks, self.forest_sum_visitor)
self.term_matcher = term_matcher

def parse(self, stream, start_symbol=None):
@@ -362,11 +372,6 @@ class Parser:
elif len(solutions) > 1:
raise Exception('Earley should not generate more than one start symbol - bug')

## If we're not resolving ambiguity, we just return the root of the SPPF tree to the caller.
# This means the caller can work directly with the SPPF tree.
if not self.resolve_ambiguity:
return Forest(solutions[0], self.callbacks)

# ... otherwise, disambiguate and convert the SPPF to an AST, removing any ambiguities
# according to the rules.
# Perform our SPPF -> AST conversion using the right ForestVisitor.
return self.forest_tree_visitor.go(solutions[0])


+ 3
- 3
lark/reconstruct.py View File

@@ -96,13 +96,13 @@ class Reconstructor:

sym = NonTerminal(r.alias) if r.alias else r.origin

yield Rule(sym, recons_exp, MakeMatchTree(sym.name, r.expansion))
yield Rule(sym, recons_exp, alias=MakeMatchTree(sym.name, r.expansion))

for origin, rule_aliases in aliases.items():
for alias in rule_aliases:
yield Rule(origin, [Terminal(alias)], MakeMatchTree(origin.name, [NonTerminal(alias)]))
yield Rule(origin, [Terminal(alias)], alias=MakeMatchTree(origin.name, [NonTerminal(alias)]))
yield Rule(origin, [Terminal(origin.name)], MakeMatchTree(origin.name, [origin]))
yield Rule(origin, [Terminal(origin.name)], alias=MakeMatchTree(origin.name, [origin]))




+ 1
- 1
lark/tools/standalone.py View File

@@ -208,7 +208,7 @@ class TreeBuilderAtoms:
print('RULES = {')
for i, r in enumerate(self.rules):
rule_ids[r] = i
print(' %d: Rule(%r, [%s], %r, %r),' % (i, r.origin, ', '.join(s.fullrepr for s in r.expansion), self.ptb.user_aliases[r], r.options ))
print(' %d: Rule(%r, [%s], alias=%r, options=%r),' % (i, r.origin, ', '.join(s.fullrepr for s in r.expansion), self.ptb.user_aliases[r], r.options ))
print('}')
print('parse_tree_builder = ParseTreeBuilder(RULES.values(), Tree)')



+ 6
- 0
lark/utils.py View File

@@ -75,6 +75,12 @@ def smart_decorator(f, create_decorator):
else:
return create_decorator(f.__func__.__call__, True)

def dedup_list(l):
"""Given a list (l) will removing duplicates from the list,
preserving the original order of the list. Assumes that
the list entrie are hashable."""
dedup = set()
return [ x for x in l if not (x in dedup or dedup.add(x))]

###}



+ 7
- 5
tests/test_parser.py View File

@@ -201,7 +201,7 @@ def _make_full_earley_test(LEXER):

l = Lark(grammar, parser='earley', lexer=LEXER)
res = l.parse("aaa")
self.assertEqual(res.children, ['aa', 'a'])
self.assertEqual(res.children, ['a', 'aa'])

def test_earley4(self):
grammar = """
@@ -211,6 +211,7 @@ def _make_full_earley_test(LEXER):

l = Lark(grammar, parser='earley', lexer=LEXER)
res = l.parse("aaa")
# print(res.pretty())
self.assertEqual(res.children, ['aaa'])

def test_earley_repeating_empty(self):
@@ -1069,7 +1070,7 @@ def _make_parser_test(LEXER, PARSER):
bb_.1: "bb"
"""

l = Lark(grammar, ambiguity='resolve__antiscore_sum')
l = Lark(grammar, priority="invert")
res = l.parse('abba')
self.assertEqual(''.join(child.data for child in res.children), 'ab_b_a_')

@@ -1082,8 +1083,9 @@ def _make_parser_test(LEXER, PARSER):
bb_: "bb"
"""

l = Lark(grammar, ambiguity='resolve__antiscore_sum')
l = Lark(grammar, priority="invert")
res = l.parse('abba')
# print(res.pretty())
self.assertEqual(''.join(child.data for child in res.children), 'indirection')

grammar = """
@@ -1095,7 +1097,7 @@ def _make_parser_test(LEXER, PARSER):
bb_.3: "bb"
"""

l = Lark(grammar, ambiguity='resolve__antiscore_sum')
l = Lark(grammar, priority="invert")
res = l.parse('abba')
self.assertEqual(''.join(child.data for child in res.children), 'ab_b_a_')

@@ -1108,7 +1110,7 @@ def _make_parser_test(LEXER, PARSER):
bb_.3: "bb"
"""

l = Lark(grammar, ambiguity='resolve__antiscore_sum')
l = Lark(grammar, priority="invert")
res = l.parse('abba')
self.assertEqual(''.join(child.data for child in res.children), 'indirection')



Loading…
Cancel
Save