- Makes rule ordering the default ambiguity tie breaker. E.g. start: a | b a: "A" b: "A" will return: start a start: b | a a: "A" b: "A" will return start b - Replaces the ambiguity='resolve__antiscore_sum' with a separate option: 'priority'. The priority option has 4 values: 'auto', 'none', 'normal', 'invert'. 'Auto' maps to 'Normal' for CYK and Earley and 'None' for LALR. 'None' filters your priorities and ignores them. This saves some extra tree walking on Earley. 'Normal' uses your priorities untouched, mimicking the old behaviour. 'Invert' negates your priorities, emulating the old 'resolve__antiscore_sum' behaviour. This allows you to use priority logic even when ambiguity=='explicit', to get a better idea of the shape of your tree; and to easily disable priorities without removing them from the grammar for testing (or performance). - ambiguity='explicit' now correctly returns an ambiguous tree again, as 0.6 did.
tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.6.6
@@ -38,12 +38,14 @@ class Rule(object): | |||
""" | |||
origin : a symbol | |||
expansion : a list of symbols | |||
order : index of this expansion amongst all rules of the same name | |||
""" | |||
__slots__ = ('origin', 'expansion', 'alias', 'options', '_hash') | |||
def __init__(self, origin, expansion, alias=None, options=None): | |||
__slots__ = ('origin', 'expansion', 'alias', 'options', 'order', '_hash') | |||
def __init__(self, origin, expansion, order=0, alias=None, options=None): | |||
self.origin = origin | |||
self.expansion = expansion | |||
self.alias = alias | |||
self.order = order | |||
self.options = options | |||
self._hash = hash((self.origin, tuple(self.expansion))) | |||
@@ -43,6 +43,7 @@ class LarkOptions(object): | |||
postlex - Lexer post-processing (Default: None) Only works with the standard and contextual lexers. | |||
start - The start symbol (Default: start) | |||
profile - Measure run-time usage in Lark. Read results from the profiler property (Default: False) | |||
priority - How priorities should be evaluated - auto, none, normal, invert (Default: auto) | |||
propagate_positions - Propagates [line, column, end_line, end_column] attributes into all tree branches. | |||
lexer_callbacks - Dictionary of callbacks for the lexer. May alter tokens during lexing. Use with caution. | |||
maybe_placeholders - Experimental feature. Instead of omitting optional rules (i.e. rule?), replace them with None | |||
@@ -63,6 +64,7 @@ class LarkOptions(object): | |||
self.transformer = o.pop('transformer', None) | |||
self.start = o.pop('start', 'start') | |||
self.profile = o.pop('profile', False) | |||
self.priority = o.pop('priority', 'auto') | |||
self.ambiguity = o.pop('ambiguity', 'auto') | |||
self.propagate_positions = o.pop('propagate_positions', False) | |||
self.lexer_callbacks = o.pop('lexer_callbacks', {}) | |||
@@ -154,7 +156,16 @@ class Lark: | |||
disambig_parsers = ['earley', 'cyk'] | |||
assert self.options.parser in disambig_parsers, ( | |||
'Only %s supports disambiguation right now') % ', '.join(disambig_parsers) | |||
assert self.options.ambiguity in ('resolve', 'explicit', 'auto', 'resolve__antiscore_sum') | |||
assert self.options.priority in ('auto', 'none', 'normal', 'invert'), 'invalid priority option specified: {}. options are auto, none, normal, invert.'.format(self.options.priority) | |||
if self.options.priority == 'auto': | |||
if self.options.parser in ('earley', 'cyk', ): | |||
self.options.priority = 'normal' | |||
elif self.options.parser in ('lalr', ): | |||
self.options.priority = 'none' | |||
if self.options.priority in ('invert', 'normal'): | |||
assert self.options.parser in ('earley', 'cyk'), "priorities are not supported for LALR at this time" | |||
assert self.options.ambiguity not in ('resolve__antiscore_sum', ), 'resolve__antiscore_sum has been replaced with the option priority="invert"' | |||
assert self.options.ambiguity in ('resolve', 'explicit', 'auto', ) | |||
# Parse the grammar file and compose the grammars (TODO) | |||
self.grammar = load_grammar(grammar, self.source) | |||
@@ -162,6 +173,19 @@ class Lark: | |||
# Compile the EBNF grammar into BNF | |||
self.terminals, self.rules, self.ignore_tokens = self.grammar.compile() | |||
# If the user asked to invert the priorities, negate them all here. | |||
# This replaces the old 'resolve__antiscore_sum' option. | |||
if self.options.priority == 'invert': | |||
for rule in self.rules: | |||
if rule.options and rule.options.priority is not None: | |||
rule.options.priority = -rule.options.priority | |||
# Else, if the user asked to disable priorities, strip them from the | |||
# rules. This allows the Earley parsers to skip an extra forest walk | |||
# for improved performance, if you don't need them (or didn't specify any). | |||
elif self.options.priority == 'none': | |||
for rule in self.rules: | |||
if rule.options and rule.options.priority is not None: | |||
rule.options.priority = None | |||
self.lexer_conf = LexerConf(self.terminals, self.ignore_tokens, self.options.postlex, self.options.lexer_callbacks) | |||
if self.options.parser: | |||
@@ -12,7 +12,7 @@ from .parse_tree_builder import ParseTreeBuilder | |||
from .parser_frontends import LALR_TraditionalLexer | |||
from .common import LexerConf, ParserConf | |||
from .grammar import RuleOptions, Rule, Terminal, NonTerminal, Symbol | |||
from .utils import classify, suppress | |||
from .utils import classify, suppress, dedup_list | |||
from .exceptions import GrammarError, UnexpectedCharacters, UnexpectedToken | |||
from .tree import Tree, SlottedTree as ST | |||
@@ -237,7 +237,7 @@ class SimplifyRule_Visitor(Visitor): | |||
tree.data = 'expansions' | |||
tree.children = [self.visit(ST('expansion', [option if i==j else other | |||
for j, other in enumerate(tree.children)])) | |||
for option in set(child.children)] | |||
for option in dedup_list(child.children)] | |||
self._flatten(tree) | |||
break | |||
@@ -252,7 +252,7 @@ class SimplifyRule_Visitor(Visitor): | |||
def expansions(self, tree): | |||
self._flatten(tree) | |||
tree.children = list(set(tree.children)) | |||
tree.children = dedup_list(tree.children) | |||
class RuleTreeToText(Transformer): | |||
@@ -500,7 +500,8 @@ class Grammar: | |||
simplify_rule = SimplifyRule_Visitor() | |||
compiled_rules = [] | |||
for name, tree, options in rules: | |||
for i, rule_content in enumerate(rules): | |||
name, tree, options = rule_content | |||
simplify_rule.visit(tree) | |||
expansions = rule_tree_to_text.transform(tree) | |||
@@ -517,7 +518,7 @@ class Grammar: | |||
exp_options = options | |||
assert all(isinstance(x, Symbol) for x in expansion), expansion | |||
rule = Rule(NonTerminal(name), expansion, alias, exp_options) | |||
rule = Rule(NonTerminal(name), expansion, i, alias, exp_options) | |||
compiled_rules.append(rule) | |||
return terminals, compiled_rules, self.ignore | |||
@@ -639,7 +640,7 @@ class GrammarLoader: | |||
terminals = [TerminalDef(name, PatternRE(value)) for name, value in TERMINALS.items()] | |||
rules = [options_from_rule(name, x) for name, x in RULES.items()] | |||
rules = [Rule(NonTerminal(r), symbols_from_strcase(x.split()), None, o) for r, xs, o in rules for x in xs] | |||
rules = [Rule(NonTerminal(r), symbols_from_strcase(x.split()), i, None, o) for r, xs, o in rules for i, x in enumerate(xs)] | |||
callback = ParseTreeBuilder(rules, ST).create_callback() | |||
lexer_conf = LexerConf(terminals, ['WS', 'COMMENT']) | |||
@@ -54,16 +54,6 @@ class LALR_CustomLexer(WithLexer): | |||
self.lexer_conf = lexer_conf | |||
self.lexer = lexer_cls(lexer_conf) | |||
def get_ambiguity_options(options): | |||
if not options or options.ambiguity == 'resolve': | |||
return {} | |||
elif options.ambiguity == 'resolve__antiscore_sum': | |||
return {'forest_sum_visitor': earley_forest.ForestAntiscoreSumVisitor} | |||
elif options.ambiguity == 'explicit': | |||
return {'resolve_ambiguity': False} | |||
raise ValueError(options) | |||
def tokenize_text(text): | |||
line = 1 | |||
col_start_pos = 0 | |||
@@ -77,7 +67,8 @@ class Earley(WithLexer): | |||
def __init__(self, lexer_conf, parser_conf, options=None): | |||
self.init_traditional_lexer(lexer_conf) | |||
self.parser = earley.Parser(parser_conf, self.match, **get_ambiguity_options(options)) | |||
resolve_ambiguity = options.ambiguity == 'resolve' | |||
self.parser = earley.Parser(parser_conf, self.match, resolve_ambiguity=resolve_ambiguity) | |||
def match(self, term, token): | |||
return term.name == token.type | |||
@@ -88,11 +79,11 @@ class XEarley: | |||
self.token_by_name = {t.name:t for t in lexer_conf.tokens} | |||
self._prepare_match(lexer_conf) | |||
kw.update(get_ambiguity_options(options)) | |||
resolve_ambiguity = options.ambiguity == 'resolve' | |||
self.parser = xearley.Parser(parser_conf, | |||
self.match, | |||
ignore=lexer_conf.ignore, | |||
resolve_ambiguity=resolve_ambiguity, | |||
**kw | |||
) | |||
@@ -17,10 +17,10 @@ from ..exceptions import ParseError, UnexpectedToken | |||
from .grammar_analysis import GrammarAnalyzer | |||
from ..grammar import NonTerminal | |||
from .earley_common import Item, TransitiveItem | |||
from .earley_forest import ForestToTreeVisitor, ForestSumVisitor, SymbolNode, Forest | |||
from .earley_forest import ForestToTreeVisitor, ForestSumVisitor, SymbolNode | |||
class Parser: | |||
def __init__(self, parser_conf, term_matcher, resolve_ambiguity=True, forest_sum_visitor = ForestSumVisitor): | |||
def __init__(self, parser_conf, term_matcher, resolve_ambiguity=True): | |||
analysis = GrammarAnalyzer(parser_conf) | |||
self.parser_conf = parser_conf | |||
self.resolve_ambiguity = resolve_ambiguity | |||
@@ -35,11 +35,22 @@ class Parser: | |||
self.TERMINALS = { sym for r in parser_conf.rules for sym in r.expansion if sym.is_term } | |||
self.NON_TERMINALS = { sym for r in parser_conf.rules for sym in r.expansion if not sym.is_term } | |||
self.forest_sum_visitor = None | |||
for rule in parser_conf.rules: | |||
self.callbacks[rule] = rule.alias if callable(rule.alias) else getattr(parser_conf.callback, rule.alias) | |||
self.predictions[rule.origin] = [x.rule for x in analysis.expand_rule(rule.origin)] | |||
self.forest_tree_visitor = ForestToTreeVisitor(forest_sum_visitor, self.callbacks) | |||
## Detect if any rules have priorities set. If the user specified priority = "none" then | |||
# the priorities will be stripped from all rules before they reach us, allowing us to | |||
# skip the extra tree walk. We'll also skip this if the user just didn't specify priorities | |||
# on any rules. | |||
if self.forest_sum_visitor is None and rule.options and rule.options.priority is not None: | |||
self.forest_sum_visitor = ForestSumVisitor() | |||
if resolve_ambiguity: | |||
self.forest_tree_visitor = ForestToTreeVisitor(self.callbacks, self.forest_sum_visitor) | |||
else: | |||
self.forest_tree_visitor = ForestToAmbiguousTreeVisitor(self.callbacks, self.forest_sum_visitor) | |||
self.term_matcher = term_matcher | |||
@@ -278,7 +289,6 @@ class Parser: | |||
# Clear the node_cache and token_cache, which are only relevant for each | |||
# step in the Earley pass. | |||
node_cache.clear() | |||
token_cache.clear() | |||
to_scan = scan(i, token, to_scan) | |||
i += 1 | |||
@@ -294,13 +304,8 @@ class Parser: | |||
elif len(solutions) > 1: | |||
raise ParseError('Earley should not generate multiple start symbol items!') | |||
## If we're not resolving ambiguity, we just return the root of the SPPF tree to the caller. | |||
# This means the caller can work directly with the SPPF tree. | |||
if not self.resolve_ambiguity: | |||
return Forest(solutions[0], self.callbacks) | |||
# ... otherwise, disambiguate and convert the SPPF to an AST, removing any ambiguities | |||
# according to the rules. | |||
# Perform our SPPF -> AST conversion using the right ForestVisitor. | |||
return self.forest_tree_visitor.go(solutions[0]) | |||
class ApplyCallbacks(Transformer_InPlace): | |||
@@ -14,7 +14,9 @@ from ..lexer import Token | |||
from ..utils import Str | |||
from ..grammar import NonTerminal, Terminal, Symbol | |||
from math import isinf | |||
from collections import deque | |||
from operator import attrgetter | |||
from importlib import import_module | |||
class ForestNode(object): | |||
@@ -42,7 +44,10 @@ class SymbolNode(ForestNode): | |||
self._children = set() | |||
self.paths = set() | |||
self.paths_loaded = False | |||
self.priority = None | |||
### We use inf here as it can be safely negated without resorting to conditionals, | |||
# unlike None or float('NaN'), and sorts appropriately. | |||
self.priority = float('-inf') | |||
self.is_intermediate = isinstance(s, tuple) | |||
self._hash = hash((self.s, self.start, self.end)) | |||
@@ -68,9 +73,8 @@ class SymbolNode(ForestNode): | |||
@property | |||
def children(self): | |||
if not self.paths_loaded: | |||
self.load_paths() | |||
return self._children | |||
if not self.paths_loaded: self.load_paths() | |||
return sorted(self._children, key=attrgetter('sort_key')) | |||
def __iter__(self): | |||
return iter(self._children) | |||
@@ -92,7 +96,7 @@ class SymbolNode(ForestNode): | |||
symbol = "{} ::= {}* {}".format(rule.origin.name, ' '.join(before), ' '.join(after)) | |||
else: | |||
symbol = self.s.name | |||
return "({}, {}, {}, {})".format(symbol, self.start, self.end, self.priority if self.priority is not None else 0) | |||
return "({}, {}, {}, {})".format(symbol, self.start, self.end, self.priority) | |||
class PackedNode(ForestNode): | |||
""" | |||
@@ -106,30 +110,30 @@ class PackedNode(ForestNode): | |||
self.rule = rule | |||
self.left = left | |||
self.right = right | |||
self.priority = None | |||
self._hash = hash((self.s, self.start, self.left, self.right)) | |||
self.priority = float('-inf') | |||
self._hash = hash((self.left, self.right)) | |||
@property | |||
def is_empty(self): | |||
return self.left is None and self.right is None | |||
@property | |||
def sort_key(self): | |||
""" | |||
Used to sort PackedNode children of SymbolNodes. | |||
A SymbolNode has multiple PackedNodes if it matched | |||
ambiguously. Hence, we use the sort order to identify | |||
the order in which ambiguous children should be considered. | |||
""" | |||
return self.is_empty, -self.priority, -self.rule.order | |||
def __iter__(self): | |||
return iter([self.left, self.right]) | |||
def __lt__(self, other): | |||
if self.is_empty and not other.is_empty: return True | |||
if self.priority < other.priority: return True | |||
return False | |||
def __gt__(self, other): | |||
if self.is_empty and not other.is_empty: return True | |||
if self.priority > other.priority: return True | |||
return False | |||
def __eq__(self, other): | |||
if not isinstance(other, PackedNode): | |||
return False | |||
return self is other or (self.s == other.s and self.start == other.start and self.left == other.left and self.right == other.right) | |||
return self is other or (self.left == other.left and self.right == other.right) | |||
def __hash__(self): | |||
return self._hash | |||
@@ -143,7 +147,7 @@ class PackedNode(ForestNode): | |||
symbol = "{} ::= {}* {}".format(rule.origin.name, ' '.join(before), ' '.join(after)) | |||
else: | |||
symbol = self.s.name | |||
return "({}, {}, {})".format(symbol, self.start, self.priority) | |||
return "({}, {}, {}, {})".format(symbol, self.start, self.priority, self.rule.order) | |||
class ForestVisitor(object): | |||
""" | |||
@@ -230,11 +234,17 @@ class ForestSumVisitor(ForestVisitor): | |||
""" | |||
A visitor for prioritizing ambiguous parts of the Forest. | |||
This visitor is the default when resolving ambiguity. It pushes the priorities | |||
from the rules into the SPPF nodes; and then sorts the packed node children | |||
of ambiguous symbol or intermediate node according to the priorities. | |||
This relies on the custom sort function provided in PackedNode.__lt__; which | |||
uses these properties (and other factors) to sort the ambiguous packed nodes. | |||
This visitor is used when support for explicit priorities on | |||
rules is requested (whether normal, or invert). It walks the | |||
forest (or subsets thereof) and cascades properties upwards | |||
from the leaves. | |||
It would be ideal to do this during parsing, however this would | |||
require processing each Earley item multiple times. That's | |||
a big performance drawback; so running a forest walk is the | |||
lesser of two evils: there can be significantly more Earley | |||
items created during parsing than there are SPPF nodes in the | |||
final tree. | |||
""" | |||
def visit_packed_node_in(self, node): | |||
return iter([node.left, node.right]) | |||
@@ -243,49 +253,13 @@ class ForestSumVisitor(ForestVisitor): | |||
return iter(node.children) | |||
def visit_packed_node_out(self, node): | |||
node.priority = 0 | |||
if node.rule.options and node.rule.options.priority: node.priority += node.rule.options.priority | |||
if node.right is not None and hasattr(node.right, 'priority'): node.priority += node.right.priority | |||
if node.left is not None and hasattr(node.left, 'priority'): node.priority += node.left.priority | |||
priority = node.rule.options.priority if not node.parent.is_intermediate and node.rule.options and node.rule.options.priority else 0 | |||
priority += getattr(node.right, 'priority', 0) | |||
priority += getattr(node.left, 'priority', 0) | |||
node.priority = priority | |||
def visit_symbol_node_out(self, node): | |||
node.priority = max(child.priority for child in node.children) | |||
node._children = sorted(node.children, reverse = True) | |||
class ForestAntiscoreSumVisitor(ForestSumVisitor): | |||
""" | |||
A visitor for prioritizing ambiguous parts of the Forest. | |||
This visitor is used when resolve_ambiguity == 'resolve__antiscore_sum'. | |||
It pushes the priorities from the rules into the SPPF nodes, and implements | |||
a 'least cost' mechanism for resolving ambiguity (reverse of the default | |||
priority mechanism). It uses a custom __lt__ comparator key for sorting | |||
the packed node children. | |||
""" | |||
def visit_symbol_node_out(self, node): | |||
node.priority = min(child.priority for child in node.children) | |||
node._children = sorted(node.children, key=AntiscoreSumComparator, reverse = True) | |||
class AntiscoreSumComparator(object): | |||
""" | |||
An antiscore-sum comparator for PackedNode objects. | |||
This allows 'sorting' an iterable of PackedNode objects so that they | |||
are arranged lowest priority first. | |||
""" | |||
__slots__ = ['obj'] | |||
def __init__(self, obj, *args): | |||
self.obj = obj | |||
def __lt__(self, other): | |||
if self.obj.is_empty and not other.obj.is_empty: return True | |||
if self.obj.priority > other.obj.priority: return True | |||
return False | |||
def __gt__(self, other): | |||
if self.obj.is_empty and not other.obj.is_empty: return True | |||
if self.obj.priority < other.obj.priority: return True | |||
return False | |||
class ForestToTreeVisitor(ForestVisitor): | |||
""" | |||
@@ -299,9 +273,9 @@ class ForestToTreeVisitor(ForestVisitor): | |||
implementation should be another ForestVisitor which sorts the children | |||
according to some priority mechanism. | |||
""" | |||
__slots__ = ['forest_sum_visitor', 'output_stack', 'callbacks'] | |||
def __init__(self, forest_sum_visitor = ForestSumVisitor, callbacks = None): | |||
self.forest_sum_visitor = forest_sum_visitor() | |||
__slots__ = ['forest_sum_visitor', 'callbacks', 'output_stack'] | |||
def __init__(self, callbacks = None, forest_sum_visitor = None): | |||
self.forest_sum_visitor = forest_sum_visitor | |||
self.callbacks = callbacks | |||
def go(self, root): | |||
@@ -312,7 +286,7 @@ class ForestToTreeVisitor(ForestVisitor): | |||
self.output_stack[-1].append(node) | |||
def visit_symbol_node_in(self, node): | |||
if node.is_ambiguous and node.priority is None: | |||
if self.forest_sum_visitor and node.is_ambiguous and isinf(node.priority): | |||
self.forest_sum_visitor.go(node) | |||
return next(iter(node.children)) | |||
@@ -329,7 +303,7 @@ class ForestToTreeVisitor(ForestVisitor): | |||
else: | |||
self.result = result | |||
class ForestToAmbiguousTreeVisitor(ForestVisitor): | |||
class ForestToAmbiguousTreeVisitor(ForestToTreeVisitor): | |||
""" | |||
A Forest visitor which converts an SPPF forest to an ambiguous AST. | |||
@@ -349,18 +323,15 @@ class ForestToAmbiguousTreeVisitor(ForestVisitor): | |||
This is mainly used by the test framework, to make it simpler to write | |||
tests ensuring the SPPF contains the right results. | |||
""" | |||
__slots__ = ['output_stack', 'callbacks'] | |||
def __init__(self, callbacks): | |||
self.callbacks = callbacks | |||
def go(self, root): | |||
self.output_stack = deque([]) | |||
return super(ForestToAmbiguousTreeVisitor, self).go(root) | |||
def __init__(self, callbacks, forest_sum_visitor = ForestSumVisitor): | |||
super(ForestToAmbiguousTreeVisitor, self).__init__(callbacks, forest_sum_visitor) | |||
def visit_token_node(self, node): | |||
self.output_stack[-1].children.append(node) | |||
def visit_symbol_node_in(self, node): | |||
if self.forest_sum_visitor and node.is_ambiguous and isinf(node.priority): | |||
self.forest_sum_visitor.go(node) | |||
if not node.is_intermediate and node.is_ambiguous: | |||
self.output_stack.append(Tree('_ambig', [])) | |||
return iter(node.children) | |||
@@ -374,9 +345,6 @@ class ForestToAmbiguousTreeVisitor(ForestVisitor): | |||
self.result = result | |||
def visit_packed_node_in(self, node): | |||
#### NOTE: | |||
## When an intermediate node (node.parent.s == tuple) has ambiguous children this | |||
## forest visitor will break. | |||
if not node.parent.is_intermediate: | |||
self.output_stack.append(Tree('drv', [])) | |||
return iter([node.left, node.right]) | |||
@@ -462,20 +430,3 @@ class ForestToPyDotVisitor(ForestVisitor): | |||
child_graph_node_id = str(id(child)) | |||
child_graph_node = self.graph.get_node(child_graph_node_id)[0] | |||
self.graph.add_edge(self.pydot.Edge(graph_node, child_graph_node)) | |||
class Forest(Tree): | |||
def __init__(self, root, callbacks): | |||
self.root = root | |||
self.callbacks = callbacks | |||
self.data = '_ambig' | |||
self._children = None | |||
@property | |||
def children(self): | |||
if self._children is None: | |||
t = ForestToAmbiguousTreeVisitor(self.callbacks).go(self.root) | |||
self._children = t.children | |||
return self._children | |||
def to_pydot(self, filename): | |||
ForestToPyDotVisitor().go(self.root, filename) |
@@ -24,11 +24,11 @@ from .grammar_analysis import GrammarAnalyzer | |||
from ..grammar import NonTerminal, Terminal | |||
from .earley import ApplyCallbacks | |||
from .earley_common import Item, TransitiveItem | |||
from .earley_forest import ForestToTreeVisitor, ForestSumVisitor, SymbolNode, Forest | |||
from .earley_forest import ForestToTreeVisitor, ForestToAmbiguousTreeVisitor, ForestSumVisitor, ForestToPyDotVisitor, SymbolNode | |||
class Parser: | |||
def __init__(self, parser_conf, term_matcher, resolve_ambiguity=True, forest_sum_visitor = ForestSumVisitor, ignore = (), complete_lex = False): | |||
def __init__(self, parser_conf, term_matcher, resolve_ambiguity=True, ignore = (), complete_lex = False): | |||
analysis = GrammarAnalyzer(parser_conf) | |||
self.parser_conf = parser_conf | |||
self.resolve_ambiguity = resolve_ambiguity | |||
@@ -41,15 +41,25 @@ class Parser: | |||
self.predictions = {} | |||
## These could be moved to the grammar analyzer. Pre-computing these is *much* faster than | |||
# the slow 'isupper' in is_terminal. | |||
# the slow 'isupper' in is_terminal; or even called sym.is_term directly. | |||
self.TERMINALS = { sym for r in parser_conf.rules for sym in r.expansion if sym.is_term } | |||
self.NON_TERMINALS = { sym for r in parser_conf.rules for sym in r.expansion if not sym.is_term } | |||
self.forest_sum_visitor = None | |||
for rule in parser_conf.rules: | |||
self.callbacks[rule] = getattr(parser_conf.callback, rule.alias or rule.origin, None) | |||
self.predictions[rule.origin] = [x.rule for x in analysis.expand_rule(rule.origin)] | |||
self.forest_tree_visitor = ForestToTreeVisitor(forest_sum_visitor, self.callbacks) | |||
## Detect if any rules have priorities set. If the user specified priority = "none" then | |||
# the priorities will be stripped from all rules before they reach us, allowing us to | |||
# skip the extra tree walk. | |||
if self.forest_sum_visitor is None and rule.options and rule.options.priority is not None: | |||
self.forest_sum_visitor = ForestSumVisitor() | |||
if resolve_ambiguity: | |||
self.forest_tree_visitor = ForestToTreeVisitor(self.callbacks, self.forest_sum_visitor) | |||
else: | |||
self.forest_tree_visitor = ForestToAmbiguousTreeVisitor(self.callbacks, self.forest_sum_visitor) | |||
self.term_matcher = term_matcher | |||
def parse(self, stream, start_symbol=None): | |||
@@ -362,11 +372,6 @@ class Parser: | |||
elif len(solutions) > 1: | |||
raise Exception('Earley should not generate more than one start symbol - bug') | |||
## If we're not resolving ambiguity, we just return the root of the SPPF tree to the caller. | |||
# This means the caller can work directly with the SPPF tree. | |||
if not self.resolve_ambiguity: | |||
return Forest(solutions[0], self.callbacks) | |||
# ... otherwise, disambiguate and convert the SPPF to an AST, removing any ambiguities | |||
# according to the rules. | |||
# Perform our SPPF -> AST conversion using the right ForestVisitor. | |||
return self.forest_tree_visitor.go(solutions[0]) | |||
@@ -96,13 +96,13 @@ class Reconstructor: | |||
sym = NonTerminal(r.alias) if r.alias else r.origin | |||
yield Rule(sym, recons_exp, MakeMatchTree(sym.name, r.expansion)) | |||
yield Rule(sym, recons_exp, alias=MakeMatchTree(sym.name, r.expansion)) | |||
for origin, rule_aliases in aliases.items(): | |||
for alias in rule_aliases: | |||
yield Rule(origin, [Terminal(alias)], MakeMatchTree(origin.name, [NonTerminal(alias)])) | |||
yield Rule(origin, [Terminal(alias)], alias=MakeMatchTree(origin.name, [NonTerminal(alias)])) | |||
yield Rule(origin, [Terminal(origin.name)], MakeMatchTree(origin.name, [origin])) | |||
yield Rule(origin, [Terminal(origin.name)], alias=MakeMatchTree(origin.name, [origin])) | |||
@@ -208,7 +208,7 @@ class TreeBuilderAtoms: | |||
print('RULES = {') | |||
for i, r in enumerate(self.rules): | |||
rule_ids[r] = i | |||
print(' %d: Rule(%r, [%s], %r, %r),' % (i, r.origin, ', '.join(s.fullrepr for s in r.expansion), self.ptb.user_aliases[r], r.options )) | |||
print(' %d: Rule(%r, [%s], alias=%r, options=%r),' % (i, r.origin, ', '.join(s.fullrepr for s in r.expansion), self.ptb.user_aliases[r], r.options )) | |||
print('}') | |||
print('parse_tree_builder = ParseTreeBuilder(RULES.values(), Tree)') | |||
@@ -75,6 +75,12 @@ def smart_decorator(f, create_decorator): | |||
else: | |||
return create_decorator(f.__func__.__call__, True) | |||
def dedup_list(l): | |||
"""Given a list (l) will removing duplicates from the list, | |||
preserving the original order of the list. Assumes that | |||
the list entrie are hashable.""" | |||
dedup = set() | |||
return [ x for x in l if not (x in dedup or dedup.add(x))] | |||
###} | |||
@@ -201,7 +201,7 @@ def _make_full_earley_test(LEXER): | |||
l = Lark(grammar, parser='earley', lexer=LEXER) | |||
res = l.parse("aaa") | |||
self.assertEqual(res.children, ['aa', 'a']) | |||
self.assertEqual(res.children, ['a', 'aa']) | |||
def test_earley4(self): | |||
grammar = """ | |||
@@ -211,6 +211,7 @@ def _make_full_earley_test(LEXER): | |||
l = Lark(grammar, parser='earley', lexer=LEXER) | |||
res = l.parse("aaa") | |||
# print(res.pretty()) | |||
self.assertEqual(res.children, ['aaa']) | |||
def test_earley_repeating_empty(self): | |||
@@ -1069,7 +1070,7 @@ def _make_parser_test(LEXER, PARSER): | |||
bb_.1: "bb" | |||
""" | |||
l = Lark(grammar, ambiguity='resolve__antiscore_sum') | |||
l = Lark(grammar, priority="invert") | |||
res = l.parse('abba') | |||
self.assertEqual(''.join(child.data for child in res.children), 'ab_b_a_') | |||
@@ -1082,8 +1083,9 @@ def _make_parser_test(LEXER, PARSER): | |||
bb_: "bb" | |||
""" | |||
l = Lark(grammar, ambiguity='resolve__antiscore_sum') | |||
l = Lark(grammar, priority="invert") | |||
res = l.parse('abba') | |||
# print(res.pretty()) | |||
self.assertEqual(''.join(child.data for child in res.children), 'indirection') | |||
grammar = """ | |||
@@ -1095,7 +1097,7 @@ def _make_parser_test(LEXER, PARSER): | |||
bb_.3: "bb" | |||
""" | |||
l = Lark(grammar, ambiguity='resolve__antiscore_sum') | |||
l = Lark(grammar, priority="invert") | |||
res = l.parse('abba') | |||
self.assertEqual(''.join(child.data for child in res.children), 'ab_b_a_') | |||
@@ -1108,7 +1110,7 @@ def _make_parser_test(LEXER, PARSER): | |||
bb_.3: "bb" | |||
""" | |||
l = Lark(grammar, ambiguity='resolve__antiscore_sum') | |||
l = Lark(grammar, priority="invert") | |||
res = l.parse('abba') | |||
self.assertEqual(''.join(child.data for child in res.children), 'indirection') | |||