diff --git a/lark/grammar.py b/lark/grammar.py index 53ce529..e171d52 100644 --- a/lark/grammar.py +++ b/lark/grammar.py @@ -38,12 +38,14 @@ class Rule(object): """ origin : a symbol expansion : a list of symbols + order : index of this expansion amongst all rules of the same name """ - __slots__ = ('origin', 'expansion', 'alias', 'options', '_hash') - def __init__(self, origin, expansion, alias=None, options=None): + __slots__ = ('origin', 'expansion', 'alias', 'options', 'order', '_hash') + def __init__(self, origin, expansion, order=0, alias=None, options=None): self.origin = origin self.expansion = expansion self.alias = alias + self.order = order self.options = options self._hash = hash((self.origin, tuple(self.expansion))) diff --git a/lark/lark.py b/lark/lark.py index fdf586e..19f82c3 100644 --- a/lark/lark.py +++ b/lark/lark.py @@ -43,6 +43,7 @@ class LarkOptions(object): postlex - Lexer post-processing (Default: None) Only works with the standard and contextual lexers. start - The start symbol (Default: start) profile - Measure run-time usage in Lark. Read results from the profiler proprety (Default: False) + priority - How priorities should be evaluated - auto, none, normal, invert (Default: auto) propagate_positions - Propagates [line, column, end_line, end_column] attributes into all tree branches. lexer_callbacks - Dictionary of callbacks for the lexer. May alter tokens during lexing. Use with caution. maybe_placeholders - Experimental feature. Instead of omitting optional rules (i.e. 
rule?), replace them with None @@ -63,6 +64,7 @@ class LarkOptions(object): self.transformer = o.pop('transformer', None) self.start = o.pop('start', 'start') self.profile = o.pop('profile', False) + self.priority = o.pop('priority', 'auto') self.ambiguity = o.pop('ambiguity', 'auto') self.propagate_positions = o.pop('propagate_positions', False) self.lexer_callbacks = o.pop('lexer_callbacks', {}) @@ -154,7 +156,16 @@ class Lark: disambig_parsers = ['earley', 'cyk'] assert self.options.parser in disambig_parsers, ( 'Only %s supports disambiguation right now') % ', '.join(disambig_parsers) - assert self.options.ambiguity in ('resolve', 'explicit', 'auto', 'resolve__antiscore_sum') + assert self.options.priority in ('auto', 'none', 'normal', 'invert'), 'invalid priority option specified: {}. options are auto, none, normal, invert.'.format(self.options.priority) + if self.options.priority == 'auto': + if self.options.parser in ('earley', 'cyk', ): + self.options.priority = 'normal' + elif self.options.parser in ('lalr', ): + self.options.priority = 'none' + if self.options.priority in ('invert', 'normal'): + assert self.options.parser in ('earley', 'cyk'), "priorities are not supported for LALR at this time" + assert self.options.ambiguity not in ('resolve__antiscore_sum', ), 'resolve__antiscore_sum has been replaced with the option priority="invert"' + assert self.options.ambiguity in ('resolve', 'explicit', 'auto', ) # Parse the grammar file and compose the grammars (TODO) self.grammar = load_grammar(grammar, self.source) @@ -162,6 +173,19 @@ class Lark: # Compile the EBNF grammar into BNF self.terminals, self.rules, self.ignore_tokens = self.grammar.compile() + # If the user asked to invert the priorities, negate them all here. + # This replaces the old 'resolve__antiscore_sum' option. 
+ if self.options.priority == 'invert': + for rule in self.rules: + if rule.options and rule.options.priority is not None: + rule.options.priority = -rule.options.priority + # Else, if the user asked to disable priorities, strip them from the + # rules. This allows the Earley parsers to skip an extra forest walk + # for improved performance, if you don't need them (or didn't specify any). + elif self.options.priority == 'none': + for rule in self.rules: + if rule.options and rule.options.priority is not None: + rule.options.priority = None self.lexer_conf = LexerConf(self.terminals, self.ignore_tokens, self.options.postlex, self.options.lexer_callbacks) if self.options.parser: diff --git a/lark/load_grammar.py b/lark/load_grammar.py index a9de749..4400d43 100644 --- a/lark/load_grammar.py +++ b/lark/load_grammar.py @@ -12,7 +12,7 @@ from .parse_tree_builder import ParseTreeBuilder from .parser_frontends import LALR_TraditionalLexer from .common import LexerConf, ParserConf from .grammar import RuleOptions, Rule, Terminal, NonTerminal, Symbol -from .utils import classify, suppress +from .utils import classify, suppress, dedup_list from .exceptions import GrammarError, UnexpectedCharacters, UnexpectedToken from .tree import Tree, SlottedTree as ST @@ -237,7 +237,7 @@ class SimplifyRule_Visitor(Visitor): tree.data = 'expansions' tree.children = [self.visit(ST('expansion', [option if i==j else other for j, other in enumerate(tree.children)])) - for option in set(child.children)] + for option in dedup_list(child.children)] self._flatten(tree) break @@ -252,7 +252,7 @@ class SimplifyRule_Visitor(Visitor): def expansions(self, tree): self._flatten(tree) - tree.children = list(set(tree.children)) + tree.children = dedup_list(tree.children) class RuleTreeToText(Transformer): @@ -500,7 +500,8 @@ class Grammar: simplify_rule = SimplifyRule_Visitor() compiled_rules = [] - for name, tree, options in rules: + for i, rule_content in enumerate(rules): + name, tree, options = 
rule_content simplify_rule.visit(tree) expansions = rule_tree_to_text.transform(tree) @@ -517,7 +518,7 @@ class Grammar: exp_options = options assert all(isinstance(x, Symbol) for x in expansion), expansion - rule = Rule(NonTerminal(name), expansion, alias, exp_options) + rule = Rule(NonTerminal(name), expansion, i, alias, exp_options) compiled_rules.append(rule) return terminals, compiled_rules, self.ignore @@ -639,7 +640,7 @@ class GrammarLoader: terminals = [TerminalDef(name, PatternRE(value)) for name, value in TERMINALS.items()] rules = [options_from_rule(name, x) for name, x in RULES.items()] - rules = [Rule(NonTerminal(r), symbols_from_strcase(x.split()), None, o) for r, xs, o in rules for x in xs] + rules = [Rule(NonTerminal(r), symbols_from_strcase(x.split()), i, None, o) for r, xs, o in rules for i, x in enumerate(xs)] callback = ParseTreeBuilder(rules, ST).create_callback() lexer_conf = LexerConf(terminals, ['WS', 'COMMENT']) diff --git a/lark/parser_frontends.py b/lark/parser_frontends.py index 160cc4d..30d0f9d 100644 --- a/lark/parser_frontends.py +++ b/lark/parser_frontends.py @@ -54,16 +54,6 @@ class LALR_CustomLexer(WithLexer): self.lexer_conf = lexer_conf self.lexer = lexer_cls(lexer_conf) - -def get_ambiguity_options(options): - if not options or options.ambiguity == 'resolve': - return {} - elif options.ambiguity == 'resolve__antiscore_sum': - return {'forest_sum_visitor': earley_forest.ForestAntiscoreSumVisitor} - elif options.ambiguity == 'explicit': - return {'resolve_ambiguity': False} - raise ValueError(options) - def tokenize_text(text): line = 1 col_start_pos = 0 @@ -77,7 +67,8 @@ class Earley(WithLexer): def __init__(self, lexer_conf, parser_conf, options=None): self.init_traditional_lexer(lexer_conf) - self.parser = earley.Parser(parser_conf, self.match, **get_ambiguity_options(options)) + resolve_ambiguity = options.ambiguity == 'resolve' + self.parser = earley.Parser(parser_conf, self.match, resolve_ambiguity=resolve_ambiguity) def 
match(self, term, token): return term.name == token.type @@ -88,11 +79,11 @@ class XEarley: self.token_by_name = {t.name:t for t in lexer_conf.tokens} self._prepare_match(lexer_conf) - - kw.update(get_ambiguity_options(options)) + resolve_ambiguity = options.ambiguity == 'resolve' self.parser = xearley.Parser(parser_conf, self.match, ignore=lexer_conf.ignore, + resolve_ambiguity=resolve_ambiguity, **kw ) diff --git a/lark/parsers/earley.py b/lark/parsers/earley.py index 9b3c218..0d787c9 100644 --- a/lark/parsers/earley.py +++ b/lark/parsers/earley.py @@ -17,10 +17,10 @@ from ..exceptions import ParseError, UnexpectedToken from .grammar_analysis import GrammarAnalyzer from ..grammar import NonTerminal from .earley_common import Item, TransitiveItem -from .earley_forest import ForestToTreeVisitor, ForestSumVisitor, SymbolNode, Forest +from .earley_forest import ForestToTreeVisitor, ForestSumVisitor, SymbolNode class Parser: - def __init__(self, parser_conf, term_matcher, resolve_ambiguity=True, forest_sum_visitor = ForestSumVisitor): + def __init__(self, parser_conf, term_matcher, resolve_ambiguity=True): analysis = GrammarAnalyzer(parser_conf) self.parser_conf = parser_conf self.resolve_ambiguity = resolve_ambiguity @@ -35,11 +35,22 @@ class Parser: self.TERMINALS = { sym for r in parser_conf.rules for sym in r.expansion if sym.is_term } self.NON_TERMINALS = { sym for r in parser_conf.rules for sym in r.expansion if not sym.is_term } + self.forest_sum_visitor = None for rule in parser_conf.rules: self.callbacks[rule] = rule.alias if callable(rule.alias) else getattr(parser_conf.callback, rule.alias) self.predictions[rule.origin] = [x.rule for x in analysis.expand_rule(rule.origin)] - self.forest_tree_visitor = ForestToTreeVisitor(forest_sum_visitor, self.callbacks) + ## Detect if any rules have priorities set. 
If the user specified priority = "none" then + # the priorities will be stripped from all rules before they reach us, allowing us to + # skip the extra tree walk. We'll also skip this if the user just didn't specify priorities + # on any rules. + if self.forest_sum_visitor is None and rule.options and rule.options.priority is not None: + self.forest_sum_visitor = ForestSumVisitor() + + if resolve_ambiguity: + self.forest_tree_visitor = ForestToTreeVisitor(self.callbacks, self.forest_sum_visitor) + else: + self.forest_tree_visitor = ForestToAmbiguousTreeVisitor(self.callbacks, self.forest_sum_visitor) self.term_matcher = term_matcher @@ -278,7 +289,6 @@ class Parser: # Clear the node_cache and token_cache, which are only relevant for each # step in the Earley pass. node_cache.clear() - token_cache.clear() to_scan = scan(i, token, to_scan) i += 1 @@ -294,13 +304,8 @@ class Parser: elif len(solutions) > 1: raise ParseError('Earley should not generate multiple start symbol items!') - ## If we're not resolving ambiguity, we just return the root of the SPPF tree to the caller. - # This means the caller can work directly with the SPPF tree. - if not self.resolve_ambiguity: - return Forest(solutions[0], self.callbacks) - # ... otherwise, disambiguate and convert the SPPF to an AST, removing any ambiguities - # according to the rules. + # Perform our SPPF -> AST conversion using the right ForestVisitor. 
return self.forest_tree_visitor.go(solutions[0]) class ApplyCallbacks(Transformer_InPlace): diff --git a/lark/parsers/earley_forest.py b/lark/parsers/earley_forest.py index b10f595..86b234f 100644 --- a/lark/parsers/earley_forest.py +++ b/lark/parsers/earley_forest.py @@ -14,7 +14,9 @@ from ..lexer import Token from ..utils import Str from ..grammar import NonTerminal, Terminal, Symbol +from math import isinf from collections import deque +from operator import attrgetter from importlib import import_module class ForestNode(object): @@ -42,7 +44,10 @@ class SymbolNode(ForestNode): self._children = set() self.paths = set() self.paths_loaded = False - self.priority = None + + ### We use inf here as it can be safely negated without resorting to conditionals, + # unlike None or float('NaN'), and sorts appropriately. + self.priority = float('-inf') self.is_intermediate = isinstance(s, tuple) self._hash = hash((self.s, self.start, self.end)) @@ -68,9 +73,8 @@ class SymbolNode(ForestNode): @property def children(self): - if not self.paths_loaded: - self.load_paths() - return self._children + if not self.paths_loaded: self.load_paths() + return sorted(self._children, key=attrgetter('sort_key')) def __iter__(self): return iter(self._children) @@ -92,7 +96,7 @@ class SymbolNode(ForestNode): symbol = "{} ::= {}* {}".format(rule.origin.name, ' '.join(before), ' '.join(after)) else: symbol = self.s.name - return "({}, {}, {}, {})".format(symbol, self.start, self.end, self.priority if self.priority is not None else 0) + return "({}, {}, {}, {})".format(symbol, self.start, self.end, self.priority) class PackedNode(ForestNode): """ @@ -106,30 +110,30 @@ class PackedNode(ForestNode): self.rule = rule self.left = left self.right = right - self.priority = None - self._hash = hash((self.s, self.start, self.left, self.right)) + self.priority = float('-inf') + self._hash = hash((self.left, self.right)) @property def is_empty(self): return self.left is None and self.right is None + 
@property + def sort_key(self): + """ + Used to sort PackedNode children of SymbolNodes. + A SymbolNode has multiple PackedNodes if it matched + ambiguously. Hence, we use the sort order to identify + the order in which ambiguous children should be considered. + """ + return self.is_empty, -self.priority, -self.rule.order + def __iter__(self): return iter([self.left, self.right]) - def __lt__(self, other): - if self.is_empty and not other.is_empty: return True - if self.priority < other.priority: return True - return False - - def __gt__(self, other): - if self.is_empty and not other.is_empty: return True - if self.priority > other.priority: return True - return False - def __eq__(self, other): if not isinstance(other, PackedNode): return False - return self is other or (self.s == other.s and self.start == other.start and self.left == other.left and self.right == other.right) + return self is other or (self.left == other.left and self.right == other.right) def __hash__(self): return self._hash @@ -143,7 +147,7 @@ class PackedNode(ForestNode): symbol = "{} ::= {}* {}".format(rule.origin.name, ' '.join(before), ' '.join(after)) else: symbol = self.s.name - return "({}, {}, {})".format(symbol, self.start, self.priority) + return "({}, {}, {}, {})".format(symbol, self.start, self.priority, self.rule.order) class ForestVisitor(object): """ @@ -230,11 +234,17 @@ class ForestSumVisitor(ForestVisitor): """ A visitor for prioritizing ambiguous parts of the Forest. - This visitor is the default when resolving ambiguity. It pushes the priorities - from the rules into the SPPF nodes; and then sorts the packed node children - of ambiguous symbol or intermediate node according to the priorities. - This relies on the custom sort function provided in PackedNode.__lt__; which - uses these properties (and other factors) to sort the ambiguous packed nodes. + This visitor is used when support for explicit priorities on + rules is requested (whether normal, or invert). 
It walks the + forest (or subsets thereof) and cascades properties upwards + from the leaves. + + It would be ideal to do this during parsing, however this would + require processing each Earley item multiple times. That's + a big performance drawback; so running a forest walk is the + lesser of two evils: there can be significantly more Earley + items created during parsing than there are SPPF nodes in the + final tree. """ def visit_packed_node_in(self, node): return iter([node.left, node.right]) @@ -243,49 +253,13 @@ class ForestSumVisitor(ForestVisitor): return iter(node.children) def visit_packed_node_out(self, node): - node.priority = 0 - if node.rule.options and node.rule.options.priority: node.priority += node.rule.options.priority - if node.right is not None and hasattr(node.right, 'priority'): node.priority += node.right.priority - if node.left is not None and hasattr(node.left, 'priority'): node.priority += node.left.priority + priority = node.rule.options.priority if not node.parent.is_intermediate and node.rule.options and node.rule.options.priority else 0 + priority += getattr(node.right, 'priority', 0) + priority += getattr(node.left, 'priority', 0) + node.priority = priority def visit_symbol_node_out(self, node): node.priority = max(child.priority for child in node.children) - node._children = sorted(node.children, reverse = True) - -class ForestAntiscoreSumVisitor(ForestSumVisitor): - """ - A visitor for prioritizing ambiguous parts of the Forest. - - This visitor is used when resolve_ambiguity == 'resolve__antiscore_sum'. - It pushes the priorities from the rules into the SPPF nodes, and implements - a 'least cost' mechanism for resolving ambiguity (reverse of the default - priority mechanism). It uses a custom __lt__ comparator key for sorting - the packed node children. 
- """ - def visit_symbol_node_out(self, node): - node.priority = min(child.priority for child in node.children) - node._children = sorted(node.children, key=AntiscoreSumComparator, reverse = True) - -class AntiscoreSumComparator(object): - """ - An antiscore-sum comparator for PackedNode objects. - - This allows 'sorting' an iterable of PackedNode objects so that they - are arranged lowest priority first. - """ - __slots__ = ['obj'] - def __init__(self, obj, *args): - self.obj = obj - - def __lt__(self, other): - if self.obj.is_empty and not other.obj.is_empty: return True - if self.obj.priority > other.obj.priority: return True - return False - - def __gt__(self, other): - if self.obj.is_empty and not other.obj.is_empty: return True - if self.obj.priority < other.obj.priority: return True - return False class ForestToTreeVisitor(ForestVisitor): """ @@ -299,9 +273,9 @@ class ForestToTreeVisitor(ForestVisitor): implementation should be another ForestVisitor which sorts the children according to some priority mechanism. 
""" - __slots__ = ['forest_sum_visitor', 'output_stack', 'callbacks'] - def __init__(self, forest_sum_visitor = ForestSumVisitor, callbacks = None): - self.forest_sum_visitor = forest_sum_visitor() + __slots__ = ['forest_sum_visitor', 'callbacks', 'output_stack'] + def __init__(self, callbacks = None, forest_sum_visitor = None): + self.forest_sum_visitor = forest_sum_visitor self.callbacks = callbacks def go(self, root): @@ -312,7 +286,7 @@ class ForestToTreeVisitor(ForestVisitor): self.output_stack[-1].append(node) def visit_symbol_node_in(self, node): - if node.is_ambiguous and node.priority is None: + if self.forest_sum_visitor and node.is_ambiguous and isinf(node.priority): self.forest_sum_visitor.go(node) return next(iter(node.children)) @@ -329,7 +303,7 @@ class ForestToTreeVisitor(ForestVisitor): else: self.result = result -class ForestToAmbiguousTreeVisitor(ForestVisitor): +class ForestToAmbiguousTreeVisitor(ForestToTreeVisitor): """ A Forest visitor which converts an SPPF forest to an ambiguous AST. @@ -349,18 +323,15 @@ class ForestToAmbiguousTreeVisitor(ForestVisitor): This is mainly used by the test framework, to make it simpler to write tests ensuring the SPPF contains the right results. 
""" - __slots__ = ['output_stack', 'callbacks'] - def __init__(self, callbacks): - self.callbacks = callbacks - - def go(self, root): - self.output_stack = deque([]) - return super(ForestToAmbiguousTreeVisitor, self).go(root) + def __init__(self, callbacks, forest_sum_visitor = ForestSumVisitor): + super(ForestToAmbiguousTreeVisitor, self).__init__(callbacks, forest_sum_visitor) def visit_token_node(self, node): self.output_stack[-1].children.append(node) def visit_symbol_node_in(self, node): + if self.forest_sum_visitor and node.is_ambiguous and isinf(node.priority): + self.forest_sum_visitor.go(node) if not node.is_intermediate and node.is_ambiguous: self.output_stack.append(Tree('_ambig', [])) return iter(node.children) @@ -374,9 +345,6 @@ class ForestToAmbiguousTreeVisitor(ForestVisitor): self.result = result def visit_packed_node_in(self, node): - #### NOTE: - ## When an intermediate node (node.parent.s == tuple) has ambiguous children this - ## forest visitor will break. if not node.parent.is_intermediate: self.output_stack.append(Tree('drv', [])) return iter([node.left, node.right]) @@ -462,20 +430,3 @@ class ForestToPyDotVisitor(ForestVisitor): child_graph_node_id = str(id(child)) child_graph_node = self.graph.get_node(child_graph_node_id)[0] self.graph.add_edge(self.pydot.Edge(graph_node, child_graph_node)) - -class Forest(Tree): - def __init__(self, root, callbacks): - self.root = root - self.callbacks = callbacks - self.data = '_ambig' - self._children = None - - @property - def children(self): - if self._children is None: - t = ForestToAmbiguousTreeVisitor(self.callbacks).go(self.root) - self._children = t.children - return self._children - - def to_pydot(self, filename): - ForestToPyDotVisitor().go(self.root, filename) \ No newline at end of file diff --git a/lark/parsers/xearley.py b/lark/parsers/xearley.py index 241e491..d1458dc 100644 --- a/lark/parsers/xearley.py +++ b/lark/parsers/xearley.py @@ -24,11 +24,11 @@ from .grammar_analysis import 
GrammarAnalyzer from ..grammar import NonTerminal, Terminal from .earley import ApplyCallbacks from .earley_common import Item, TransitiveItem -from .earley_forest import ForestToTreeVisitor, ForestSumVisitor, SymbolNode, Forest +from .earley_forest import ForestToTreeVisitor, ForestToAmbiguousTreeVisitor, ForestSumVisitor, ForestToPyDotVisitor, SymbolNode class Parser: - def __init__(self, parser_conf, term_matcher, resolve_ambiguity=True, forest_sum_visitor = ForestSumVisitor, ignore = (), complete_lex = False): + def __init__(self, parser_conf, term_matcher, resolve_ambiguity=True, ignore = (), complete_lex = False): analysis = GrammarAnalyzer(parser_conf) self.parser_conf = parser_conf self.resolve_ambiguity = resolve_ambiguity @@ -41,15 +41,25 @@ class Parser: self.predictions = {} ## These could be moved to the grammar analyzer. Pre-computing these is *much* faster than - # the slow 'isupper' in is_terminal. + # the slow 'isupper' in is_terminal; or even called sym.is_term directly. self.TERMINALS = { sym for r in parser_conf.rules for sym in r.expansion if sym.is_term } self.NON_TERMINALS = { sym for r in parser_conf.rules for sym in r.expansion if not sym.is_term } + self.forest_sum_visitor = None for rule in parser_conf.rules: self.callbacks[rule] = getattr(parser_conf.callback, rule.alias or rule.origin, None) self.predictions[rule.origin] = [x.rule for x in analysis.expand_rule(rule.origin)] - self.forest_tree_visitor = ForestToTreeVisitor(forest_sum_visitor, self.callbacks) + ## Detect if any rules have priorities set. If the user specified priority = "none" then + # the priorities will be stripped from all rules before they reach us, allowing us to + # skip the extra tree walk. 
+ if self.forest_sum_visitor is None and rule.options and rule.options.priority is not None: + self.forest_sum_visitor = ForestSumVisitor() + + if resolve_ambiguity: + self.forest_tree_visitor = ForestToTreeVisitor(self.callbacks, self.forest_sum_visitor) + else: + self.forest_tree_visitor = ForestToAmbiguousTreeVisitor(self.callbacks, self.forest_sum_visitor) self.term_matcher = term_matcher def parse(self, stream, start_symbol=None): @@ -362,11 +372,6 @@ class Parser: elif len(solutions) > 1: raise Exception('Earley should not generate more than one start symbol - bug') - ## If we're not resolving ambiguity, we just return the root of the SPPF tree to the caller. - # This means the caller can work directly with the SPPF tree. - if not self.resolve_ambiguity: - return Forest(solutions[0], self.callbacks) - - # ... otherwise, disambiguate and convert the SPPF to an AST, removing any ambiguities - # according to the rules. + # Perform our SPPF -> AST conversion using the right ForestVisitor. 
return self.forest_tree_visitor.go(solutions[0]) + diff --git a/lark/reconstruct.py b/lark/reconstruct.py index 2e51e93..1ab679e 100644 --- a/lark/reconstruct.py +++ b/lark/reconstruct.py @@ -96,13 +96,13 @@ class Reconstructor: sym = NonTerminal(r.alias) if r.alias else r.origin - yield Rule(sym, recons_exp, MakeMatchTree(sym.name, r.expansion)) + yield Rule(sym, recons_exp, alias=MakeMatchTree(sym.name, r.expansion)) for origin, rule_aliases in aliases.items(): for alias in rule_aliases: - yield Rule(origin, [Terminal(alias)], MakeMatchTree(origin.name, [NonTerminal(alias)])) + yield Rule(origin, [Terminal(alias)], alias=MakeMatchTree(origin.name, [NonTerminal(alias)])) - yield Rule(origin, [Terminal(origin.name)], MakeMatchTree(origin.name, [origin])) + yield Rule(origin, [Terminal(origin.name)], alias=MakeMatchTree(origin.name, [origin])) diff --git a/lark/tools/standalone.py b/lark/tools/standalone.py index 1f664b6..1cc08a6 100644 --- a/lark/tools/standalone.py +++ b/lark/tools/standalone.py @@ -208,7 +208,7 @@ class TreeBuilderAtoms: print('RULES = {') for i, r in enumerate(self.rules): rule_ids[r] = i - print(' %d: Rule(%r, [%s], %r, %r),' % (i, r.origin, ', '.join(s.fullrepr for s in r.expansion), self.ptb.user_aliases[r], r.options )) + print(' %d: Rule(%r, [%s], alias=%r, options=%r),' % (i, r.origin, ', '.join(s.fullrepr for s in r.expansion), self.ptb.user_aliases[r], r.options )) print('}') print('parse_tree_builder = ParseTreeBuilder(RULES.values(), Tree)') diff --git a/lark/utils.py b/lark/utils.py index 8de0b3a..1fc3ebb 100644 --- a/lark/utils.py +++ b/lark/utils.py @@ -75,6 +75,12 @@ def smart_decorator(f, create_decorator): else: return create_decorator(f.__func__.__call__, True) +def dedup_list(l): + """Given a list (l) will remove duplicates from the list, + preserving the original order of the list. 
Assumes that + the list entries are hashable.""" + dedup = set() + return [ x for x in l if not (x in dedup or dedup.add(x))] ###} diff --git a/tests/test_parser.py b/tests/test_parser.py index 10af1a8..19b09f2 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -201,7 +201,7 @@ def _make_full_earley_test(LEXER): l = Lark(grammar, parser='earley', lexer=LEXER) res = l.parse("aaa") - self.assertEqual(res.children, ['aa', 'a']) + self.assertEqual(res.children, ['a', 'aa']) def test_earley4(self): grammar = """ @@ -211,6 +211,7 @@ def _make_full_earley_test(LEXER): l = Lark(grammar, parser='earley', lexer=LEXER) res = l.parse("aaa") +# print(res.pretty()) self.assertEqual(res.children, ['aaa']) def test_earley_repeating_empty(self): @@ -1069,7 +1070,7 @@ def _make_parser_test(LEXER, PARSER): bb_.1: "bb" """ - l = Lark(grammar, ambiguity='resolve__antiscore_sum') + l = Lark(grammar, priority="invert") res = l.parse('abba') self.assertEqual(''.join(child.data for child in res.children), 'ab_b_a_') @@ -1082,8 +1083,9 @@ def _make_parser_test(LEXER, PARSER): bb_: "bb" """ - l = Lark(grammar, ambiguity='resolve__antiscore_sum') + l = Lark(grammar, priority="invert") res = l.parse('abba') +# print(res.pretty()) self.assertEqual(''.join(child.data for child in res.children), 'indirection') grammar = """ @@ -1095,7 +1097,7 @@ def _make_parser_test(LEXER, PARSER): bb_.3: "bb" """ - l = Lark(grammar, ambiguity='resolve__antiscore_sum') + l = Lark(grammar, priority="invert") res = l.parse('abba') self.assertEqual(''.join(child.data for child in res.children), 'ab_b_a_') @@ -1108,7 +1110,7 @@ def _make_parser_test(LEXER, PARSER): bb_.3: "bb" """ - l = Lark(grammar, ambiguity='resolve__antiscore_sum') + l = Lark(grammar, priority="invert") res = l.parse('abba') self.assertEqual(''.join(child.data for child in res.children), 'indirection')