From 852607b978584ecdec68ac115dd8554cdb0a2305 Mon Sep 17 00:00:00 2001
From: Erez Shinan
Date: Mon, 11 Dec 2017 00:29:27 +0200
Subject: [PATCH] BUGFIX: Tokens of different type were equal, causing disambiguation errors (Issue #21)

---
 lark/lexer.py                 |  8 ++++++++
 lark/parsers/resolve_ambig.py | 15 ++-------------
 2 files changed, 10 insertions(+), 13 deletions(-)

diff --git a/lark/lexer.py b/lark/lexer.py
index c24a5b3..4fcdc95 100644
--- a/lark/lexer.py
+++ b/lark/lexer.py
@@ -40,6 +40,14 @@ class Token(Str):
     def __deepcopy__(self, memo):
         return Token(self.type, self.value, self.pos_in_stream, self.line, self.column)
 
+    def __eq__(self, other):
+        if isinstance(other, Token) and self.type != other.type:
+            return False
+
+        return Str.__eq__(self, other)
+
+    __hash__ = Str.__hash__
+
 class Regex:
     def __init__(self, pattern, flags=()):
         self.pattern = pattern
diff --git a/lark/parsers/resolve_ambig.py b/lark/parsers/resolve_ambig.py
index c965433..f60a3f0 100644
--- a/lark/parsers/resolve_ambig.py
+++ b/lark/parsers/resolve_ambig.py
@@ -84,23 +84,12 @@ def _antiscore_sum_drv(tree):
     if not isinstance(tree, Tree):
         return 0
 
-    # XXX These artifacts can appear due to imperfections in the ordering of Visitor_NoRecurse,
-    # when confronted with duplicate (same-id) nodes. Fixing this ordering is possible, but would be
-    # computationally inefficient. So we handle it here.
-    if tree.data == '_ambig':
-        _antiscore_sum_resolve_ambig(tree)
+    assert tree.data != '_ambig'
 
-    try:
-        priority = tree.rule.options.priority
-    except AttributeError:
-        # Probably trees that don't take part in this parse (better way to distinguish?)
-        priority = None
-
-    return (priority or 0) + sum(map(_antiscore_sum_drv, tree.children), 0)
+    return _sum_priority(tree)
 
 def _antiscore_sum_resolve_ambig(tree):
     assert tree.data == '_ambig'
-
     best = min(tree.children, key=_antiscore_sum_drv)
     assert best.data == 'drv'
     tree.set('drv', best.children)