@@ -139,7 +139,7 @@ class Lark: | |||||
if self.options.parser == 'earley': | if self.options.parser == 'earley': | ||||
self.options.ambiguity = 'resolve' | self.options.ambiguity = 'resolve' | ||||
else: | else: | ||||
assert self.options.parser == 'earley' | |||||
assert self.options.parser == 'earley', "Only Earley supports disambiguation right now" | |||||
assert self.options.ambiguity in ('resolve', 'explicit', 'auto') | assert self.options.ambiguity in ('resolve', 'explicit', 'auto') | ||||
# Parse the grammar file and compose the grammars (TODO) | # Parse the grammar file and compose the grammars (TODO) | ||||
@@ -4,7 +4,7 @@ import sre_parse | |||||
from .lexer import Lexer, ContextualLexer, Token | from .lexer import Lexer, ContextualLexer, Token | ||||
from .common import is_terminal, GrammarError, ParserConf, Terminal_Regexp, Terminal_Token | from .common import is_terminal, GrammarError, ParserConf, Terminal_Regexp, Terminal_Token | ||||
from .parsers import lalr_parser, earley, xearley | |||||
from .parsers import lalr_parser, earley, xearley, resolve_ambig | |||||
class WithLexer: | class WithLexer: | ||||
def __init__(self, lexer_conf): | def __init__(self, lexer_conf): | ||||
@@ -48,6 +48,12 @@ class LALR_ContextualLexer: | |||||
tokens = self.lexer_conf.postlex.process(tokens) | tokens = self.lexer_conf.postlex.process(tokens) | ||||
return self.parser.parse(tokens, self.lexer.set_parser_state) | return self.parser.parse(tokens, self.lexer.set_parser_state) | ||||
def get_ambiguity_resolver(options):
    """Return the ambiguity-resolution callable selected by ``options``.

    Args:
        options: An object exposing an ``ambiguity`` attribute, or a falsy
            value (e.g. ``None``) to request the default behaviour.

    Returns:
        ``resolve_ambig.resolve_ambig`` when ``options`` is falsy or
        ``options.ambiguity == 'resolve'``; ``None`` when
        ``options.ambiguity == 'explicit'``.

    Raises:
        ValueError: If ``options.ambiguity`` is any other value.
    """
    if not options or options.ambiguity == 'resolve':
        return resolve_ambig.resolve_ambig
    if options.ambiguity == 'explicit':
        # A falsy resolver makes the parsers skip the resolution step.
        return None
    # Name the offending value instead of dumping the whole options object.
    raise ValueError(
        "Unsupported ambiguity option %r (expected 'resolve' or 'explicit')"
        % (options.ambiguity,))
def tokenize_text(text): | def tokenize_text(text): | ||||
new_text = [] | new_text = [] | ||||
@@ -66,11 +72,10 @@ class Earley_NoLex: | |||||
rules = [(n, list(self._prepare_expansion(x)), a, o) for n,x,a,o in parser_conf.rules] | rules = [(n, list(self._prepare_expansion(x)), a, o) for n,x,a,o in parser_conf.rules] | ||||
resolve_ambiguity = (options.ambiguity=='resolve') if options else True | |||||
self.parser = earley.Parser(rules, | |||||
self.parser = earley.Parser(rules, | |||||
parser_conf.start, | parser_conf.start, | ||||
parser_conf.callback, | parser_conf.callback, | ||||
resolve_ambiguity=resolve_ambiguity) | |||||
resolve_ambiguity=get_ambiguity_resolver(options)) | |||||
def _prepare_expansion(self, expansion): | def _prepare_expansion(self, expansion): | ||||
for sym in expansion: | for sym in expansion: | ||||
@@ -93,11 +98,10 @@ class Earley(WithLexer): | |||||
rules = [(n, self._prepare_expansion(x), a, o) for n,x,a,o in parser_conf.rules] | rules = [(n, self._prepare_expansion(x), a, o) for n,x,a,o in parser_conf.rules] | ||||
resolve_ambiguity = (options.ambiguity=='resolve') if options else True | |||||
self.parser = earley.Parser(rules, | self.parser = earley.Parser(rules, | ||||
parser_conf.start, | parser_conf.start, | ||||
parser_conf.callback, | parser_conf.callback, | ||||
resolve_ambiguity=resolve_ambiguity) | |||||
resolve_ambiguity=get_ambiguity_resolver(options)) | |||||
def _prepare_expansion(self, expansion): | def _prepare_expansion(self, expansion): | ||||
return [Terminal_Token(sym) if is_terminal(sym) else sym for sym in expansion] | return [Terminal_Token(sym) if is_terminal(sym) else sym for sym in expansion] | ||||
@@ -113,13 +117,12 @@ class XEarley: | |||||
rules = [(n, list(self._prepare_expansion(x)), a, o) for n,x,a,o in parser_conf.rules] | rules = [(n, list(self._prepare_expansion(x)), a, o) for n,x,a,o in parser_conf.rules] | ||||
resolve_ambiguity = (options.ambiguity=='resolve') if options else True | |||||
ignore = [Terminal_Regexp(x, self.token_by_name[x].pattern.to_regexp()) for x in lexer_conf.ignore] | ignore = [Terminal_Regexp(x, self.token_by_name[x].pattern.to_regexp()) for x in lexer_conf.ignore] | ||||
self.parser = xearley.Parser(rules, | self.parser = xearley.Parser(rules, | ||||
parser_conf.start, | parser_conf.start, | ||||
parser_conf.callback, | parser_conf.callback, | ||||
resolve_ambiguity=resolve_ambiguity, | |||||
resolve_ambiguity=get_ambiguity_resolver(options), | |||||
ignore=ignore, | ignore=ignore, | ||||
) | ) | ||||
@@ -13,9 +13,6 @@ | |||||
# Author: Erez Shinan (2017) | # Author: Erez Shinan (2017) | ||||
# Email : erezshin@gmail.com | # Email : erezshin@gmail.com | ||||
from functools import cmp_to_key | |||||
from ..utils import compare | |||||
from ..common import ParseError, UnexpectedToken, Terminal | from ..common import ParseError, UnexpectedToken, Terminal | ||||
from ..tree import Tree, Visitor_NoRecurse, Transformer_NoRecurse | from ..tree import Tree, Visitor_NoRecurse, Transformer_NoRecurse | ||||
from .grammar_analysis import GrammarAnalyzer | from .grammar_analysis import GrammarAnalyzer | ||||
@@ -136,7 +133,7 @@ class Column: | |||||
return bool(self.item_count) | return bool(self.item_count) | ||||
class Parser: | class Parser: | ||||
def __init__(self, rules, start_symbol, callback, resolve_ambiguity=True): | |||||
def __init__(self, rules, start_symbol, callback, resolve_ambiguity=None): | |||||
self.analysis = GrammarAnalyzer(rules, start_symbol) | self.analysis = GrammarAnalyzer(rules, start_symbol) | ||||
self.start_symbol = start_symbol | self.start_symbol = start_symbol | ||||
self.resolve_ambiguity = resolve_ambiguity | self.resolve_ambiguity = resolve_ambiguity | ||||
@@ -213,10 +210,9 @@ class Parser: | |||||
tree = Tree('_ambig', solutions) | tree = Tree('_ambig', solutions) | ||||
if self.resolve_ambiguity: | if self.resolve_ambiguity: | ||||
ResolveAmbig().visit(tree) | |||||
tree = self.resolve_ambiguity(tree) | |||||
return ApplyCallbacks(self.postprocess).transform(tree) | return ApplyCallbacks(self.postprocess).transform(tree) | ||||
class ApplyCallbacks(Transformer_NoRecurse): | class ApplyCallbacks(Transformer_NoRecurse): | ||||
@@ -231,66 +227,6 @@ class ApplyCallbacks(Transformer_NoRecurse): | |||||
else: | else: | ||||
return Tree(rule.origin, children) | return Tree(rule.origin, children) | ||||
def _compare_rules(rule1, rule2):
    """Order two rules, cmp-style (negative, zero or positive).

    Rules with the same origin are ordered by expansion length, with the
    sign flipped when the origin starts with '__'. Rules with different
    origins are ordered by descending priority when both define one, and
    are otherwise considered equal.
    """
    if rule1.origin == rule2.origin:
        order = compare(len(rule1.expansion), len(rule2.expansion))
        # XXX hack! We need to set priority in parser, not here
        return -order if rule1.origin.startswith('__') else order

    # Different origins: only explicit priorities can tell them apart.
    if not (rule1.options and rule2.options):
        return 0
    if rule1.options.priority is None or rule2.options.priority is None:
        return 0

    assert rule1.options.priority != rule2.options.priority, "Priority is the same between both rules: %s == %s" % (rule1, rule2)
    return -compare(rule1.options.priority, rule2.options.priority)
def _compare_drv(tree1, tree2):
    """Order two derivations, cmp-style, for ambiguity resolution.

    Operands that are not both trees, or that lack a ``rule`` attribute,
    are ordered by the negated plain comparison. Otherwise the rules are
    compared first, then the children pairwise, then the child counts.
    """
    both_trees = isinstance(tree1, Tree) and isinstance(tree2, Tree)
    if not both_trees:
        return -compare(tree1, tree2)

    try:
        tree1.rule, tree2.rule  # attribute probe only
    except AttributeError:
        # Probably trees that don't take part in this parse (better way to distinguish?)
        return -compare(tree1, tree2)

    # XXX These artifacts can appear due to imperfections in the ordering of Visitor_NoRecurse,
    #     when confronted with duplicate (same-id) nodes. Fixing this ordering is possible, but would be
    #     computationally inefficient. So we handle it here.
    for candidate in (tree1, tree2):
        if candidate.data == '_ambig':
            _resolve_ambig(candidate)

    by_rule = _compare_rules(tree1.rule, tree2.rule)
    if by_rule:
        return by_rule

    # rules are "equal", so compare trees
    for left, right in zip(tree1.children, tree2.children):
        by_child = _compare_drv(left, right)
        if by_child:
            return by_child

    return compare(len(tree1.children), len(tree2.children))
def _resolve_ambig(tree):
    """Collapse an '_ambig' node, in place, into its best derivation.

    The child that sorts lowest under ``_compare_drv`` is chosen; the node
    takes over its children and its rule and becomes a 'drv' node.
    """
    assert tree.data == '_ambig'

    ranking = cmp_to_key(_compare_drv)
    winner = min(tree.children, key=ranking)
    assert winner.data == 'drv'

    tree.set('drv', winner.children)
    tree.rule = winner.rule    # needed for applying callbacks

    assert tree.data != '_ambig'
class ResolveAmbig(Visitor_NoRecurse):
    """Visitor that resolves ambiguity nodes in place.

    NOTE(review): presumably Visitor_NoRecurse dispatches on the node's
    ``data``, so ``_ambig`` runs for '_ambig' nodes - confirm against tree.py.
    """

    def _ambig(self, tree):
        """Replace this ambiguity node with its best derivation."""
        _resolve_ambig(tree)
# RULES = [ | # RULES = [ | ||||
# ('a', ['d']), | # ('a', ['d']), | ||||
# ('d', ['b']), | # ('d', ['b']), | ||||
@@ -0,0 +1,68 @@ | |||||
from ..utils import compare | |||||
from functools import cmp_to_key | |||||
from ..tree import Tree, Visitor_NoRecurse | |||||
def _compare_rules(rule1, rule2):
    """Order two rules, cmp-style (negative, zero or positive).

    Rules with the same origin are ordered by expansion length, with the
    sign flipped when the origin starts with '__'. Rules with different
    origins are ordered by descending priority when both define one, and
    are otherwise considered equal.
    """
    if rule1.origin == rule2.origin:
        order = compare(len(rule1.expansion), len(rule2.expansion))
        # XXX hack! We need to set priority in parser, not here
        return -order if rule1.origin.startswith('__') else order

    # Different origins: only explicit priorities can tell them apart.
    if not (rule1.options and rule2.options):
        return 0
    if rule1.options.priority is None or rule2.options.priority is None:
        return 0

    assert rule1.options.priority != rule2.options.priority, "Priority is the same between both rules: %s == %s" % (rule1, rule2)
    return -compare(rule1.options.priority, rule2.options.priority)
def _compare_drv(tree1, tree2):
    """Order two derivations, cmp-style, for ambiguity resolution.

    Operands that are not both trees, or that lack a ``rule`` attribute,
    are ordered by the negated plain comparison. Otherwise the rules are
    compared first, then the children pairwise, then the child counts.
    """
    both_trees = isinstance(tree1, Tree) and isinstance(tree2, Tree)
    if not both_trees:
        return -compare(tree1, tree2)

    try:
        tree1.rule, tree2.rule  # attribute probe only
    except AttributeError:
        # Probably trees that don't take part in this parse (better way to distinguish?)
        return -compare(tree1, tree2)

    # XXX These artifacts can appear due to imperfections in the ordering of Visitor_NoRecurse,
    #     when confronted with duplicate (same-id) nodes. Fixing this ordering is possible, but would be
    #     computationally inefficient. So we handle it here.
    for candidate in (tree1, tree2):
        if candidate.data == '_ambig':
            _resolve_ambig(candidate)

    by_rule = _compare_rules(tree1.rule, tree2.rule)
    if by_rule:
        return by_rule

    # rules are "equal", so compare trees
    for left, right in zip(tree1.children, tree2.children):
        by_child = _compare_drv(left, right)
        if by_child:
            return by_child

    return compare(len(tree1.children), len(tree2.children))
def _resolve_ambig(tree):
    """Collapse an '_ambig' node, in place, into its best derivation.

    The child that sorts lowest under ``_compare_drv`` is chosen; the node
    takes over its children and its rule and becomes a 'drv' node.
    """
    assert tree.data == '_ambig'

    ranking = cmp_to_key(_compare_drv)
    winner = min(tree.children, key=ranking)
    assert winner.data == 'drv'

    tree.set('drv', winner.children)
    tree.rule = winner.rule    # needed for applying callbacks

    assert tree.data != '_ambig'
class ResolveAmbig(Visitor_NoRecurse):
    """Visitor that resolves ambiguity nodes in place.

    NOTE(review): presumably Visitor_NoRecurse dispatches on the node's
    ``data``, so ``_ambig`` runs for '_ambig' nodes - confirm against tree.py.
    """

    def _ambig(self, tree):
        """Replace this ambiguity node with its best derivation."""
        _resolve_ambig(tree)
def resolve_ambig(tree):
    """Run a ``ResolveAmbig`` visitor over ``tree`` and return the same tree.

    The tree is modified in place; it is returned so that callers can use
    the function as ``tree = resolve_ambig(tree)``.
    """
    visitor = ResolveAmbig()
    visitor.visit(tree)
    return tree
@@ -25,10 +25,10 @@ from ..lexer import Token | |||||
from ..tree import Tree | from ..tree import Tree | ||||
from .grammar_analysis import GrammarAnalyzer | from .grammar_analysis import GrammarAnalyzer | ||||
from .earley import ResolveAmbig, ApplyCallbacks, Item, NewsList, Derivation, END_TOKEN, Column | |||||
from .earley import ApplyCallbacks, Item, Column | |||||
class Parser: | class Parser: | ||||
def __init__(self, rules, start_symbol, callback, resolve_ambiguity=True, ignore=()): | |||||
def __init__(self, rules, start_symbol, callback, resolve_ambiguity=None, ignore=()): | |||||
self.analysis = GrammarAnalyzer(rules, start_symbol) | self.analysis = GrammarAnalyzer(rules, start_symbol) | ||||
self.start_symbol = start_symbol | self.start_symbol = start_symbol | ||||
self.resolve_ambiguity = resolve_ambiguity | self.resolve_ambiguity = resolve_ambiguity | ||||
@@ -132,7 +132,7 @@ class Parser: | |||||
tree = Tree('_ambig', solutions) | tree = Tree('_ambig', solutions) | ||||
if self.resolve_ambiguity: | if self.resolve_ambiguity: | ||||
ResolveAmbig().visit(tree) | |||||
tree = self.resolve_ambiguity(tree) | |||||
return ApplyCallbacks(self.postprocess).transform(tree) | return ApplyCallbacks(self.postprocess).transform(tree) | ||||