@@ -0,0 +1,39 @@ | |||||
#
# This example shows how to get explicit ambiguity from Lark's Earley parser.
#
from lark import Lark | |||||
g = """ | |||||
sentence: noun verb noun -> simple | |||||
| noun verb "like" noun -> comparative | |||||
noun: ADJ? NOUN | |||||
verb: VERB | |||||
NOUN: "flies" | "bananas" | "fruit" | |||||
VERB: "like" | "flies" | |||||
ADJ: "fruit" | |||||
%import common.WS | |||||
%ignore WS | |||||
""" | |||||
lark = Lark(g, start='sentence', ambiguity='explicit') | |||||
print(lark.parse('fruit flies like bananas').pretty()) | |||||
# Outputs: | |||||
# | |||||
# _ambig | |||||
# comparative | |||||
# noun fruit | |||||
# verb flies | |||||
# noun bananas | |||||
# simple | |||||
# noun | |||||
# fruit | |||||
# flies | |||||
# verb like | |||||
# noun bananas | |||||
@@ -3,4 +3,4 @@ from .common import ParseError, GrammarError
 from .lark import Lark
 from .utils import inline_args

-__version__ = "0.2.6"
+__version__ = "0.2.7"
@@ -27,6 +27,11 @@ class LarkOptions(object): | |||||
"contextual": Stronger lexer (only works with parser="lalr") | "contextual": Stronger lexer (only works with parser="lalr") | ||||
"auto" (default): Choose for me based on grammar and parser | "auto" (default): Choose for me based on grammar and parser | ||||
ambiguity - Decides how to handle ambiguity in the parse. Only relevant if parser="earley" | |||||
"resolve": The parser will automatically choose the simplest derivation | |||||
(it chooses consistently: greedy for tokens, non-greedy for rules) | |||||
"explicit": The parser will return all derivations wrapped in "_ambig" tree nodes (i.e. a forest). | |||||
transformer - Applies the transformer to every parse tree | transformer - Applies the transformer to every parse tree | ||||
debug - Affects verbosity (default: False) | debug - Affects verbosity (default: False) | ||||
keep_all_tokens - Don't automagically remove "punctuation" tokens (default: False) | keep_all_tokens - Don't automagically remove "punctuation" tokens (default: False) | ||||
@@ -49,6 +54,7 @@ class LarkOptions(object): | |||||
self.transformer = o.pop('transformer', None) | self.transformer = o.pop('transformer', None) | ||||
self.start = o.pop('start', 'start') | self.start = o.pop('start', 'start') | ||||
self.profile = o.pop('profile', False) | self.profile = o.pop('profile', False) | ||||
self.ambiguity = o.pop('ambiguity', 'auto') | |||||
assert self.parser in ('earley', 'lalr', None) | assert self.parser in ('earley', 'lalr', None) | ||||
@@ -119,13 +125,20 @@ class Lark: | |||||
assert not self.options.profile, "Feature temporarily disabled" | assert not self.options.profile, "Feature temporarily disabled" | ||||
self.profiler = Profiler() if self.options.profile else None | self.profiler = Profiler() if self.options.profile else None | ||||
lexer = self.options.lexer | |||||
if lexer == 'auto': | |||||
if self.options.lexer == 'auto': | |||||
if self.options.parser == 'lalr': | if self.options.parser == 'lalr': | ||||
lexer = 'standard' | |||||
self.options.lexer = 'standard' | |||||
elif self.options.parser == 'earley': | elif self.options.parser == 'earley': | ||||
lexer = None | |||||
self.options.lexer = lexer | |||||
self.options.lexer = None | |||||
lexer = self.options.lexer | |||||
assert lexer in ('standard', 'contextual', None) | |||||
if self.options.ambiguity == 'auto': | |||||
if self.options.parser == 'earley': | |||||
self.options.ambiguity = 'resolve' | |||||
else: | |||||
assert self.options.parser == 'earley' | |||||
assert self.options.ambiguity in ('resolve', 'explicit', 'auto') | |||||
self.grammar = load_grammar(grammar, source) | self.grammar = load_grammar(grammar, source) | ||||
tokens, self.rules, self.grammar_extra = self.grammar.compile(lexer=bool(lexer), start=self.options.start) | tokens, self.rules, self.grammar_extra = self.grammar.compile(lexer=bool(lexer), start=self.options.start) | ||||
@@ -155,7 +168,7 @@ class Lark: | |||||
setattr(callback, f, self.profiler.make_wrapper('transformer', getattr(callback, f))) | setattr(callback, f, self.profiler.make_wrapper('transformer', getattr(callback, f))) | ||||
parser_conf = ParserConf(rules, callback, self.options.start) | parser_conf = ParserConf(rules, callback, self.options.start) | ||||
return self.parser_class(self.lexer_conf, parser_conf) | |||||
return self.parser_class(self.lexer_conf, parser_conf, options=self.options) | |||||
def lex(self, text): | def lex(self, text): | ||||
@@ -20,7 +20,7 @@ class WithLexer: | |||||
return stream | return stream | ||||
class LALR(WithLexer): | class LALR(WithLexer): | ||||
def __init__(self, lexer_conf, parser_conf): | |||||
def __init__(self, lexer_conf, parser_conf, options=None): | |||||
WithLexer.__init__(self, lexer_conf) | WithLexer.__init__(self, lexer_conf) | ||||
self.parser_conf = parser_conf | self.parser_conf = parser_conf | ||||
@@ -31,7 +31,7 @@ class LALR(WithLexer): | |||||
return self.parser.parse(tokens) | return self.parser.parse(tokens) | ||||
class LALR_ContextualLexer: | class LALR_ContextualLexer: | ||||
def __init__(self, lexer_conf, parser_conf): | |||||
def __init__(self, lexer_conf, parser_conf, options=None): | |||||
self.lexer_conf = lexer_conf | self.lexer_conf = lexer_conf | ||||
self.parser_conf = parser_conf | self.parser_conf = parser_conf | ||||
@@ -126,12 +126,16 @@ class OldEarley_NoLex: | |||||
return res[0] | return res[0] | ||||
class Earley_NoLex: | class Earley_NoLex: | ||||
def __init__(self, lexer_conf, parser_conf): | |||||
def __init__(self, lexer_conf, parser_conf, options=None): | |||||
self.token_by_name = {t.name:t for t in lexer_conf.tokens} | self.token_by_name = {t.name:t for t in lexer_conf.tokens} | ||||
rules = [(n, list(self._prepare_expansion(x)), a) for n,x,a in parser_conf.rules] | rules = [(n, list(self._prepare_expansion(x)), a) for n,x,a in parser_conf.rules] | ||||
self.parser = earley.Parser(rules, parser_conf.start, parser_conf.callback) | |||||
resolve_ambiguity = (options.ambiguity=='resolve') if options else True | |||||
self.parser = earley.Parser(rules, | |||||
parser_conf.start, | |||||
parser_conf.callback, | |||||
resolve_ambiguity=resolve_ambiguity) | |||||
def _prepare_expansion(self, expansion): | def _prepare_expansion(self, expansion): | ||||
for sym in expansion: | for sym in expansion: | ||||
@@ -149,12 +153,16 @@ class Earley_NoLex: | |||||
return self.parser.parse(new_text) | return self.parser.parse(new_text) | ||||
class Earley(WithLexer): | class Earley(WithLexer): | ||||
def __init__(self, lexer_conf, parser_conf): | |||||
def __init__(self, lexer_conf, parser_conf, options=None): | |||||
WithLexer.__init__(self, lexer_conf) | WithLexer.__init__(self, lexer_conf) | ||||
rules = [(n, self._prepare_expansion(x), a) for n,x,a in parser_conf.rules] | rules = [(n, self._prepare_expansion(x), a) for n,x,a in parser_conf.rules] | ||||
self.parser = earley.Parser(rules, parser_conf.start, parser_conf.callback) | |||||
resolve_ambiguity = (options.ambiguity=='resolve') if options else True | |||||
self.parser = earley.Parser(rules, | |||||
parser_conf.start, | |||||
parser_conf.callback, | |||||
resolve_ambiguity=resolve_ambiguity) | |||||
def _prepare_expansion(self, expansion): | def _prepare_expansion(self, expansion): | ||||
return [Terminal_Token(sym) if is_terminal(sym) else sym for sym in expansion] | return [Terminal_Token(sym) if is_terminal(sym) else sym for sym in expansion] | ||||
@@ -101,10 +101,10 @@ class Column: | |||||
# XXX Potential bug: What happens if there's ambiguity in an empty rule? | # XXX Potential bug: What happens if there's ambiguity in an empty rule? | ||||
if item.rule.expansion and item in self.completed: | if item.rule.expansion and item in self.completed: | ||||
old_tree = self.completed[item].tree | old_tree = self.completed[item].tree | ||||
if old_tree.data != 'ambig': | |||||
if old_tree.data != '_ambig': | |||||
new_tree = old_tree.copy() | new_tree = old_tree.copy() | ||||
new_tree.rule = old_tree.rule | new_tree.rule = old_tree.rule | ||||
old_tree.set('ambig', [new_tree]) | |||||
old_tree.set('_ambig', [new_tree]) | |||||
if item.tree.children[0] is old_tree: # XXX a little hacky! | if item.tree.children[0] is old_tree: # XXX a little hacky! | ||||
raise ParseError("Infinite recursion in grammar!") | raise ParseError("Infinite recursion in grammar!") | ||||
old_tree.children.append(item.tree) | old_tree.children.append(item.tree) | ||||
@@ -125,9 +125,10 @@ class Column: | |||||
return bool(self.item_count) | return bool(self.item_count) | ||||
class Parser: | class Parser: | ||||
def __init__(self, rules, start, callback): | |||||
def __init__(self, rules, start, callback, resolve_ambiguity=True): | |||||
self.analysis = GrammarAnalyzer(rules, start) | self.analysis = GrammarAnalyzer(rules, start) | ||||
self.start = start | self.start = start | ||||
self.resolve_ambiguity = resolve_ambiguity | |||||
self.postprocess = {} | self.postprocess = {} | ||||
self.predictions = {} | self.predictions = {} | ||||
@@ -197,9 +198,11 @@ class Parser: | |||||
elif len(solutions) == 1: | elif len(solutions) == 1: | ||||
tree = solutions[0] | tree = solutions[0] | ||||
else: | else: | ||||
tree = Tree('ambig', solutions) | |||||
tree = Tree('_ambig', solutions) | |||||
if self.resolve_ambiguity: | |||||
ResolveAmbig().visit(tree) | |||||
ResolveAmbig().visit(tree) | |||||
return ApplyCallbacks(self.postprocess).transform(tree) | return ApplyCallbacks(self.postprocess).transform(tree) | ||||
@@ -220,9 +223,8 @@ def _compare_rules(rule1, rule2): | |||||
assert rule1.origin == rule2.origin | assert rule1.origin == rule2.origin | ||||
c = compare( len(rule1.expansion), len(rule2.expansion)) | c = compare( len(rule1.expansion), len(rule2.expansion)) | ||||
if rule1.origin.startswith('__'): # XXX hack! We need to set priority in parser, not here | if rule1.origin.startswith('__'): # XXX hack! We need to set priority in parser, not here | ||||
return c | |||||
else: | |||||
return -c | |||||
c = -c | |||||
return c | |||||
def _compare_drv(tree1, tree2): | def _compare_drv(tree1, tree2): | ||||
if not (isinstance(tree1, Tree) and isinstance(tree2, Tree)): | if not (isinstance(tree1, Tree) and isinstance(tree2, Tree)): | ||||
@@ -242,8 +244,8 @@ def _compare_drv(tree1, tree2): | |||||
 class ResolveAmbig(Visitor_NoRecurse):
-    def ambig(self, tree):
-        best = max(tree.children, key=cmp_to_key(_compare_drv))
+    def _ambig(self, tree):
+        best = min(tree.children, key=cmp_to_key(_compare_drv))
assert best.data == 'drv' | assert best.data == 'drv' | ||||
tree.set('drv', best.children) | tree.set('drv', best.children) | ||||
tree.rule = best.rule # needed for applying callbacks | tree.rule = best.rule # needed for applying callbacks | ||||
@@ -120,6 +120,23 @@ class TestEarley(unittest.TestCase): | |||||
empty_tree = Tree('empty', [Tree('empty2', [])]) | empty_tree = Tree('empty', [Tree('empty2', [])]) | ||||
self.assertSequenceEqual(res.children, ['a', empty_tree, empty_tree, 'b']) | self.assertSequenceEqual(res.children, ['a', empty_tree, empty_tree, 'b']) | ||||
def test_earley_explicit_ambiguity(self): | |||||
# This was a sneaky bug! | |||||
grammar = """ | |||||
start: a b | ab | |||||
a: "a" | |||||
b: "b" | |||||
ab: "ab" | |||||
""" | |||||
parser = Lark(grammar, parser='earley', lexer=None, ambiguity='explicit') | |||||
res = parser.parse('ab') | |||||
self.assertEqual( res.data, '_ambig') | |||||
self.assertEqual( len(res.children), 2) | |||||
def _make_parser_test(LEXER, PARSER): | def _make_parser_test(LEXER, PARSER): | ||||
def _Lark(grammar, **kwargs): | def _Lark(grammar, **kwargs): | ||||
return Lark(grammar, lexer=LEXER, parser=PARSER, **kwargs) | return Lark(grammar, lexer=LEXER, parser=PARSER, **kwargs) | ||||