@@ -0,0 +1,39 @@
#
# This example shows how to get explicit ambiguity from Lark's Earley parser.
#
from lark import Lark

g = """
    sentence: noun verb noun -> simple
            | noun verb "like" noun -> comparative

    noun: ADJ? NOUN
    verb: VERB

    NOUN: "flies" | "bananas" | "fruit"
    VERB: "like" | "flies"
    ADJ: "fruit"

    %import common.WS
    %ignore WS
"""

lark = Lark(g, start='sentence', ambiguity='explicit')

print(lark.parse('fruit flies like bananas').pretty())
# Outputs:
#
# _ambig
#   comparative
#     noun    fruit
#     verb    flies
#     noun    bananas
#   simple
#     noun
#       fruit
#       flies
#     verb    like
#     noun    bananas
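The forest returned in explicit mode is an ordinary tree whose ambiguous choice points have the data value "_ambig", with one child per competing derivation. A minimal sketch of inspecting it, building only on the example above; the helper below is illustrative and not part of Lark's API:

    def iter_ambiguities(tree):
        "Yield every '_ambig' node in the forest, depth-first."
        if not hasattr(tree, 'data'):       # token leaves have no .data
            return
        if tree.data == '_ambig':
            yield tree
        for child in tree.children:
            for ambig in iter_ambiguities(child):
                yield ambig

    forest = lark.parse('fruit flies like bananas')
    for ambig in iter_ambiguities(forest):
        print('%d alternatives: %s' % (len(ambig.children), [alt.data for alt in ambig.children]))
    # -> 2 alternatives: ['comparative', 'simple']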
@@ -3,4 +3,4 @@ from .common import ParseError, GrammarError
from .lark import Lark
from .utils import inline_args

__version__ = "0.2.6"
__version__ = "0.2.7"
@@ -27,6 +27,11 @@ class LarkOptions(object):
            "contextual": Stronger lexer (only works with parser="lalr")
            "auto" (default): Choose for me based on grammar and parser

        ambiguity - Decides how to handle ambiguity in the parse. Only relevant if parser="earley"
            "resolve": The parser will automatically choose the simplest derivation
                (it chooses consistently: greedy for tokens, non-greedy for rules)
            "explicit": The parser will return all derivations wrapped in "_ambig" tree nodes (i.e. a forest).

        transformer - Applies the transformer to every parse tree
        debug - Affects verbosity (default: False)
        keep_all_tokens - Don't automagically remove "punctuation" tokens (default: False)
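A brief usage sketch of the two settings documented above; the toy grammar has the same shape as the new test added further down and is only illustrative:

    from lark import Lark

    g = """
    start: a b | ab
    a: "a"
    b: "b"
    ab: "ab"
    """

    Lark(g, parser='earley').parse('ab')                         # single tree: 'auto' turns into 'resolve'
    Lark(g, parser='earley', ambiguity='explicit').parse('ab')   # Tree('_ambig', [...]) holding both derivations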
@@ -49,6 +54,7 @@ class LarkOptions(object):
        self.transformer = o.pop('transformer', None)
        self.start = o.pop('start', 'start')
        self.profile = o.pop('profile', False)
        self.ambiguity = o.pop('ambiguity', 'auto')

        assert self.parser in ('earley', 'lalr', None)
@@ -119,13 +125,20 @@ class Lark:
        assert not self.options.profile, "Feature temporarily disabled"
        self.profiler = Profiler() if self.options.profile else None

        lexer = self.options.lexer
        if lexer == 'auto':
        if self.options.lexer == 'auto':
            if self.options.parser == 'lalr':
                lexer = 'standard'
                self.options.lexer = 'standard'
            elif self.options.parser == 'earley':
                lexer = None
            self.options.lexer = lexer
                self.options.lexer = None
        lexer = self.options.lexer
        assert lexer in ('standard', 'contextual', None)

        if self.options.ambiguity == 'auto':
            if self.options.parser == 'earley':
                self.options.ambiguity = 'resolve'
        else:
            assert self.options.parser == 'earley'
        assert self.options.ambiguity in ('resolve', 'explicit', 'auto')

        self.grammar = load_grammar(grammar, source)
        tokens, self.rules, self.grammar_extra = self.grammar.compile(lexer=bool(lexer), start=self.options.start)
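Given the normalization above, 'auto' quietly becomes 'resolve' for the Earley parser, while explicitly requesting ambiguity handling from any other parser trips the assertion. A hedged illustration; the one-rule grammar is a placeholder, not taken from this change:

    from lark import Lark

    g = 'start: "a"'
    Lark(g, parser='lalr')                              # fine: ambiguity stays 'auto'
    Lark(g, parser='earley', ambiguity='explicit')      # fine: forest mode
    Lark(g, parser='lalr', ambiguity='explicit')        # AssertionError: only parser='earley' handles ambiguity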
@@ -155,7 +168,7 @@ class Lark:
                    setattr(callback, f, self.profiler.make_wrapper('transformer', getattr(callback, f)))

        parser_conf = ParserConf(rules, callback, self.options.start)

        return self.parser_class(self.lexer_conf, parser_conf)
        return self.parser_class(self.lexer_conf, parser_conf, options=self.options)

    def lex(self, text):
@@ -20,7 +20,7 @@ class WithLexer:
        return stream

class LALR(WithLexer):
    def __init__(self, lexer_conf, parser_conf):
    def __init__(self, lexer_conf, parser_conf, options=None):
        WithLexer.__init__(self, lexer_conf)

        self.parser_conf = parser_conf
@@ -31,7 +31,7 @@ class LALR(WithLexer):
        return self.parser.parse(tokens)

class LALR_ContextualLexer:
    def __init__(self, lexer_conf, parser_conf):
    def __init__(self, lexer_conf, parser_conf, options=None):
        self.lexer_conf = lexer_conf
        self.parser_conf = parser_conf
@@ -126,12 +126,16 @@ class OldEarley_NoLex:
        return res[0]

class Earley_NoLex:
    def __init__(self, lexer_conf, parser_conf):
    def __init__(self, lexer_conf, parser_conf, options=None):
        self.token_by_name = {t.name:t for t in lexer_conf.tokens}

        rules = [(n, list(self._prepare_expansion(x)), a) for n,x,a in parser_conf.rules]

        self.parser = earley.Parser(rules, parser_conf.start, parser_conf.callback)
        resolve_ambiguity = (options.ambiguity=='resolve') if options else True
        self.parser = earley.Parser(rules,
                                    parser_conf.start,
                                    parser_conf.callback,
                                    resolve_ambiguity=resolve_ambiguity)

    def _prepare_expansion(self, expansion):
        for sym in expansion:
@@ -149,12 +153,16 @@ class Earley_NoLex:
        return self.parser.parse(new_text)

class Earley(WithLexer):
    def __init__(self, lexer_conf, parser_conf):
    def __init__(self, lexer_conf, parser_conf, options=None):
        WithLexer.__init__(self, lexer_conf)

        rules = [(n, self._prepare_expansion(x), a) for n,x,a in parser_conf.rules]

        self.parser = earley.Parser(rules, parser_conf.start, parser_conf.callback)
        resolve_ambiguity = (options.ambiguity=='resolve') if options else True
        self.parser = earley.Parser(rules,
                                    parser_conf.start,
                                    parser_conf.callback,
                                    resolve_ambiguity=resolve_ambiguity)

    def _prepare_expansion(self, expansion):
        return [Terminal_Token(sym) if is_terminal(sym) else sym for sym in expansion]
@@ -101,10 +101,10 @@ class Column:
                # XXX Potential bug: What happens if there's ambiguity in an empty rule?
                if item.rule.expansion and item in self.completed:
                    old_tree = self.completed[item].tree
                    if old_tree.data != 'ambig':
                    if old_tree.data != '_ambig':
                        new_tree = old_tree.copy()
                        new_tree.rule = old_tree.rule
                        old_tree.set('ambig', [new_tree])
                        old_tree.set('_ambig', [new_tree])

                    if item.tree.children[0] is old_tree:   # XXX a little hacky!
                        raise ParseError("Infinite recursion in grammar!")

                    old_tree.children.append(item.tree)
@@ -125,9 +125,10 @@ class Column:
        return bool(self.item_count)

class Parser:
    def __init__(self, rules, start, callback):
    def __init__(self, rules, start, callback, resolve_ambiguity=True):
        self.analysis = GrammarAnalyzer(rules, start)
        self.start = start
        self.resolve_ambiguity = resolve_ambiguity

        self.postprocess = {}
        self.predictions = {}
@@ -197,9 +198,11 @@ class Parser:
        elif len(solutions) == 1:
            tree = solutions[0]
        else:
            tree = Tree('ambig', solutions)
            tree = Tree('_ambig', solutions)

        if self.resolve_ambiguity:
            ResolveAmbig().visit(tree)
        ResolveAmbig().visit(tree)

        return ApplyCallbacks(self.postprocess).transform(tree)
@@ -220,9 +223,8 @@ def _compare_rules(rule1, rule2):
    assert rule1.origin == rule2.origin
    c = compare( len(rule1.expansion), len(rule2.expansion))
    if rule1.origin.startswith('__'):   # XXX hack! We need to set priority in parser, not here
        return c
    else:
        return -c
        c = -c
    return c

def _compare_drv(tree1, tree2):
    if not (isinstance(tree1, Tree) and isinstance(tree2, Tree)):
@@ -242,8 +244,8 @@ def _compare_drv(tree1, tree2):

class ResolveAmbig(Visitor_NoRecurse):
    def ambig(self, tree):
        best = max(tree.children, key=cmp_to_key(_compare_drv))
    def _ambig(self, tree):
        best = min(tree.children, key=cmp_to_key(_compare_drv))
        assert best.data == 'drv'
        tree.set('drv', best.children)
        tree.rule = best.rule   # needed for applying callbacks
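Switching from max to min goes hand in hand with flipping the sign convention in _compare_rules above; cmp_to_key (Python's functools provides it, and it is the same helper used in the code above) merely adapts a three-way comparator into a sort key. A standalone illustration with a toy comparator, unrelated to lark's internals:

    from functools import cmp_to_key

    def compare_len(a, b):
        # negative -> a sorts first, positive -> b sorts first, 0 -> equal
        return len(a) - len(b)

    words = ['bananas', 'fruit', 'flies']
    print(min(words, key=cmp_to_key(compare_len)))   # fruit   (first of the shortest)
    print(max(words, key=cmp_to_key(compare_len)))   # bananas (the longest)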
@@ -120,6 +120,23 @@ class TestEarley(unittest.TestCase):
        empty_tree = Tree('empty', [Tree('empty2', [])])
        self.assertSequenceEqual(res.children, ['a', empty_tree, empty_tree, 'b'])

    def test_earley_explicit_ambiguity(self):
        # This was a sneaky bug!
        grammar = """
        start: a b | ab
        a: "a"
        b: "b"
        ab: "ab"
        """
        parser = Lark(grammar, parser='earley', lexer=None, ambiguity='explicit')
        res = parser.parse('ab')

        self.assertEqual( res.data, '_ambig')
        self.assertEqual( len(res.children), 2)
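A companion check for the default behaviour could be sketched along the same lines; this is a hypothetical addition, not part of this change, and it only asserts that resolution collapses the forest to a single derivation:

    def test_earley_resolve_ambiguity(self):
        # Hypothetical counterpart: same grammar, default resolution.
        grammar = """
        start: a b | ab
        a: "a"
        b: "b"
        ab: "ab"
        """
        parser = Lark(grammar, parser='earley', lexer=None, ambiguity='resolve')
        res = parser.parse('ab')

        self.assertNotEqual(res.data, '_ambig')   # exactly one derivation survives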
def _make_parser_test(LEXER, PARSER):
    def _Lark(grammar, **kwargs):
        return Lark(grammar, lexer=LEXER, parser=PARSER, **kwargs)