| @@ -3,4 +3,4 @@ from .common import ParseError, GrammarError | |||
| from .lark import Lark | |||
| from .utils import inline_args | |||
| __version__ = "0.3.4" | |||
| __version__ = "0.3.5" | |||
| @@ -28,10 +28,14 @@ class LarkOptions(object): | |||
| "auto" (default): Choose for me based on grammar and parser | |||
| ambiguity - Decides how to handle ambiguity in the parse. Only relevant if parser="earley" | |||
| "resolve": The parser will automatically choose the simplest derivation | |||
| "resolve": The parser will automatically choose the simplest derivation | |||
| (it chooses consistently: greedy for tokens, non-greedy for rules) | |||
| "explicit": The parser will return all derivations wrapped in "_ambig" tree nodes (i.e. a forest). | |||
| earley__all_derivations - If True, try every possible derivation of each rule. If False, pick the first | |||
| correct derivation. Either setting will find a solution to every correct grammar & input, | |||
| but when False, some ambiguities won't appear in the result (default: True) | |||
| transformer - Applies the transformer to every parse tree | |||
| debug - Affects verbosity (default: False) | |||
| keep_all_tokens - Don't automagically remove "punctuation" tokens (default: False) | |||
| @@ -57,6 +61,7 @@ class LarkOptions(object): | |||
| self.profile = o.pop('profile', False) | |||
| self.ambiguity = o.pop('ambiguity', 'auto') | |||
| self.propagate_positions = o.pop('propagate_positions', False) | |||
| self.earley__all_derivations = o.pop('earley__all_derivations', True) | |||
| assert self.parser in ('earley', 'lalr', None) | |||
| @@ -77,7 +77,8 @@ class Earley_NoLex: | |||
| self.parser = earley.Parser(rules, | |||
| parser_conf.start, | |||
| parser_conf.callback, | |||
| resolve_ambiguity=get_ambiguity_resolver(options)) | |||
| resolve_ambiguity=get_ambiguity_resolver(options), | |||
| all_derivations = options.earley__all_derivations if options else True) | |||
| def _prepare_expansion(self, expansion): | |||
| for sym in expansion: | |||
| @@ -100,10 +101,11 @@ class Earley(WithLexer): | |||
| rules = [(n, self._prepare_expansion(x), a, o) for n,x,a,o in parser_conf.rules] | |||
| self.parser = earley.Parser(rules, | |||
| self.parser = earley.Parser(rules, | |||
| parser_conf.start, | |||
| parser_conf.callback, | |||
| resolve_ambiguity=get_ambiguity_resolver(options)) | |||
| resolve_ambiguity=get_ambiguity_resolver(options), | |||
| all_derivations = options.earley__all_derivations if options else True) | |||
| def _prepare_expansion(self, expansion): | |||
| # Returns a new list: symbols for which is_terminal() is true are wrapped in Terminal_Token, | |||
| # every other symbol is passed through unchanged, and the original order is kept. | |||
| return [Terminal_Token(sym) if is_terminal(sym) else sym for sym in expansion] | |||
| @@ -51,7 +51,7 @@ class Item(object): | |||
| def advance(self, tree): | |||
| # Builds a new item one position further along the same rule (ptr+1, same start), | |||
| # whose derivation is this item's children plus `tree`. Requires the current tree to be a 'drv' node. | |||
| assert self.tree.data == 'drv' | |||
| new_tree = Derivation(self.rule, self.tree.children + [tree]) | |||
| # NOTE(review): the two returns below appear to be the before/after pair of this diff hunk. | |||
| # The second constructs self.__class__ instead of the hard-coded Item, so an instance of a | |||
| # subclass advances to an instance of that same subclass. | |||
| return Item(self.rule, self.ptr+1, self.start, new_tree) | |||
| return self.__class__(self.rule, self.ptr+1, self.start, new_tree) | |||
| def similar(self, other): | |||
| # True when both items have the very same start object (identity, not equality), | |||
| # the same ptr and an equal rule. The items' trees are not compared. | |||
| return self.start is other.start and self.ptr == other.ptr and self.rule == other.rule | |||
| @@ -67,6 +67,9 @@ class Item(object): | |||
| after = list(map(str, self.rule.expansion[self.ptr:])) | |||
| return '<(%d) %s : %s * %s>' % (id(self.start), self.rule.origin, ' '.join(before), ' '.join(after)) | |||
| class Item_JoinDerivations(Item): | |||
| __eq__ = Item.similar | |||
| class NewsList(list): | |||
| "Keeps track of newly added items (append-only)" | |||
| @@ -133,10 +136,16 @@ class Column: | |||
| return bool(self.item_count) | |||
| class Parser: | |||
| def __init__(self, rules, start_symbol, callback, resolve_ambiguity=None): | |||
| def __init__(self, rules, start_symbol, callback, resolve_ambiguity=None, all_derivations=True): | |||
| """ | |||
| all_derivations: | |||
| True = Try every rule combination, and every possible derivation of each rule. (default) | |||
| False = Try every rule combination, but not every derivation of the same rule. | |||
| """ | |||
| self.analysis = GrammarAnalyzer(rules, start_symbol) | |||
| self.start_symbol = start_symbol | |||
| self.resolve_ambiguity = resolve_ambiguity | |||
| self.all_derivations = all_derivations | |||
| self.postprocess = {} | |||
| self.predictions = {} | |||
| @@ -150,9 +159,11 @@ class Parser: | |||
| # Define parser functions | |||
| start_symbol = start_symbol or self.start_symbol | |||
| _Item = Item if self.all_derivations else Item_JoinDerivations | |||
| def predict(nonterm, column): | |||
| assert not isinstance(nonterm, Terminal), nonterm | |||
| return [Item(rule, 0, column, None) for rule in self.predictions[nonterm]] | |||
| return [_Item(rule, 0, column, None) for rule in self.predictions[nonterm]] | |||
| def complete(item): | |||
| name = item.rule.origin | |||
| @@ -140,6 +140,35 @@ def _make_full_earley_test(LEXER): | |||
| self.assertEqual( res.data, '_ambig') | |||
| self.assertEqual( len(res.children), 2) | |||
| def test_ambiguity1(self): | |||
| # Per the grammar below, the "cd" prefix of 'cde' can be derived either as a single | |||
| # cd ("cd") or as two cd nodes ("c" then "d"). With ambiguity='explicit' the test | |||
| # expects the root to be an '_ambig' node carrying exactly those two alternatives. | |||
| grammar = """ | |||
| start: cd+ "e" | |||
| !cd: "c" | |||
| | "d" | |||
| | "cd" | |||
| """ | |||
| l = Lark(grammar, parser='earley', ambiguity='explicit', lexer=LEXER) | |||
| x = l.parse('cde') | |||
| assert x.data == '_ambig', x | |||
| assert len(x.children) == 2 | |||
| @unittest.skipIf(LEXER=='dynamic', "Not implemented in Dynamic Earley yet") # TODO | |||
| def test_not_all_derivations(self): | |||
| # Even though ambiguity='explicit' is requested, passing earley__all_derivations=False | |||
| # is expected to yield a plain (non-'_ambig') root with exactly one child for 'cde'. | |||
| # Skipped when LEXER is 'dynamic'. | |||
| grammar = """ | |||
| start: cd+ "e" | |||
| !cd: "c" | |||
| | "d" | |||
| | "cd" | |||
| """ | |||
| l = Lark(grammar, parser='earley', ambiguity='explicit', lexer=LEXER, earley__all_derivations=False) | |||
| x = l.parse('cde') | |||
| assert x.data != '_ambig', x | |||
| assert len(x.children) == 1 | |||
| _NAME = "TestFullEarley" + (LEXER or 'Scanless').capitalize() | |||
| _TestFullEarley.__name__ = _NAME | |||
| globals()[_NAME] = _TestFullEarley | |||
| @@ -400,6 +429,7 @@ def _make_parser_test(LEXER, PARSER): | |||
| self.assertSequenceEqual(x.children, ['HelloWorld']) | |||
| @unittest.skipIf(LEXER is None, "Known bug with scanless parsing") # TODO | |||
| def test_token_collision2(self): | |||
| # NOTE: This test reveals a bug in token reconstruction in Scanless Earley | |||
| # I probably need to re-write grammar transformation | |||
| @@ -625,32 +655,6 @@ def _make_parser_test(LEXER, PARSER): | |||
| self.assertEqual(len(tree.children), 2) | |||
| @unittest.skipIf(PARSER != 'earley', "Currently only Earley supports priority in rules") | |||
| def test_earley_prioritization(self): | |||
| "Tests effect of priority on result" | |||
| grammar = """ | |||
| start: a | b | |||
| a.1: "a" | |||
| b.2: "a" | |||
| """ | |||
| # l = Lark(grammar, parser='earley', lexer='standard') | |||
| l = _Lark(grammar) | |||
| res = l.parse("a") | |||
| self.assertEqual(res.children[0].data, 'b') | |||
| grammar = """ | |||
| start: a | b | |||
| a.2: "a" | |||
| b.1: "a" | |||
| """ | |||
| l = _Lark(grammar) | |||
| # l = Lark(grammar, parser='earley', lexer='standard') | |||
| res = l.parse("a") | |||
| self.assertEqual(res.children[0].data, 'a') | |||
| @unittest.skipIf(LEXER != 'standard', "Only standard lexers care about token priority") | |||
| def test_lexer_prioritization(self): | |||
| "Tests effect of priority on result" | |||
| @@ -680,22 +684,6 @@ def _make_parser_test(LEXER, PARSER): | |||
| self.assertEqual(res.children, ['ab']) | |||
| @unittest.skipIf(PARSER != 'earley', "Currently only Earley supports ambiguity") | |||
| def test_ambiguity1(self): | |||
| grammar = """ | |||
| start: cd+ "e" | |||
| !cd: "c" | |||
| | "d" | |||
| | "cd" | |||
| """ | |||
| # l = Lark(grammar, parser='earley', ambiguity='explicit', lexer=None) | |||
| l = _Lark(grammar, ambiguity='explicit') | |||
| x = l.parse('cde') | |||
| assert x.data == '_ambig' | |||
| assert len(x.children) == 2 | |||
| def test_import(self): | |||
| grammar = """ | |||
| @@ -711,6 +699,33 @@ def _make_parser_test(LEXER, PARSER): | |||
| x = l.parse('12 elephants') | |||
| self.assertEqual(x.children, ['12', 'elephants']) | |||
| @unittest.skipIf(PARSER != 'earley', "Currently only Earley supports priority in rules") | |||
| def test_earley_prioritization(self): | |||
| "Tests effect of priority on result" | |||
| # Rules a and b both match the same input "a"; only their priority suffix differs. | |||
| # First case: b has the larger number (b.2 vs a.1) and the test expects b to be chosen. | |||
| grammar = """ | |||
| start: a | b | |||
| a.1: "a" | |||
| b.2: "a" | |||
| """ | |||
| # l = Lark(grammar, parser='earley', lexer='standard') | |||
| l = _Lark(grammar) | |||
| res = l.parse("a") | |||
| self.assertEqual(res.children[0].data, 'b') | |||
| # Second case: priorities swapped (a.2 vs b.1), so the same input is expected to yield a. | |||
| grammar = """ | |||
| start: a | b | |||
| a.2: "a" | |||
| b.1: "a" | |||
| """ | |||
| l = _Lark(grammar) | |||
| # l = Lark(grammar, parser='earley', lexer='standard') | |||
| res = l.parse("a") | |||
| self.assertEqual(res.children[0].data, 'a') | |||
| @unittest.skipIf(PARSER != 'earley', "Currently only Earley supports priority in rules") | |||
| def test_earley_prioritization_sum(self): | |||
| "Tests effect of priority on result" | |||