@@ -3,4 +3,4 @@ from .common import ParseError, GrammarError
 from .lark import Lark
 from .utils import inline_args
 
-__version__ = "0.3.4"
+__version__ = "0.3.5"
@@ -28,10 +28,14 @@ class LarkOptions(object):
            "auto" (default): Choose for me based on grammar and parser
 
        ambiguity - Decides how to handle ambiguity in the parse. Only relevant if parser="earley"
-           "resolve": The parser will automatically choose the simplest derivation
+           "resolve": The parser will automatically choose the simplest derivation
+                      (it chooses consistently: greedy for tokens, non-greedy for rules)
            "explicit": The parser will return all derivations wrapped in "_ambig" tree nodes (i.e. a forest).
+       earley__all_derivations - If True, try every possible derivation of each rule. If False, pick the first
+                                 correct derivation. Both will find a solution to every correct grammar & input,
+                                 but when False, some ambiguities won't appear (Default: True)
        transformer - Applies the transformer to every parse tree
        debug - Affects verbosity (default: False)
        keep_all_tokens - Don't automagically remove "punctuation" tokens (default: False)
@@ -57,6 +61,7 @@ class LarkOptions(object):
         self.profile = o.pop('profile', False)
         self.ambiguity = o.pop('ambiguity', 'auto')
         self.propagate_positions = o.pop('propagate_positions', False)
+        self.earley__all_derivations = o.pop('earley__all_derivations', True)
 
         assert self.parser in ('earley', 'lalr', None)
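
A usage sketch (illustrative, not part of the patch): since LarkOptions pops the new key like any other keyword, it can be passed directly to the Lark constructor.

    from lark import Lark

    # Hypothetical toy grammar, only to show the new keyword argument.
    parser = Lark('start: "a"+', parser='earley',
                  earley__all_derivations=False)  # new in this change; defaults to True
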
@@ -77,7 +77,8 @@ class Earley_NoLex:
         self.parser = earley.Parser(rules,
                                     parser_conf.start,
                                     parser_conf.callback,
-                                    resolve_ambiguity=get_ambiguity_resolver(options))
+                                    resolve_ambiguity=get_ambiguity_resolver(options),
+                                    all_derivations = options.earley__all_derivations if options else True)
 
     def _prepare_expansion(self, expansion):
         for sym in expansion:
@@ -100,10 +101,11 @@ class Earley(WithLexer):
         rules = [(n, self._prepare_expansion(x), a, o) for n,x,a,o in parser_conf.rules]
 
-        self.parser = earley.Parser(rules,
+        self.parser = earley.Parser(rules,
                                     parser_conf.start,
                                     parser_conf.callback,
-                                    resolve_ambiguity=get_ambiguity_resolver(options))
+                                    resolve_ambiguity=get_ambiguity_resolver(options),
+                                    all_derivations = options.earley__all_derivations if options else True)
 
     def _prepare_expansion(self, expansion):
         return [Terminal_Token(sym) if is_terminal(sym) else sym for sym in expansion]
@@ -51,7 +51,7 @@ class Item(object):
     def advance(self, tree):
         assert self.tree.data == 'drv'
         new_tree = Derivation(self.rule, self.tree.children + [tree])
-        return Item(self.rule, self.ptr+1, self.start, new_tree)
+        return self.__class__(self.rule, self.ptr+1, self.start, new_tree)
 
     def similar(self, other):
         return self.start is other.start and self.ptr == other.ptr and self.rule == other.rule
@@ -67,6 +67,9 @@ class Item(object):
         after = list(map(str, self.rule.expansion[self.ptr:]))
         return '<(%d) %s : %s * %s>' % (id(self.start), self.rule.origin, ' '.join(before), ' '.join(after))
 
+class Item_JoinDerivations(Item):
+    __eq__ = Item.similar
+
 class NewsList(list):
     "Keeps track of newly added items (append-only)"
@@ -133,10 +136,16 @@ class Column:
         return bool(self.item_count)
 
 class Parser:
-    def __init__(self, rules, start_symbol, callback, resolve_ambiguity=None):
+    def __init__(self, rules, start_symbol, callback, resolve_ambiguity=None, all_derivations=True):
+        """
+        all_derivations:
+            True = Try every rule combination, and every possible derivation of each rule. (default)
+            False = Try every rule combination, but not every derivation of the same rule.
+        """
         self.analysis = GrammarAnalyzer(rules, start_symbol)
         self.start_symbol = start_symbol
         self.resolve_ambiguity = resolve_ambiguity
+        self.all_derivations = all_derivations
 
         self.postprocess = {}
         self.predictions = {}
@@ -150,9 +159,11 @@ class Parser:
         # Define parser functions
         start_symbol = start_symbol or self.start_symbol
 
+        _Item = Item if self.all_derivations else Item_JoinDerivations
+
         def predict(nonterm, column):
             assert not isinstance(nonterm, Terminal), nonterm
-            return [Item(rule, 0, column, None) for rule in self.predictions[nonterm]]
+            return [_Item(rule, 0, column, None) for rule in self.predictions[nonterm]]
 
         def complete(item):
             name = item.rule.origin
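
Taken together, the pieces above work like this: Item.advance now builds follow-up items with self.__class__, so once the parser selects Item_JoinDerivations (via _Item above), every derived item stays in that subclass; and because the subclass aliases __eq__ to Item.similar, items that differ only in their partial derivation tree compare equal, so the parser's uniqueness checks collapse extra derivations of the same rule. A minimal, self-contained sketch of that aliasing technique, with hypothetical names and a plain list standing in for the parser's bookkeeping:

    class ParseItem:
        def __init__(self, rule, ptr, start, tree):
            self.rule, self.ptr, self.start, self.tree = rule, ptr, start, tree

        def similar(self, other):
            # "Same progress through the same rule, from the same start column."
            return self.start is other.start and self.ptr == other.ptr and self.rule == other.rule

        def advance(self, subtree):
            # Mirrors Item.advance above: type(self) keeps the chosen subclass alive.
            return type(self)(self.rule, self.ptr + 1, self.start, self.tree + [subtree])

    class ParseItem_JoinDerivations(ParseItem):
        # Merely "similar" items now compare equal. (In Python 3 this also makes
        # instances unhashable unless __hash__ is restored explicitly.)
        __eq__ = ParseItem.similar

    a = ParseItem_JoinDerivations('cd', 1, start=None, tree=['c', 'd'])
    b = ParseItem_JoinDerivations('cd', 1, start=None, tree=['cd'])  # same progress, different derivation

    column = [a]
    if b not in column:   # membership uses __eq__, so b is seen as a duplicate
        column.append(b)
    assert column == [a]  # only one derivation of the rule survives
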
@@ -140,6 +140,35 @@ def _make_full_earley_test(LEXER):
             self.assertEqual( res.data, '_ambig')
             self.assertEqual( len(res.children), 2)
 
+        def test_ambiguity1(self):
+            grammar = """
+            start: cd+ "e"
+            !cd: "c"
+               | "d"
+               | "cd"
+            """
+            l = Lark(grammar, parser='earley', ambiguity='explicit', lexer=LEXER)
+            x = l.parse('cde')
+            assert x.data == '_ambig', x
+            assert len(x.children) == 2
+
+        @unittest.skipIf(LEXER=='dynamic', "Not implemented in Dynamic Earley yet") # TODO
+        def test_not_all_derivations(self):
+            grammar = """
+            start: cd+ "e"
+            !cd: "c"
+               | "d"
+               | "cd"
+            """
+            l = Lark(grammar, parser='earley', ambiguity='explicit', lexer=LEXER, earley__all_derivations=False)
+            x = l.parse('cde')
+            assert x.data != '_ambig', x
+            assert len(x.children) == 1
+
     _NAME = "TestFullEarley" + (LEXER or 'Scanless').capitalize()
     _TestFullEarley.__name__ = _NAME
     globals()[_NAME] = _TestFullEarley
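
The two new tests pin down the observable difference. As an illustration reconstructed from the grammar and the assertions (not copied from parser output): 'cde' can be segmented either as "c" "d" "e" or as "cd" "e", so with ambiguity='explicit' the root is an _ambig node holding both 'start' derivations, while earley__all_derivations=False returns a single plain tree.

    # ambiguity='explicit', all derivations (default):
    #   _ambig
    #     start           # segmentation "c" "d" "e"
    #       cd "c"
    #       cd "d"
    #     start           # segmentation "cd" "e"
    #       cd "cd"
    #
    # ambiguity='explicit', earley__all_derivations=False:
    #   start             # one surviving derivation
    #     cd ...          # a single child, per the len(x.children) == 1 assertion
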
@@ -400,6 +429,7 @@ def _make_parser_test(LEXER, PARSER):
             self.assertSequenceEqual(x.children, ['HelloWorld'])
 
+        @unittest.skipIf(LEXER is None, "Known bug with scanless parsing") # TODO
         def test_token_collision2(self):
             # NOTE: This test reveals a bug in token reconstruction in Scanless Earley
             # I probably need to re-write grammar transformation
@@ -625,32 +655,6 @@ def _make_parser_test(LEXER, PARSER):
             self.assertEqual(len(tree.children), 2)
 
-        @unittest.skipIf(PARSER != 'earley', "Currently only Earley supports priority in rules")
-        def test_earley_prioritization(self):
-            "Tests effect of priority on result"
-
-            grammar = """
-            start: a | b
-            a.1: "a"
-            b.2: "a"
-            """
-
-            # l = Lark(grammar, parser='earley', lexer='standard')
-            l = _Lark(grammar)
-            res = l.parse("a")
-            self.assertEqual(res.children[0].data, 'b')
-
-            grammar = """
-            start: a | b
-            a.2: "a"
-            b.1: "a"
-            """
-
-            l = _Lark(grammar)
-            # l = Lark(grammar, parser='earley', lexer='standard')
-            res = l.parse("a")
-            self.assertEqual(res.children[0].data, 'a')
-
         @unittest.skipIf(LEXER != 'standard', "Only standard lexers care about token priority")
         def test_lexer_prioritization(self):
             "Tests effect of priority on result"
@@ -680,22 +684,6 @@ def _make_parser_test(LEXER, PARSER):
             self.assertEqual(res.children, ['ab'])
 
-        @unittest.skipIf(PARSER != 'earley', "Currently only Earley supports ambiguity")
-        def test_ambiguity1(self):
-            grammar = """
-            start: cd+ "e"
-            !cd: "c"
-               | "d"
-               | "cd"
-            """
-            # l = Lark(grammar, parser='earley', ambiguity='explicit', lexer=None)
-            l = _Lark(grammar, ambiguity='explicit')
-            x = l.parse('cde')
-            assert x.data == '_ambig'
-            assert len(x.children) == 2
-
         def test_import(self):
             grammar = """
@@ -711,6 +699,33 @@ def _make_parser_test(LEXER, PARSER):
             x = l.parse('12 elephants')
             self.assertEqual(x.children, ['12', 'elephants'])
 
+        @unittest.skipIf(PARSER != 'earley', "Currently only Earley supports priority in rules")
+        def test_earley_prioritization(self):
+            "Tests effect of priority on result"
+
+            grammar = """
+            start: a | b
+            a.1: "a"
+            b.2: "a"
+            """
+
+            # l = Lark(grammar, parser='earley', lexer='standard')
+            l = _Lark(grammar)
+            res = l.parse("a")
+            self.assertEqual(res.children[0].data, 'b')
+
+            grammar = """
+            start: a | b
+            a.2: "a"
+            b.1: "a"
+            """
+
+            l = _Lark(grammar)
+            # l = Lark(grammar, parser='earley', lexer='standard')
+            res = l.parse("a")
+            self.assertEqual(res.children[0].data, 'a')
+
+        @unittest.skipIf(PARSER != 'earley', "Currently only Earley supports priority in rules")
+        def test_earley_prioritization_sum(self):
+            "Tests effect of priority on result"