Pārlūkot izejas kodu

Added earley__all_derivations due to performance concerns

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.5.1
Erez Shinan pirms 7 gadiem
vecāks
revīzija
aede340449
5 mainīti faili ar 83 papildinājumiem un 50 dzēšanām
  1. +1
    -1
      lark/__init__.py
  2. +6
    -1
      lark/lark.py
  3. +5
    -3
      lark/parser_frontends.py
  4. +14
    -3
      lark/parsers/earley.py
  5. +57
    -42
      tests/test_parser.py

+ 1
- 1
lark/__init__.py Parādīt failu

@@ -3,4 +3,4 @@ from .common import ParseError, GrammarError
from .lark import Lark
from .utils import inline_args

__version__ = "0.3.4"
__version__ = "0.3.5"

+ 6
- 1
lark/lark.py Parādīt failu

@@ -28,10 +28,14 @@ class LarkOptions(object):
"auto" (default): Choose for me based on grammar and parser

ambiguity - Decides how to handle ambiguity in the parse. Only relevant if parser="earley"
"resolve": The parser will automatically choose the simplest derivation
"resolve": The parser will automatically choose the simplest derivation
(it chooses consistently: greedy for tokens, non-greedy for rules)
"explicit": The parser will return all derivations wrapped in "_ambig" tree nodes (i.e. a forest).

earley__all_derivations - If True, try every possible derivation of each rule. If False, pick the first
correct derivation. Both will find a solution to every correct grammar & input,
but when False, some ambiguities won't appear (Default: True)

transformer - Applies the transformer to every parse tree
debug - Affects verbosity (default: False)
keep_all_tokens - Don't automagically remove "punctuation" tokens (default: False)
@@ -57,6 +61,7 @@ class LarkOptions(object):
self.profile = o.pop('profile', False)
self.ambiguity = o.pop('ambiguity', 'auto')
self.propagate_positions = o.pop('propagate_positions', False)
self.earley__all_derivations = o.pop('earley__all_derivations', True)

assert self.parser in ('earley', 'lalr', None)



+ 5
- 3
lark/parser_frontends.py Parādīt failu

@@ -77,7 +77,8 @@ class Earley_NoLex:
self.parser = earley.Parser(rules,
parser_conf.start,
parser_conf.callback,
resolve_ambiguity=get_ambiguity_resolver(options))
resolve_ambiguity=get_ambiguity_resolver(options),
all_derivations = options.earley__all_derivations if options else True)

def _prepare_expansion(self, expansion):
for sym in expansion:
@@ -100,10 +101,11 @@ class Earley(WithLexer):

rules = [(n, self._prepare_expansion(x), a, o) for n,x,a,o in parser_conf.rules]

self.parser = earley.Parser(rules,
self.parser = earley.Parser(rules,
parser_conf.start,
parser_conf.callback,
resolve_ambiguity=get_ambiguity_resolver(options))
resolve_ambiguity=get_ambiguity_resolver(options),
all_derivations = options.earley__all_derivations if options else True)

def _prepare_expansion(self, expansion):
return [Terminal_Token(sym) if is_terminal(sym) else sym for sym in expansion]


+ 14
- 3
lark/parsers/earley.py Parādīt failu

@@ -51,7 +51,7 @@ class Item(object):
def advance(self, tree):
assert self.tree.data == 'drv'
new_tree = Derivation(self.rule, self.tree.children + [tree])
return Item(self.rule, self.ptr+1, self.start, new_tree)
return self.__class__(self.rule, self.ptr+1, self.start, new_tree)

def similar(self, other):
return self.start is other.start and self.ptr == other.ptr and self.rule == other.rule
@@ -67,6 +67,9 @@ class Item(object):
after = list(map(str, self.rule.expansion[self.ptr:]))
return '<(%d) %s : %s * %s>' % (id(self.start), self.rule.origin, ' '.join(before), ' '.join(after))

class Item_JoinDerivations(Item):
__eq__ = Item.similar


class NewsList(list):
"Keeps track of newly added items (append-only)"
@@ -133,10 +136,16 @@ class Column:
return bool(self.item_count)

class Parser:
def __init__(self, rules, start_symbol, callback, resolve_ambiguity=None):
def __init__(self, rules, start_symbol, callback, resolve_ambiguity=None, all_derivations=True):
"""
all_derivations:
True = Try every rule combination, and every possible derivation of each rule. (default)
False = Try every rule combination, but not every derivation of the same rule.
"""
self.analysis = GrammarAnalyzer(rules, start_symbol)
self.start_symbol = start_symbol
self.resolve_ambiguity = resolve_ambiguity
self.all_derivations = all_derivations

self.postprocess = {}
self.predictions = {}
@@ -150,9 +159,11 @@ class Parser:
# Define parser functions
start_symbol = start_symbol or self.start_symbol

_Item = Item if self.all_derivations else Item_JoinDerivations

def predict(nonterm, column):
assert not isinstance(nonterm, Terminal), nonterm
return [Item(rule, 0, column, None) for rule in self.predictions[nonterm]]
return [_Item(rule, 0, column, None) for rule in self.predictions[nonterm]]

def complete(item):
name = item.rule.origin


+ 57
- 42
tests/test_parser.py Parādīt failu

@@ -140,6 +140,35 @@ def _make_full_earley_test(LEXER):
self.assertEqual( res.data, '_ambig')
self.assertEqual( len(res.children), 2)

def test_ambiguity1(self):
grammar = """
start: cd+ "e"

!cd: "c"
| "d"
| "cd"

"""
l = Lark(grammar, parser='earley', ambiguity='explicit', lexer=LEXER)
x = l.parse('cde')
assert x.data == '_ambig', x
assert len(x.children) == 2

@unittest.skipIf(LEXER=='dynamic', "Not implemented in Dynamic Earley yet") # TODO
def test_not_all_derivations(self):
grammar = """
start: cd+ "e"

!cd: "c"
| "d"
| "cd"

"""
l = Lark(grammar, parser='earley', ambiguity='explicit', lexer=LEXER, earley__all_derivations=False)
x = l.parse('cde')
assert x.data != '_ambig', x
assert len(x.children) == 1

_NAME = "TestFullEarley" + (LEXER or 'Scanless').capitalize()
_TestFullEarley.__name__ = _NAME
globals()[_NAME] = _TestFullEarley
@@ -400,6 +429,7 @@ def _make_parser_test(LEXER, PARSER):
self.assertSequenceEqual(x.children, ['HelloWorld'])


@unittest.skipIf(LEXER is None, "Known bug with scanless parsing") # TODO
def test_token_collision2(self):
# NOTE: This test reveals a bug in token reconstruction in Scanless Earley
# I probably need to re-write grammar transformation
@@ -625,32 +655,6 @@ def _make_parser_test(LEXER, PARSER):
self.assertEqual(len(tree.children), 2)


@unittest.skipIf(PARSER != 'earley', "Currently only Earley supports priority in rules")
def test_earley_prioritization(self):
"Tests effect of priority on result"

grammar = """
start: a | b
a.1: "a"
b.2: "a"
"""

# l = Lark(grammar, parser='earley', lexer='standard')
l = _Lark(grammar)
res = l.parse("a")
self.assertEqual(res.children[0].data, 'b')

grammar = """
start: a | b
a.2: "a"
b.1: "a"
"""

l = _Lark(grammar)
# l = Lark(grammar, parser='earley', lexer='standard')
res = l.parse("a")
self.assertEqual(res.children[0].data, 'a')

@unittest.skipIf(LEXER != 'standard', "Only standard lexers care about token priority")
def test_lexer_prioritization(self):
"Tests effect of priority on result"
@@ -680,22 +684,6 @@ def _make_parser_test(LEXER, PARSER):
self.assertEqual(res.children, ['ab'])


@unittest.skipIf(PARSER != 'earley', "Currently only Earley supports ambiguity")
def test_ambiguity1(self):
grammar = """
start: cd+ "e"

!cd: "c"
| "d"
| "cd"

"""
# l = Lark(grammar, parser='earley', ambiguity='explicit', lexer=None)
l = _Lark(grammar, ambiguity='explicit')
x = l.parse('cde')
assert x.data == '_ambig'
assert len(x.children) == 2


def test_import(self):
grammar = """
@@ -711,6 +699,33 @@ def _make_parser_test(LEXER, PARSER):
x = l.parse('12 elephants')
self.assertEqual(x.children, ['12', 'elephants'])

@unittest.skipIf(PARSER != 'earley', "Currently only Earley supports priority in rules")
def test_earley_prioritization(self):
"Tests effect of priority on result"

grammar = """
start: a | b
a.1: "a"
b.2: "a"
"""

# l = Lark(grammar, parser='earley', lexer='standard')
l = _Lark(grammar)
res = l.parse("a")
self.assertEqual(res.children[0].data, 'b')

grammar = """
start: a | b
a.2: "a"
b.1: "a"
"""

l = _Lark(grammar)
# l = Lark(grammar, parser='earley', lexer='standard')
res = l.parse("a")
self.assertEqual(res.children[0].data, 'a')


@unittest.skipIf(PARSER != 'earley', "Currently only Earley supports priority in rules")
def test_earley_prioritization_sum(self):
"Tests effect of priority on result"


Notiek ielāde…
Atcelt
Saglabāt