Kaynağa Gözat

Merge branch 'fix-cycle-handling' of https://github.com/chanicpanic/lark into chanicpanic-fix-cycle-handling

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.11.0
Erez Sh 3 yıl önce
ebeveyn
işleme
26643eef84
3 değiştirilmiş dosya ile 101 ekleme ve 16 silme
  1. +1
    -1
      lark/parsers/earley.py
  2. +30
    -15
      lark/parsers/earley_forest.py
  3. +70
    -0
      tests/test_parser.py

+ 1
- 1
lark/parsers/earley.py Dosyayı Görüntüle

@@ -316,7 +316,7 @@ class Parser:

if self.tree_class is not None:
# Perform our SPPF -> AST conversion
transformer = ForestToParseTree(self.tree_class, self.callbacks, self.forest_sum_visitor and self.forest_sum_visitor(), self.resolve_ambiguity)
transformer = ForestToParseTree(self.tree_class, self.callbacks, self.forest_sum_visitor and self.forest_sum_visitor(), self.resolve_ambiguity, self.debug)
return transformer.transform(solutions[0])

# return the root of the SPPF


+ 30
- 15
lark/parsers/earley_forest.py Dosyayı Görüntüle

@@ -483,26 +483,31 @@ class ForestToParseTree(ForestTransformer):
tree.
"""

def __init__(self, tree_class=Tree, callbacks=dict(), prioritizer=ForestSumVisitor(), resolve_ambiguity=True):
def __init__(self, tree_class=Tree, callbacks=dict(), prioritizer=ForestSumVisitor(), resolve_ambiguity=True, debug=False):
super(ForestToParseTree, self).__init__()
self.tree_class = tree_class
self.callbacks = callbacks
self.prioritizer = prioritizer
self.resolve_ambiguity = resolve_ambiguity
self.debug = debug
self._on_cycle_retreat = False
self._cycle_node = None
self._successful_visits = set()

def on_cycle(self, node, path):
logger.warning("Cycle encountered in the SPPF at node: %s. "
"As infinite ambiguities cannot be represented in a tree, "
"this family of derivations will be discarded.", node)
if self.resolve_ambiguity:
# TODO: choose a different path if cycle is encountered
logger.warning("At this time, using ambiguity resolution for SPPFs "
"with cycles may result in None being returned.")
if self.debug:
logger.warning("Cycle encountered in the SPPF at node: %s. "
"As infinite ambiguities cannot be represented in a tree, "
"this family of derivations will be discarded.", node)
self._cycle_node = node
self._on_cycle_retreat = True

def _check_cycle(self, node):
    """Discard ``node`` while a cycle retreat is in progress.

    Does nothing when ``self._on_cycle_retreat`` is false.  During a
    retreat, reaching the node stored in ``self._cycle_node`` clears both
    retreat attributes and returns normally; any other node raises
    ``Discard``.

    Raises:
        Discard: if a retreat is in progress and ``node`` is not the
            recorded cycle node.
    """
    if not self._on_cycle_retreat:
        return
    if id(node) != id(self._cycle_node):
        # Still retreating and this is not the recorded cycle node.
        raise Discard()
    # Reached the recorded cycle node: the retreat ends here.
    self._cycle_node = None
    self._on_cycle_retreat = False

def _collapse_ambig(self, children):
@@ -531,11 +536,17 @@ class ForestToParseTree(ForestTransformer):
raise Discard()

def transform_symbol_node(self, node, data):
    """Transform a symbol node, but only if its id is in ``_successful_visits``.

    The id is removed from ``self._successful_visits`` once it has been
    checked.  After the cycle check, ``data`` is passed through
    ``_collapse_ambig`` and the result is handed to ``_call_ambig_func``.

    Raises:
        Discard: if ``id(node)`` is not in ``self._successful_visits``, or
            if ``_check_cycle`` raises it.
    """
    node_id = id(node)
    if node_id not in self._successful_visits:
        raise Discard()
    self._successful_visits.remove(node_id)
    self._check_cycle(node)
    collapsed = self._collapse_ambig(data)
    return self._call_ambig_func(node, collapsed)

def transform_intermediate_node(self, node, data):
if id(node) not in self._successful_visits:
raise Discard()
self._successful_visits.remove(id(node))
self._check_cycle(node)
if len(data) > 1:
children = [self.tree_class('_inter', c) for c in data]
@@ -544,6 +555,8 @@ class ForestToParseTree(ForestTransformer):

def transform_packed_node(self, node, data):
self._check_cycle(node)
if self.resolve_ambiguity and id(node.parent) in self._successful_visits:
raise Discard()
children = []
assert len(data) <= 2
data = PackedData(node, data)
@@ -559,21 +572,23 @@ class ForestToParseTree(ForestTransformer):
return self._call_rule_func(node, children)

def visit_symbol_node_in(self, node):
self._on_cycle_retreat = False
super(ForestToParseTree, self).visit_symbol_node_in(node)
if self._on_cycle_retreat:
return
if self.prioritizer and node.is_ambiguous and isinf(node.priority):
self.prioritizer.visit(node)
if self.resolve_ambiguity:
return node.children[0]
return node.children

def visit_packed_node_in(self, node):
self._on_cycle_retreat = False
return super(ForestToParseTree, self).visit_packed_node_in(node)
to_visit = super(ForestToParseTree, self).visit_packed_node_in(node)
if not self.resolve_ambiguity or id(node.parent) not in self._successful_visits:
return to_visit

def visit_token_node(self, node):
self._on_cycle_retreat = False
return super(ForestToParseTree, self).visit_token_node(node)
def visit_packed_node_out(self, node):
    """Record the parent of ``node`` as successfully visited.

    Delegates to the superclass hook first.  The parent's id is added to
    ``self._successful_visits`` only when no cycle retreat is in progress.
    """
    super(ForestToParseTree, self).visit_packed_node_out(node)
    if self._on_cycle_retreat:
        # A cycle retreat is in progress, so this visit is not recorded.
        return
    self._successful_visits.add(id(node.parent))

def handles_ambiguity(func):
"""Decorator for methods of subclasses of ``TreeForestTransformer``.


+ 70
- 0
tests/test_parser.py Dosyayı Görüntüle

@@ -746,6 +746,76 @@ def _make_full_earley_test(LEXER):
tree = parser.parse(text)
self.assertEqual(tree.children, ['foo', 'bar'])

def test_cycle(self):
    """An optionally self-referential ``start`` rule parses '' to an empty tree.

    The same result is expected under both the 'resolve' and 'explicit'
    ambiguity modes.
    """
    grammar = """
start: start?
"""
    expected = Tree('start', [])

    for ambiguity in ('resolve', 'explicit'):
        parser = Lark(grammar, ambiguity=ambiguity, lexer=LEXER)
        self.assertEqual(parser.parse(''), expected)

def test_cycles(self):
    """A cycle running through three rules (a -> b -> c -> a) parses ''.

    Both ambiguity modes must yield ``a`` containing an empty ``b``.
    """
    grammar = """
a: b
b: c*
c: a
"""
    expected = Tree('a', [Tree('b', [])])

    for ambiguity in ('resolve', 'explicit'):
        parser = Lark(grammar, start='a', ambiguity=ambiguity, lexer=LEXER)
        self.assertEqual(parser.parse(''), expected)

def test_many_cycles(self):
    """A grammar with several self-referential alternatives parses 'a'.

    Both ambiguity modes must yield a single ``start`` wrapping one ``a``.
    """
    grammar = """
start: a? | start start
!a: "a"
"""
    expected = Tree('start', [Tree('a', ['a'])])

    for ambiguity in ('resolve', 'explicit'):
        parser = Lark(grammar, ambiguity=ambiguity, lexer=LEXER)
        self.assertEqual(parser.parse('a'), expected)

def test_cycles_with_child_filter(self):
    """Self-referential rules parse '' for both ``_x`` and ``x`` rule names.

    The two grammars differ only in the name of the recursive rule.  Per
    the expected trees, ``_x`` leaves no node of its own in the result
    while ``x`` does.  Each grammar is checked under both the 'resolve'
    and 'explicit' ambiguity modes.
    """
    grammar = """
a: _x
_x: _x? b
b:
"""

    grammar2 = """
a: x
x: x? b
b:
"""

    # (grammar text, expected tree), checked in the original order:
    # grammar/resolve, grammar/explicit, grammar2/resolve, grammar2/explicit.
    cases = [
        (grammar, Tree('a', [Tree('b', [])])),
        (grammar2, Tree('a', [Tree('x', [Tree('b', [])])])),
    ]

    for text, expected in cases:
        for ambiguity in ('resolve', 'explicit'):
            parser = Lark(text, start='a', ambiguity=ambiguity, lexer=LEXER)
            self.assertEqual(parser.parse(''), expected)





Yükleniyor…
İptal
Kaydet