Browse Source

Merge pull request #661 from chanicpanic/handle-ambiguous-intermediates

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.10.0
Erez Shinan 4 years ago
committed by GitHub
parent
commit
4bc8b9e29e
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 367 additions and 2 deletions
  1. +81
    -0
      lark/parse_tree_builder.py
  2. +2
    -2
      lark/parsers/earley.py
  3. +69
    -0
      lark/parsers/earley_forest.py
  4. +215
    -0
      tests/test_parser.py

+ 81
- 0
lark/parse_tree_builder.py View File

@@ -195,6 +195,86 @@ def maybe_create_ambiguous_expander(tree_class, expansion, keep_all_tokens):
if to_expand:
return partial(AmbiguousExpander, to_expand, tree_class)

class AmbiguousIntermediateExpander:
    """
    Lift ambiguous intermediate nodes ('_iambig') out of a rule's children.

    When the first child of a rule is an '_iambig' tree, every '_inter'
    derivation below it is combined with the rule's remaining children and
    built as a separate alternative; the alternatives are returned under a
    single '_ambig' tree. Schematically,

        rule
          _iambig
            _inter
              someChildren1
              ...
            _inter
              someChildren2
              ...
          someChildren3
          ...

    becomes

        _ambig
          rule
            someChildren1
            ...
            someChildren3
            ...
          rule
            someChildren2
            ...
            someChildren3
            ...
          rule
            childrenFromNestedIambigs
            ...
            someChildren3
            ...
          ...

    '_iambig' nodes nested inside an '_inter' derivation are flattened the
    same way. Children without a leading '_iambig' are handed to
    ``node_builder`` unchanged.
    """

    def __init__(self, tree_class, node_builder):
        self.tree_class = tree_class
        self.node_builder = node_builder

    @staticmethod
    def _is_iambig_tree(candidate):
        """Tell whether ``candidate`` carries a ``data`` label equal to '_iambig'."""
        return getattr(candidate, 'data', None) == '_iambig'

    def _flatten(self, children):
        """
        Expand a leading '_iambig' child into a flat list of '_inter' trees.

        Each returned '_inter' tree holds one complete derivation: the
        children of one (possibly nested) alternative followed by the
        siblings that came after the '_iambig' node. Returns None when
        ``children`` does not start with an '_iambig' tree.
        """
        # Only the first child is inspected: due to the structure of the
        # SPPF, an '_iambig' node can only appear in that position.
        if not children or not self._is_iambig_tree(children[0]):
            return None

        head, tail = children[0], children[1:]
        derivations = []
        for inter in head.children:
            nested = self._flatten(inter.children)
            if nested:
                # The nested alternatives are freshly built '_inter' trees;
                # complete each one with this level's trailing siblings.
                for flat in nested:
                    flat.children += tail
                derivations += nested
            else:
                derivations.append(self.tree_class('_inter', inter.children + tail))
        return derivations

    def __call__(self, children):
        """Build the rule node, or an '_ambig' tree holding one built node per derivation."""
        derivations = self._flatten(children)
        if not derivations:
            return self.node_builder(children)

        alternatives = [self.node_builder(d.children) for d in derivations]
        return self.tree_class('_ambig', alternatives)

def ptb_inline_args(func):
@wraps(func)
def f(children):
@@ -239,6 +319,7 @@ class ParseTreeBuilder:
maybe_create_child_filter(rule.expansion, keep_all_tokens, self.ambiguous, options.empty_indices if self.maybe_placeholders else None),
self.propagate_positions and PropagatePositions,
self.ambiguous and maybe_create_ambiguous_expander(self.tree_class, rule.expansion, keep_all_tokens),
self.ambiguous and partial(AmbiguousIntermediateExpander, self.tree_class)
]))

yield rule, wrapper_chain


+ 2
- 2
lark/parsers/earley.py View File

@@ -18,7 +18,7 @@ from ..utils import logger
from .grammar_analysis import GrammarAnalyzer
from ..grammar import NonTerminal
from .earley_common import Item, TransitiveItem
from .earley_forest import ForestToTreeVisitor, ForestSumVisitor, SymbolNode, ForestToAmbiguousTreeVisitor
from .earley_forest import ForestToTreeVisitor, ForestSumVisitor, SymbolNode, CompleteForestToAmbiguousTreeVisitor

class Parser:
def __init__(self, parser_conf, term_matcher, resolve_ambiguity=True, debug=False):
@@ -313,7 +313,7 @@ class Parser:
assert False, 'Earley should not generate multiple start symbol items!'

# Perform our SPPF -> AST conversion using the right ForestVisitor.
forest_tree_visitor_cls = ForestToTreeVisitor if self.resolve_ambiguity else ForestToAmbiguousTreeVisitor
forest_tree_visitor_cls = ForestToTreeVisitor if self.resolve_ambiguity else CompleteForestToAmbiguousTreeVisitor
forest_tree_visitor = forest_tree_visitor_cls(self.callbacks, self.forest_sum_visitor and self.forest_sum_visitor())

return forest_tree_visitor.visit(solutions[0])


+ 69
- 0
lark/parsers/earley_forest.py View File

@@ -363,6 +363,75 @@ class ForestToAmbiguousTreeVisitor(ForestToTreeVisitor):
else:
self.result = result

class CompleteForestToAmbiguousTreeVisitor(ForestToTreeVisitor):
    """
    An augmented version of ForestToAmbiguousTreeVisitor that is designed to
    handle ambiguous intermediate nodes as well as ambiguous symbol nodes.

    On the way down:

    - When an ambiguous intermediate node is encountered, an '_iambig' node
      is inserted into the tree.
    - Each possible derivation of an ambiguous intermediate node is represented
      by an '_inter' node added as a child of the corresponding '_iambig' node.

    On the way up, these nodes are propagated up the tree and collapsed
    into a single '_ambig' node for the nearest symbol node ancestor.
    This is achieved by the AmbiguousIntermediateExpander contained in
    the callbacks.
    """

    def _collapse_ambig(self, children):
        """
        Splice the children of any '_ambig' child into the returned list.

        Children that are not '_ambig' trees are kept as they are. This
        includes values with no ``data`` attribute at all (a callback may
        hand back something other than a tree), which previously raised
        AttributeError here.
        """
        new_children = []
        for child in children:
            # Guard the attribute lookup: not every child is a tree.
            if getattr(child, 'data', None) == '_ambig':
                new_children += child.children
            else:
                new_children.append(child)
        return new_children

    def visit_token_node(self, node):
        """Append the token node to the tree currently on top of the output stack."""
        self.output_stack[-1].children.append(node)

    def visit_symbol_node_in(self, node):
        """
        Open a collector tree for an ambiguous node and return its children.

        Ambiguous intermediate nodes get an '_iambig' collector, other
        ambiguous nodes an '_ambig' one. Unambiguous nodes push nothing.
        """
        if node.is_ambiguous:
            # Run the sum visitor first when the node's priority is infinite.
            if self.forest_sum_visitor and isinf(node.priority):
                self.forest_sum_visitor.visit(node)
            if node.is_intermediate:
                self.output_stack.append(Tree('_iambig', []))
            else:
                self.output_stack.append(Tree('_ambig', []))
        return iter(node.children)

    def visit_symbol_node_out(self, node):
        """
        Close the collector opened for an ambiguous node and attach it.

        For a non-intermediate node the collector is rebuilt as an '_ambig'
        tree with nested '_ambig' children flattened. The result goes to the
        parent on the stack, or becomes ``self.result`` when the stack is
        empty. Unambiguous nodes are a no-op, since nothing was pushed.
        """
        if node.is_ambiguous:
            result = self.output_stack.pop()
            if not node.is_intermediate:
                result = Tree('_ambig', self._collapse_ambig(result.children))
            if self.output_stack:
                self.output_stack[-1].children.append(result)
            else:
                self.result = result

    def visit_packed_node_in(self, node):
        """
        Open a collector for one derivation and return its two branches.

        A packed node under a non-intermediate parent collects into a 'drv'
        tree; under an ambiguous intermediate parent into an '_inter' tree.
        Under an unambiguous intermediate parent nothing is pushed, so its
        children land directly in the enclosing collector.
        """
        if not node.parent.is_intermediate:
            self.output_stack.append(Tree('drv', []))
        elif node.parent.is_ambiguous:
            self.output_stack.append(Tree('_inter', []))
        return iter([node.left, node.right])

    def visit_packed_node_out(self, node):
        """
        Finish one derivation and attach its result.

        Under a non-intermediate parent the collected children are passed to
        the callback registered for ``node.rule``. Under an ambiguous
        intermediate parent the '_inter' collector itself is the result.
        Under an unambiguous intermediate parent there is nothing to pop.
        """
        if not node.parent.is_intermediate:
            result = self.callbacks[node.rule](self.output_stack.pop().children)
        elif node.parent.is_ambiguous:
            result = self.output_stack.pop()
        else:
            # Mirrors visit_packed_node_in: no collector was pushed.
            return
        if self.output_stack:
            self.output_stack[-1].children.append(result)
        else:
            self.result = result

class ForestToPyDotVisitor(ForestVisitor):
"""
A Forest visitor which writes the SPPF to a PNG.


+ 215
- 0
tests/test_parser.py View File

@@ -460,6 +460,221 @@ def _make_full_earley_test(LEXER):
])
self.assertEqual(res, expected)

def test_ambiguous_intermediate_node(self):
    """The optional "B" may be taken by `ab` or by `bc`; both derivations must be reported."""
    grammar = """
start: ab bc d?
!ab: "A" "B"?
!bc: "B"? "C"
!d: "D"
"""

    parser = Lark(grammar, parser='earley', ambiguity='explicit', lexer=LEXER)
    parsed = parser.parse("ABCD")

    def derivation(ab_tokens, bc_tokens):
        # One `start` tree; each string is split into single-character children.
        return Tree('start', [
            Tree('ab', list(ab_tokens)),
            Tree('bc', list(bc_tokens)),
            Tree('d', ['D']),
        ])

    self.assertEqual(parsed.data, '_ambig')
    self.assertEqual(
        set(parsed.children),
        {derivation('A', 'BC'), derivation('AB', 'C')},
    )

def test_ambiguous_symbol_and_intermediate_nodes(self):
    """Optional "B" and "C" can each go to two neighbouring rules; expect all four splits."""
    grammar = """
start: ab bc cd
!ab: "A" "B"?
!bc: "B"? "C"?
!cd: "C"? "D"
"""

    parser = Lark(grammar, parser='earley', ambiguity='explicit', lexer=LEXER)
    parsed = parser.parse("ABCD")

    rule_names = ('ab', 'bc', 'cd')
    # Each row gives the tokens consumed by ab, bc and cd respectively.
    splits = [
        ('AB', 'C', 'D'),
        ('AB', '', 'CD'),
        ('A', 'BC', 'D'),
        ('A', 'B', 'CD'),
    ]

    def derivation(parts):
        return Tree('start', [Tree(name, list(tokens)) for name, tokens in zip(rule_names, parts)])

    self.assertEqual(parsed.data, '_ambig')
    self.assertEqual(set(parsed.children), {derivation(parts) for parts in splits})

def test_nested_ambiguous_intermediate_nodes(self):
    """Same four ab/bc/cd splits as above, followed by a trailing optional `e` rule."""
    grammar = """
start: ab bc cd e?
!ab: "A" "B"?
!bc: "B"? "C"?
!cd: "C"? "D"
!e: "E"
"""

    parser = Lark(grammar, parser='earley', ambiguity='explicit', lexer=LEXER)
    parsed = parser.parse("ABCDE")

    rule_names = ('ab', 'bc', 'cd')
    # Each row gives the tokens consumed by ab, bc and cd respectively.
    splits = [
        ('AB', 'C', 'D'),
        ('A', 'BC', 'D'),
        ('A', 'B', 'CD'),
        ('AB', '', 'CD'),
    ]

    def derivation(parts):
        subtrees = [Tree(name, list(tokens)) for name, tokens in zip(rule_names, parts)]
        subtrees.append(Tree('e', ['E']))
        return Tree('start', subtrees)

    self.assertEqual(parsed.data, '_ambig')
    self.assertEqual(set(parsed.children), {derivation(parts) for parts in splits})

def test_nested_ambiguous_intermediate_nodes2(self):
    """Three optional tokens shared between four rules; expect all eight derivations."""
    grammar = """
start: ab bc cd de f
!ab: "A" "B"?
!bc: "B"? "C"?
!cd: "C"? "D"?
!de: "D"? "E"
!f: "F"
"""

    parser = Lark(grammar, parser='earley', ambiguity='explicit', lexer=LEXER)
    parsed = parser.parse("ABCDEF")

    rule_names = ('ab', 'bc', 'cd', 'de')
    # Each row gives the tokens consumed by ab, bc, cd and de respectively.
    splits = [
        ('AB', 'C', 'D', 'E'),
        ('A', 'BC', 'D', 'E'),
        ('A', 'B', 'CD', 'E'),
        ('A', 'B', 'C', 'DE'),
        ('AB', '', 'C', 'DE'),
        ('A', 'BC', '', 'DE'),
        ('AB', '', 'CD', 'E'),
        ('AB', 'C', '', 'DE'),
    ]

    def derivation(parts):
        subtrees = [Tree(name, list(tokens)) for name, tokens in zip(rule_names, parts)]
        subtrees.append(Tree('f', ['F']))
        return Tree('start', subtrees)

    self.assertEqual(parsed.data, '_ambig')
    self.assertEqual(set(parsed.children), {derivation(parts) for parts in splits})

def test_ambiguous_intermediate_node_unnamed_token(self):
    """The trailing anonymous "D" must not appear in either of the two derivations."""
    grammar = """
start: ab bc "D"
!ab: "A" "B"?
!bc: "B"? "C"
"""

    parser = Lark(grammar, parser='earley', ambiguity='explicit', lexer=LEXER)
    parsed = parser.parse("ABCD")

    def derivation(ab_tokens, bc_tokens):
        # One `start` tree; each string is split into single-character children.
        return Tree('start', [Tree('ab', list(ab_tokens)), Tree('bc', list(bc_tokens))])

    self.assertEqual(parsed.data, '_ambig')
    self.assertEqual(
        set(parsed.children),
        {derivation('A', 'BC'), derivation('AB', 'C')},
    )

def test_ambiguous_intermediate_node_inlined_rule(self):
    """With `_bc` inlined, the two derivations differ only in what `ab` holds."""
    grammar = """
start: ab _bc d?
!ab: "A" "B"?
_bc: "B"? "C"
!d: "D"
"""

    parser = Lark(grammar, parser='earley', ambiguity='explicit', lexer=LEXER)
    parsed = parser.parse("ABCD")

    def derivation(ab_tokens):
        # `_bc` contributes no children to the expected tree.
        return Tree('start', [Tree('ab', list(ab_tokens)), Tree('d', ['D'])])

    self.assertEqual(parsed.data, '_ambig')
    self.assertEqual(set(parsed.children), {derivation('A'), derivation('AB')})

def test_ambiguous_intermediate_node_conditionally_inlined_rule(self):
    """`?bc` stays a tree with two children but collapses to a bare 'C' with one."""
    grammar = """
start: ab bc d?
!ab: "A" "B"?
!?bc: "B"? "C"
!d: "D"
"""

    parser = Lark(grammar, parser='earley', ambiguity='explicit', lexer=LEXER)
    parsed = parser.parse("ABCD")

    # `bc` keeps both tokens, so it remains a subtree.
    bc_kept = Tree('start', [
        Tree('ab', ['A']),
        Tree('bc', ['B', 'C']),
        Tree('d', ['D']),
    ])
    # `bc` has a single child, so that child appears in its place.
    bc_inlined = Tree('start', [
        Tree('ab', ['A', 'B']),
        'C',
        Tree('d', ['D']),
    ])

    self.assertEqual(parsed.data, '_ambig')
    self.assertEqual(set(parsed.children), {bc_kept, bc_inlined})

def test_fruitflies_ambig(self):
grammar = """
start: noun verb noun -> simple


Loading…
Cancel
Save