Browse Source

Added the Forest interface for explicit ambiguity

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.6.6
Erez Shinan 6 years ago
parent
commit
76e185a36c
5 changed files with 26 additions and 10 deletions
  1. +2
    -4
      lark/parsers/earley.py
  2. +17
    -0
      lark/parsers/earley_forest.py
  3. +2
    -2
      lark/parsers/xearley.py
  4. +1
    -2
      tests/__main__.py
  5. +4
    -2
      tests/test_parser.py

+ 2
- 4
lark/parsers/earley.py View File

@@ -17,9 +17,7 @@ from ..exceptions import ParseError, UnexpectedToken
from .grammar_analysis import GrammarAnalyzer
from ..grammar import NonTerminal
from .earley_common import Item, TransitiveItem
from .earley_forest import ForestToTreeVisitor, ForestSumVisitor, SymbolNode

from collections import deque, defaultdict
from .earley_forest import ForestToTreeVisitor, ForestSumVisitor, SymbolNode, Forest

class Parser:
def __init__(self, parser_conf, term_matcher, resolve_ambiguity=True, forest_sum_visitor = ForestSumVisitor):
@@ -295,7 +293,7 @@ class Parser:
## If we're not resolving ambiguity, we just return the root of the SPPF tree to the caller.
# This means the caller can work directly with the SPPF tree.
if not self.resolve_ambiguity:
return ForestToAmbiguousTreeVisitor(solutions[0], self.callbacks).go()
return Forest(solutions[0], self.callbacks)

# ... otherwise, disambiguate and convert the SPPF to an AST, removing any ambiguities
# according to the rules.


+ 17
- 0
lark/parsers/earley_forest.py View File

@@ -462,3 +462,20 @@ class ForestToPyDotVisitor(ForestVisitor):
child_graph_node_id = str(id(child))
child_graph_node = self.graph.get_node(child_graph_node_id)[0]
self.graph.add_edge(self.pydot.Edge(graph_node, child_graph_node))

class Forest(Tree):
    """Tree-like wrapper around a parse-forest root, tagged as ``'_ambig'``.

    The wrapped ``root`` is not converted when the object is created.
    The first read of ``children`` runs ``ForestToAmbiguousTreeVisitor``
    over it and keeps the resulting child list for later reads.

    Note that ``Tree.__init__`` is not invoked here; only the attributes
    assigned in ``__init__`` below are set by this class.
    """

    def __init__(self, root, callbacks):
        """Store the forest root and the callbacks used to build the tree.

        Args:
            root: root node of the forest to wrap.
            callbacks: passed unchanged to ``ForestToAmbiguousTreeVisitor``
                when the children are first requested.
        """
        self.root = root
        self.callbacks = callbacks
        self.data = '_ambig'
        # ``None`` marks "not converted yet"; see the ``children`` property.
        self._children = None

    @property
    def children(self):
        """Children of the ambiguous tree, computed on first access."""
        cached = self._children
        if cached is not None:
            return cached

        # First access: convert the forest and remember the child list.
        ambig_tree = ForestToAmbiguousTreeVisitor(self.callbacks).go(self.root)
        self._children = ambig_tree.children
        return self._children

    def to_pydot(self, filename):
        """Run ``ForestToPyDotVisitor`` on the wrapped root with ``filename``."""
        visitor = ForestToPyDotVisitor()
        visitor.go(self.root, filename)

+ 2
- 2
lark/parsers/xearley.py View File

@@ -24,7 +24,7 @@ from .grammar_analysis import GrammarAnalyzer
from ..grammar import NonTerminal, Terminal
from .earley import ApplyCallbacks
from .earley_common import Item, TransitiveItem
from .earley_forest import ForestToTreeVisitor, ForestSumVisitor, SymbolNode, ForestToAmbiguousTreeVisitor
from .earley_forest import ForestToTreeVisitor, ForestSumVisitor, SymbolNode, Forest


class Parser:
@@ -359,7 +359,7 @@ class Parser:
## If we're not resolving ambiguity, we just return the root of the SPPF tree to the caller.
# This means the caller can work directly with the SPPF tree.
if not self.resolve_ambiguity:
return ForestToAmbiguousTreeVisitor(self.callbacks).go(solutions[0])
return Forest(solutions[0], self.callbacks)

# ... otherwise, disambiguate and convert the SPPF to an AST, removing any ambiguities
# according to the rules.


+ 1
- 2
tests/__main__.py View File

@@ -20,10 +20,9 @@ from .test_parser import (
TestEarleyStandard,
TestCykStandard,
TestLalrContextual,
# TestEarleyScanless,
TestEarleyDynamic,

# TestFullEarleyScanless,
# TestFullEarleyStandard,
TestFullEarleyDynamic,
TestFullEarleyDynamic_complete,



+ 4
- 2
tests/test_parser.py View File

@@ -228,6 +228,7 @@ def _make_full_earley_test(LEXER):
empty_tree = Tree('empty', [Tree('empty2', [])])
self.assertSequenceEqual(res.children, ['a', empty_tree, empty_tree, 'b'])

@unittest.skipIf(LEXER=='standard', "Requires dynamic lexer")
def test_earley_explicit_ambiguity(self):
# This was a sneaky bug!

@@ -244,6 +245,7 @@ def _make_full_earley_test(LEXER):
self.assertEqual( ambig_tree.data, '_ambig')
self.assertEqual( len(ambig_tree.children), 2)

@unittest.skipIf(LEXER=='standard', "Requires dynamic lexer")
def test_ambiguity1(self):
grammar = """
start: cd+ "e"
@@ -261,7 +263,7 @@ def _make_full_earley_test(LEXER):
assert ambig_tree.data == '_ambig', ambig_tree
assert len(ambig_tree.children) == 2

@unittest.skipIf(LEXER==None, "Scanless doesn't support regular expressions")
@unittest.skipIf(LEXER=='standard', "Requires dynamic lexer")
def test_ambiguity2(self):
grammar = """
ANY: /[a-zA-Z0-9 ]+/
@@ -324,7 +326,7 @@ def _make_full_earley_test(LEXER):
self.assertEqual(set(tree.children), set(expected.children))


@unittest.skipIf(LEXER=='dynamic', "Only relevant for the dynamic_complete parser")
@unittest.skipIf(LEXER!='dynamic_complete', "Only relevant for the dynamic_complete parser")
def test_explicit_ambiguity2(self):
grammar = r"""
start: NAME+


Loading…
Cancel
Save