Browse Source

Speed up repetitive parsing using the same parser

When using the same parser repeatedly for small parses we incur
significant overhead by recreating the ForestVisitor on each parse.
We can cache the forest visitor and re-use it by making it stateless.
Also, we can use __slots__ for all of the forest visitors to reduce
construction delay and function call overhead.
tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.6.6
night199uk 6 years ago
parent
commit
bb22c84df3
4 changed files with 26 additions and 23 deletions
  1. +2
    -2
      lark/parsers/earley.py
  2. +19
    -16
      lark/parsers/earley_forest.py
  3. +2
    -2
      lark/parsers/xearley.py
  4. +3
    -3
      tests/test_parser.py

+ 2
- 2
lark/parsers/earley.py View File

@@ -26,7 +26,6 @@ class Parser:
analysis = GrammarAnalyzer(parser_conf)
self.parser_conf = parser_conf
self.resolve_ambiguity = resolve_ambiguity
self.forest_sum_visitor = forest_sum_visitor

self.FIRST = analysis.FIRST
self.callbacks = {}
@@ -41,6 +40,7 @@ class Parser:
self.callbacks[rule] = rule.alias if callable(rule.alias) else getattr(parser_conf.callback, rule.alias)
self.predictions[rule.origin] = [x.rule for x in analysis.expand_rule(rule.origin)]

self.forest_tree_visitor = ForestToTreeVisitor(forest_sum_visitor, self.callbacks)
self.term_matcher = term_matcher


@@ -203,7 +203,7 @@ class Parser:

# ... otherwise, disambiguate and convert the SPPF to an AST, removing any ambiguities
# according to the rules.
return ForestToTreeVisitor(solutions[0], self.forest_sum_visitor, self.callbacks).go()
return self.forest_tree_visitor.go(solutions[0])

class ApplyCallbacks(Transformer_InPlace):
def __init__(self, postprocess):


+ 19
- 16
lark/parsers/earley_forest.py View File

@@ -114,9 +114,7 @@ class ForestVisitor(object):

Use this as a base when you need to walk the forest.
"""
def __init__(self, root):
self.root = root
self.result = None
__slots__ = ['result']

def visit_token_node(self, node): pass
def visit_symbol_node_in(self, node): pass
@@ -124,7 +122,8 @@ class ForestVisitor(object):
def visit_packed_node_in(self, node): pass
def visit_packed_node_out(self, node): pass

def go(self):
def go(self, root):
self.result = None
# Visiting is a list of IDs of all symbol/intermediate nodes currently in
# the stack. It serves two purposes: to detect when we 'recurse' in and out
# of a symbol/intermediate so that we can process both up and down. Also,
@@ -134,7 +133,7 @@ class ForestVisitor(object):

# We do not use recursion here to walk the Forest due to the limited
# stack size in python. Therefore input_stack is essentially our stack.
input_stack = deque([self.root])
input_stack = deque([root])

# It is much faster to cache these as locals since they are called
# many times in large parses.
@@ -263,19 +262,21 @@ class ForestToTreeVisitor(ForestVisitor):
implementation should be another ForestVisitor which sorts the children
according to some priority mechanism.
"""
def __init__(self, root, forest_sum_visitor = ForestSumVisitor, callbacks = None):
super(ForestToTreeVisitor, self).__init__(root)
self.forest_sum_visitor = forest_sum_visitor
self.output_stack = deque()
__slots__ = ['forest_sum_visitor', 'output_stack', 'callbacks']
def __init__(self, forest_sum_visitor = ForestSumVisitor, callbacks = None):
self.forest_sum_visitor = forest_sum_visitor()
self.callbacks = callbacks
self.result = None

def go(self, root):
self.output_stack = deque()
return super(ForestToTreeVisitor, self).go(root)

def visit_token_node(self, node):
self.output_stack[-1].append(node)

def visit_symbol_node_in(self, node):
if node.is_ambiguous and node.priority is None:
self.forest_sum_visitor(node).go()
self.forest_sum_visitor.go(node)
return next(iter(node.children))

def visit_packed_node_in(self, node):
@@ -311,11 +312,13 @@ class ForestToAmbiguousTreeVisitor(ForestVisitor):
This is mainly used by the test framework, to make it simpler to write
tests ensuring the SPPF contains the right results.
"""
def __init__(self, root, callbacks):
super(ForestToAmbiguousTreeVisitor, self).__init__(root)
self.output_stack = deque()
__slots__ = ['output_stack', 'callbacks']
def __init__(self, callbacks):
self.callbacks = callbacks
self.result = None

def go(self, root):
self.output_stack = deque([])
return super(ForestToAmbiguousTreeVisitor, self).go(root)

def visit_token_node(self, node):
self.output_stack[-1].children.append(node)
@@ -347,4 +350,4 @@ class ForestToAmbiguousTreeVisitor(ForestVisitor):
if self.output_stack:
self.output_stack[-1].children.append(result)
else:
self.result = result
self.result = result

+ 2
- 2
lark/parsers/xearley.py View File

@@ -33,7 +33,6 @@ class Parser:
analysis = GrammarAnalyzer(parser_conf)
self.parser_conf = parser_conf
self.resolve_ambiguity = resolve_ambiguity
self.forest_sum_visitor = forest_sum_visitor
self.ignore = [Terminal(t) for t in ignore]
self.complete_lex = complete_lex

@@ -50,6 +49,7 @@ class Parser:
self.predictions[rule.origin] = [x.rule for x in analysis.expand_rule(rule.origin)]

self.term_matcher = term_matcher
self.forest_tree_visitor = ForestToTreeVisitor(forest_sum_visitor, self.callbacks)

def parse(self, stream, start_symbol=None):
start_symbol = NonTerminal(start_symbol or self.parser_conf.start)
@@ -271,4 +271,4 @@ class Parser:

# ... otherwise, disambiguate and convert the SPPF to an AST, removing any ambiguities
# according to the rules.
return ForestToTreeVisitor(solutions[0], self.forest_sum_visitor, self.callbacks).go()
return self.forest_tree_visitor.go(solutions[0])

+ 3
- 3
tests/test_parser.py View File

@@ -239,7 +239,7 @@ def _make_full_earley_test(LEXER):

parser = Lark(grammar, parser='earley', lexer=LEXER, ambiguity='explicit')
root_symbol = parser.parse('ab')
ambig_tree = ForestToAmbiguousTreeVisitor(root_symbol, parser.parser.parser.callbacks).go()
ambig_tree = ForestToAmbiguousTreeVisitor(parser.parser.parser.callbacks).go(root_symbol)
# print(ambig_tree.pretty())
self.assertEqual( ambig_tree.data, '_ambig')
self.assertEqual( len(ambig_tree.children), 2)
@@ -255,7 +255,7 @@ def _make_full_earley_test(LEXER):
"""
l = Lark(grammar, parser='earley', ambiguity='explicit', lexer=LEXER)
root_symbol = l.parse('cde')
ambig_tree = ForestToAmbiguousTreeVisitor(root_symbol, l.parser.parser.callbacks).go()
ambig_tree = ForestToAmbiguousTreeVisitor(l.parser.parser.callbacks).go(root_symbol)
# print(ambig_tree.pretty())
# tree = ApplyCallbacks(l.parser.parser.postprocess).transform(ambig_tree)

@@ -302,7 +302,7 @@ def _make_full_earley_test(LEXER):
"""
parser = Lark(grammar, ambiguity='explicit', lexer=LEXER)
root_symbol = parser.parse('fruit flies like bananas')
tree = ForestToAmbiguousTreeVisitor(root_symbol, parser.parser.parser.callbacks).go()
tree = ForestToAmbiguousTreeVisitor(parser.parser.parser.callbacks).go(root_symbol)
# tree = ApplyCallbacks(parser.parser.parser.postprocess).transform(ambig_tree)

expected = Tree('_ambig', [


Loading…
Cancel
Save