When using the same parser repeatedly for small parses, we incur significant overhead by recreating the ForestVisitor on each parse. We can cache the forest walker and re-use it by making it stateless: the root node is passed to `go()` instead of to the constructor. Also, we can use `__slots__` for all of the forest walkers to reduce construction delay and function call overhead.
tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.6.6
@@ -26,7 +26,6 @@ class Parser: | |||
analysis = GrammarAnalyzer(parser_conf) | |||
self.parser_conf = parser_conf | |||
self.resolve_ambiguity = resolve_ambiguity | |||
self.forest_sum_visitor = forest_sum_visitor | |||
self.FIRST = analysis.FIRST | |||
self.callbacks = {} | |||
@@ -41,6 +40,7 @@ class Parser: | |||
self.callbacks[rule] = rule.alias if callable(rule.alias) else getattr(parser_conf.callback, rule.alias) | |||
self.predictions[rule.origin] = [x.rule for x in analysis.expand_rule(rule.origin)] | |||
self.forest_tree_visitor = ForestToTreeVisitor(forest_sum_visitor, self.callbacks) | |||
self.term_matcher = term_matcher | |||
@@ -203,7 +203,7 @@ class Parser: | |||
# ... otherwise, disambiguate and convert the SPPF to an AST, removing any ambiguities | |||
# according to the rules. | |||
return ForestToTreeVisitor(solutions[0], self.forest_sum_visitor, self.callbacks).go() | |||
return self.forest_tree_visitor.go(solutions[0]) | |||
class ApplyCallbacks(Transformer_InPlace): | |||
def __init__(self, postprocess): | |||
@@ -114,9 +114,7 @@ class ForestVisitor(object): | |||
Use this as a base when you need to walk the forest. | |||
""" | |||
def __init__(self, root): | |||
self.root = root | |||
self.result = None | |||
__slots__ = ['result'] | |||
def visit_token_node(self, node): pass | |||
def visit_symbol_node_in(self, node): pass | |||
@@ -124,7 +122,8 @@ class ForestVisitor(object): | |||
def visit_packed_node_in(self, node): pass | |||
def visit_packed_node_out(self, node): pass | |||
def go(self): | |||
def go(self, root): | |||
self.result = None | |||
# Visiting is a list of IDs of all symbol/intermediate nodes currently in | |||
# the stack. It serves two purposes: to detect when we 'recurse' in and out | |||
# of a symbol/intermediate so that we can process both up and down. Also, | |||
@@ -134,7 +133,7 @@ class ForestVisitor(object): | |||
# We do not use recursion here to walk the Forest due to the limited | |||
# stack size in python. Therefore input_stack is essentially our stack. | |||
input_stack = deque([self.root]) | |||
input_stack = deque([root]) | |||
# It is much faster to cache these as locals since they are called | |||
# many times in large parses. | |||
@@ -263,19 +262,21 @@ class ForestToTreeVisitor(ForestVisitor): | |||
implementation should be another ForestVisitor which sorts the children | |||
according to some priority mechanism. | |||
""" | |||
def __init__(self, root, forest_sum_visitor = ForestSumVisitor, callbacks = None): | |||
super(ForestToTreeVisitor, self).__init__(root) | |||
self.forest_sum_visitor = forest_sum_visitor | |||
self.output_stack = deque() | |||
__slots__ = ['forest_sum_visitor', 'output_stack', 'callbacks'] | |||
def __init__(self, forest_sum_visitor = ForestSumVisitor, callbacks = None): | |||
self.forest_sum_visitor = forest_sum_visitor() | |||
self.callbacks = callbacks | |||
self.result = None | |||
def go(self, root): | |||
self.output_stack = deque() | |||
return super(ForestToTreeVisitor, self).go(root) | |||
def visit_token_node(self, node): | |||
self.output_stack[-1].append(node) | |||
def visit_symbol_node_in(self, node): | |||
if node.is_ambiguous and node.priority is None: | |||
self.forest_sum_visitor(node).go() | |||
self.forest_sum_visitor.go(node) | |||
return next(iter(node.children)) | |||
def visit_packed_node_in(self, node): | |||
@@ -311,11 +312,13 @@ class ForestToAmbiguousTreeVisitor(ForestVisitor): | |||
This is mainly used by the test framework, to make it simpler to write | |||
tests ensuring the SPPF contains the right results. | |||
""" | |||
def __init__(self, root, callbacks): | |||
super(ForestToAmbiguousTreeVisitor, self).__init__(root) | |||
self.output_stack = deque() | |||
__slots__ = ['output_stack', 'callbacks'] | |||
def __init__(self, callbacks): | |||
self.callbacks = callbacks | |||
self.result = None | |||
def go(self, root): | |||
self.output_stack = deque([]) | |||
return super(ForestToAmbiguousTreeVisitor, self).go(root) | |||
def visit_token_node(self, node): | |||
self.output_stack[-1].children.append(node) | |||
@@ -347,4 +350,4 @@ class ForestToAmbiguousTreeVisitor(ForestVisitor): | |||
if self.output_stack: | |||
self.output_stack[-1].children.append(result) | |||
else: | |||
self.result = result | |||
self.result = result |
@@ -33,7 +33,6 @@ class Parser: | |||
analysis = GrammarAnalyzer(parser_conf) | |||
self.parser_conf = parser_conf | |||
self.resolve_ambiguity = resolve_ambiguity | |||
self.forest_sum_visitor = forest_sum_visitor | |||
self.ignore = [Terminal(t) for t in ignore] | |||
self.complete_lex = complete_lex | |||
@@ -50,6 +49,7 @@ class Parser: | |||
self.predictions[rule.origin] = [x.rule for x in analysis.expand_rule(rule.origin)] | |||
self.term_matcher = term_matcher | |||
self.forest_tree_visitor = ForestToTreeVisitor(forest_sum_visitor, self.callbacks) | |||
def parse(self, stream, start_symbol=None): | |||
start_symbol = NonTerminal(start_symbol or self.parser_conf.start) | |||
@@ -271,4 +271,4 @@ class Parser: | |||
# ... otherwise, disambiguate and convert the SPPF to an AST, removing any ambiguities | |||
# according to the rules. | |||
return ForestToTreeVisitor(solutions[0], self.forest_sum_visitor, self.callbacks).go() | |||
return self.forest_tree_visitor.go(solutions[0]) |
@@ -239,7 +239,7 @@ def _make_full_earley_test(LEXER): | |||
parser = Lark(grammar, parser='earley', lexer=LEXER, ambiguity='explicit') | |||
root_symbol = parser.parse('ab') | |||
ambig_tree = ForestToAmbiguousTreeVisitor(root_symbol, parser.parser.parser.callbacks).go() | |||
ambig_tree = ForestToAmbiguousTreeVisitor(parser.parser.parser.callbacks).go(root_symbol) | |||
# print(ambig_tree.pretty()) | |||
self.assertEqual( ambig_tree.data, '_ambig') | |||
self.assertEqual( len(ambig_tree.children), 2) | |||
@@ -255,7 +255,7 @@ def _make_full_earley_test(LEXER): | |||
""" | |||
l = Lark(grammar, parser='earley', ambiguity='explicit', lexer=LEXER) | |||
root_symbol = l.parse('cde') | |||
ambig_tree = ForestToAmbiguousTreeVisitor(root_symbol, l.parser.parser.callbacks).go() | |||
ambig_tree = ForestToAmbiguousTreeVisitor(l.parser.parser.callbacks).go(root_symbol) | |||
# print(ambig_tree.pretty()) | |||
# tree = ApplyCallbacks(l.parser.parser.postprocess).transform(ambig_tree) | |||
@@ -302,7 +302,7 @@ def _make_full_earley_test(LEXER): | |||
""" | |||
parser = Lark(grammar, ambiguity='explicit', lexer=LEXER) | |||
root_symbol = parser.parse('fruit flies like bananas') | |||
tree = ForestToAmbiguousTreeVisitor(root_symbol, parser.parser.parser.callbacks).go() | |||
tree = ForestToAmbiguousTreeVisitor(parser.parser.parser.callbacks).go(root_symbol) | |||
# tree = ApplyCallbacks(parser.parser.parser.postprocess).transform(ambig_tree) | |||
expected = Tree('_ambig', [ | |||