When the same parser is used repeatedly for small parses, we incur significant overhead by recreating the ForestVisitor on each parse. We can cache the Forest walker and re-use it by making it stateless. Also, we can use slots for all of the Forest Walkers to reduce construction delay and function call overhead.
tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.6.6
@@ -26,7 +26,6 @@ class Parser: | |||||
analysis = GrammarAnalyzer(parser_conf) | analysis = GrammarAnalyzer(parser_conf) | ||||
self.parser_conf = parser_conf | self.parser_conf = parser_conf | ||||
self.resolve_ambiguity = resolve_ambiguity | self.resolve_ambiguity = resolve_ambiguity | ||||
self.forest_sum_visitor = forest_sum_visitor | |||||
self.FIRST = analysis.FIRST | self.FIRST = analysis.FIRST | ||||
self.callbacks = {} | self.callbacks = {} | ||||
@@ -41,6 +40,7 @@ class Parser: | |||||
self.callbacks[rule] = rule.alias if callable(rule.alias) else getattr(parser_conf.callback, rule.alias) | self.callbacks[rule] = rule.alias if callable(rule.alias) else getattr(parser_conf.callback, rule.alias) | ||||
self.predictions[rule.origin] = [x.rule for x in analysis.expand_rule(rule.origin)] | self.predictions[rule.origin] = [x.rule for x in analysis.expand_rule(rule.origin)] | ||||
self.forest_tree_visitor = ForestToTreeVisitor(forest_sum_visitor, self.callbacks) | |||||
self.term_matcher = term_matcher | self.term_matcher = term_matcher | ||||
@@ -203,7 +203,7 @@ class Parser: | |||||
# ... otherwise, disambiguate and convert the SPPF to an AST, removing any ambiguities | # ... otherwise, disambiguate and convert the SPPF to an AST, removing any ambiguities | ||||
# according to the rules. | # according to the rules. | ||||
return ForestToTreeVisitor(solutions[0], self.forest_sum_visitor, self.callbacks).go() | |||||
return self.forest_tree_visitor.go(solutions[0]) | |||||
class ApplyCallbacks(Transformer_InPlace): | class ApplyCallbacks(Transformer_InPlace): | ||||
def __init__(self, postprocess): | def __init__(self, postprocess): | ||||
@@ -114,9 +114,7 @@ class ForestVisitor(object): | |||||
Use this as a base when you need to walk the forest. | Use this as a base when you need to walk the forest. | ||||
""" | """ | ||||
def __init__(self, root): | |||||
self.root = root | |||||
self.result = None | |||||
__slots__ = ['result'] | |||||
def visit_token_node(self, node): pass | def visit_token_node(self, node): pass | ||||
def visit_symbol_node_in(self, node): pass | def visit_symbol_node_in(self, node): pass | ||||
@@ -124,7 +122,8 @@ class ForestVisitor(object): | |||||
def visit_packed_node_in(self, node): pass | def visit_packed_node_in(self, node): pass | ||||
def visit_packed_node_out(self, node): pass | def visit_packed_node_out(self, node): pass | ||||
def go(self): | |||||
def go(self, root): | |||||
self.result = None | |||||
# Visiting is a list of IDs of all symbol/intermediate nodes currently in | # Visiting is a list of IDs of all symbol/intermediate nodes currently in | ||||
# the stack. It serves two purposes: to detect when we 'recurse' in and out | # the stack. It serves two purposes: to detect when we 'recurse' in and out | ||||
# of a symbol/intermediate so that we can process both up and down. Also, | # of a symbol/intermediate so that we can process both up and down. Also, | ||||
@@ -134,7 +133,7 @@ class ForestVisitor(object): | |||||
# We do not use recursion here to walk the Forest due to the limited | # We do not use recursion here to walk the Forest due to the limited | ||||
# stack size in python. Therefore input_stack is essentially our stack. | # stack size in python. Therefore input_stack is essentially our stack. | ||||
input_stack = deque([self.root]) | |||||
input_stack = deque([root]) | |||||
# It is much faster to cache these as locals since they are called | # It is much faster to cache these as locals since they are called | ||||
# many times in large parses. | # many times in large parses. | ||||
@@ -263,19 +262,21 @@ class ForestToTreeVisitor(ForestVisitor): | |||||
implementation should be another ForestVisitor which sorts the children | implementation should be another ForestVisitor which sorts the children | ||||
according to some priority mechanism. | according to some priority mechanism. | ||||
""" | """ | ||||
def __init__(self, root, forest_sum_visitor = ForestSumVisitor, callbacks = None): | |||||
super(ForestToTreeVisitor, self).__init__(root) | |||||
self.forest_sum_visitor = forest_sum_visitor | |||||
self.output_stack = deque() | |||||
__slots__ = ['forest_sum_visitor', 'output_stack', 'callbacks'] | |||||
def __init__(self, forest_sum_visitor = ForestSumVisitor, callbacks = None): | |||||
self.forest_sum_visitor = forest_sum_visitor() | |||||
self.callbacks = callbacks | self.callbacks = callbacks | ||||
self.result = None | |||||
def go(self, root): | |||||
self.output_stack = deque() | |||||
return super(ForestToTreeVisitor, self).go(root) | |||||
def visit_token_node(self, node): | def visit_token_node(self, node): | ||||
self.output_stack[-1].append(node) | self.output_stack[-1].append(node) | ||||
def visit_symbol_node_in(self, node): | def visit_symbol_node_in(self, node): | ||||
if node.is_ambiguous and node.priority is None: | if node.is_ambiguous and node.priority is None: | ||||
self.forest_sum_visitor(node).go() | |||||
self.forest_sum_visitor.go(node) | |||||
return next(iter(node.children)) | return next(iter(node.children)) | ||||
def visit_packed_node_in(self, node): | def visit_packed_node_in(self, node): | ||||
@@ -311,11 +312,13 @@ class ForestToAmbiguousTreeVisitor(ForestVisitor): | |||||
This is mainly used by the test framework, to make it simpler to write | This is mainly used by the test framework, to make it simpler to write | ||||
tests ensuring the SPPF contains the right results. | tests ensuring the SPPF contains the right results. | ||||
""" | """ | ||||
def __init__(self, root, callbacks): | |||||
super(ForestToAmbiguousTreeVisitor, self).__init__(root) | |||||
self.output_stack = deque() | |||||
__slots__ = ['output_stack', 'callbacks'] | |||||
def __init__(self, callbacks): | |||||
self.callbacks = callbacks | self.callbacks = callbacks | ||||
self.result = None | |||||
def go(self, root): | |||||
self.output_stack = deque([]) | |||||
return super(ForestToAmbiguousTreeVisitor, self).go(root) | |||||
def visit_token_node(self, node): | def visit_token_node(self, node): | ||||
self.output_stack[-1].children.append(node) | self.output_stack[-1].children.append(node) | ||||
@@ -347,4 +350,4 @@ class ForestToAmbiguousTreeVisitor(ForestVisitor): | |||||
if self.output_stack: | if self.output_stack: | ||||
self.output_stack[-1].children.append(result) | self.output_stack[-1].children.append(result) | ||||
else: | else: | ||||
self.result = result | |||||
self.result = result |
@@ -33,7 +33,6 @@ class Parser: | |||||
analysis = GrammarAnalyzer(parser_conf) | analysis = GrammarAnalyzer(parser_conf) | ||||
self.parser_conf = parser_conf | self.parser_conf = parser_conf | ||||
self.resolve_ambiguity = resolve_ambiguity | self.resolve_ambiguity = resolve_ambiguity | ||||
self.forest_sum_visitor = forest_sum_visitor | |||||
self.ignore = [Terminal(t) for t in ignore] | self.ignore = [Terminal(t) for t in ignore] | ||||
self.complete_lex = complete_lex | self.complete_lex = complete_lex | ||||
@@ -50,6 +49,7 @@ class Parser: | |||||
self.predictions[rule.origin] = [x.rule for x in analysis.expand_rule(rule.origin)] | self.predictions[rule.origin] = [x.rule for x in analysis.expand_rule(rule.origin)] | ||||
self.term_matcher = term_matcher | self.term_matcher = term_matcher | ||||
self.forest_tree_visitor = ForestToTreeVisitor(forest_sum_visitor, self.callbacks) | |||||
def parse(self, stream, start_symbol=None): | def parse(self, stream, start_symbol=None): | ||||
start_symbol = NonTerminal(start_symbol or self.parser_conf.start) | start_symbol = NonTerminal(start_symbol or self.parser_conf.start) | ||||
@@ -271,4 +271,4 @@ class Parser: | |||||
# ... otherwise, disambiguate and convert the SPPF to an AST, removing any ambiguities | # ... otherwise, disambiguate and convert the SPPF to an AST, removing any ambiguities | ||||
# according to the rules. | # according to the rules. | ||||
return ForestToTreeVisitor(solutions[0], self.forest_sum_visitor, self.callbacks).go() | |||||
return self.forest_tree_visitor.go(solutions[0]) |
@@ -239,7 +239,7 @@ def _make_full_earley_test(LEXER): | |||||
parser = Lark(grammar, parser='earley', lexer=LEXER, ambiguity='explicit') | parser = Lark(grammar, parser='earley', lexer=LEXER, ambiguity='explicit') | ||||
root_symbol = parser.parse('ab') | root_symbol = parser.parse('ab') | ||||
ambig_tree = ForestToAmbiguousTreeVisitor(root_symbol, parser.parser.parser.callbacks).go() | |||||
ambig_tree = ForestToAmbiguousTreeVisitor(parser.parser.parser.callbacks).go(root_symbol) | |||||
# print(ambig_tree.pretty()) | # print(ambig_tree.pretty()) | ||||
self.assertEqual( ambig_tree.data, '_ambig') | self.assertEqual( ambig_tree.data, '_ambig') | ||||
self.assertEqual( len(ambig_tree.children), 2) | self.assertEqual( len(ambig_tree.children), 2) | ||||
@@ -255,7 +255,7 @@ def _make_full_earley_test(LEXER): | |||||
""" | """ | ||||
l = Lark(grammar, parser='earley', ambiguity='explicit', lexer=LEXER) | l = Lark(grammar, parser='earley', ambiguity='explicit', lexer=LEXER) | ||||
root_symbol = l.parse('cde') | root_symbol = l.parse('cde') | ||||
ambig_tree = ForestToAmbiguousTreeVisitor(root_symbol, l.parser.parser.callbacks).go() | |||||
ambig_tree = ForestToAmbiguousTreeVisitor(l.parser.parser.callbacks).go(root_symbol) | |||||
# print(ambig_tree.pretty()) | # print(ambig_tree.pretty()) | ||||
# tree = ApplyCallbacks(l.parser.parser.postprocess).transform(ambig_tree) | # tree = ApplyCallbacks(l.parser.parser.postprocess).transform(ambig_tree) | ||||
@@ -302,7 +302,7 @@ def _make_full_earley_test(LEXER): | |||||
""" | """ | ||||
parser = Lark(grammar, ambiguity='explicit', lexer=LEXER) | parser = Lark(grammar, ambiguity='explicit', lexer=LEXER) | ||||
root_symbol = parser.parse('fruit flies like bananas') | root_symbol = parser.parse('fruit flies like bananas') | ||||
tree = ForestToAmbiguousTreeVisitor(root_symbol, parser.parser.parser.callbacks).go() | |||||
tree = ForestToAmbiguousTreeVisitor(parser.parser.parser.callbacks).go(root_symbol) | |||||
# tree = ApplyCallbacks(parser.parser.parser.postprocess).transform(ambig_tree) | # tree = ApplyCallbacks(parser.parser.parser.postprocess).transform(ambig_tree) | ||||
expected = Tree('_ambig', [ | expected = Tree('_ambig', [ | ||||