From b18d1098867682d9d32032683d6ab45a1e3c17da Mon Sep 17 00:00:00 2001
From: Blank Spruce <32396809+BlankSpruce@users.noreply.github.com>
Date: Sun, 3 May 2020 20:50:50 +0200
Subject: [PATCH] Rewrite iter_subtrees as a more efficient version

Using OrderedDict provides these properties:
- a given subtree is yielded only once, since it is stored in the
  OrderedDict only once (even though it may be queued multiple times),
  so there is no need to double-check whether a subtree was already seen
- the order of iteration is preserved as it was previously, when the
  subtrees to iterate over were stored in a list
---
 lark/tree.py | 31 ++++++++++++-------------------
 1 file changed, 12 insertions(+), 19 deletions(-)

diff --git a/lark/tree.py b/lark/tree.py
index ee8dfb7..e2e41d9 100644
--- a/lark/tree.py
+++ b/lark/tree.py
@@ -4,6 +4,7 @@ except ImportError:
     pass
 
 from copy import deepcopy
+from collections import OrderedDict
 
 
 ###{standalone
@@ -58,25 +59,17 @@ class Tree(object):
         return hash((self.data, tuple(self.children)))
 
     def iter_subtrees(self):
-        # TODO: Re-write as a more efficient version
-
-        visited = set()
-        q = [self]
-
-        l = []
-        while q:
-            subtree = q.pop()
-            l.append( subtree )
-            if id(subtree) in visited:
-                continue  # already been here from another branch
-            visited.add(id(subtree))
-            q += [c for c in subtree.children if isinstance(c, Tree)]
-
-        seen = set()
-        for x in reversed(l):
-            if id(x) not in seen:
-                yield x
-                seen.add(id(x))
+        queue = [self]
+        subtrees = OrderedDict()
+        for subtree in queue:
+            if id(subtree) in subtrees:
+                continue
+            subtrees[id(subtree)] = subtree
+            queue += [c for c in reversed(subtree.children) if isinstance(c, Tree)]
+
+        del queue
+        for subtree in reversed(list(subtrees.values())):
+            yield subtree
 
     def find_pred(self, pred):
         "Find all nodes where pred(tree) == True"