Browse Source

Rewrite iter_subtrees as a more efficient version

Using OrderedDict provides these properties:
- a given subtree is yielded only once, since it is stored in the OrderedDict
  only once (even though it may be put there multiple times), so there is no
  need to double-check whether the subtree was already seen
- the order of iteration is preserved as it was previously, when the subtrees
  to iterate over were stored in a list
tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.8.6
Blank Spruce 5 years ago
parent
commit
b18d109886
1 changed files with 12 additions and 19 deletions
  1. +12
    -19
      lark/tree.py

+ 12
- 19
lark/tree.py View File

@@ -4,6 +4,7 @@ except ImportError:
pass

from copy import deepcopy
from collections import OrderedDict


###{standalone
@@ -58,25 +59,17 @@ class Tree(object):
return hash((self.data, tuple(self.children)))

def iter_subtrees(self):
# Generator over the Tree nodes reachable from self through `.children`.
# Each distinct node (distinguished by id()) is yielded once, and self is
# yielded last.
#
# NOTE(review): this is a diff view whose +/- markers and indentation were
# lost. Going by the "+12 / -19" counts in the page header, the lines from
# the TODO down to `seen.add(id(x))` appear to be the removed implementation
# and the lines from `queue = [self]` onward the added one -- confirm against
# the actual commit.
#
# --- old implementation (presumably removed by this commit) ---
# TODO: Re-write as a more efficient version

# ids of nodes whose children have already been pushed onto q
visited = set()
# q is used as a stack: nodes are taken from the end with pop()
q = [self]

# l records every popped node, including repeats reached via another branch
l = []
while q:
subtree = q.pop()
l.append( subtree )
if id(subtree) in visited:
continue # already been here from another branch
visited.add(id(subtree))
# only Tree children are followed; any other child type is ignored
q += [c for c in subtree.children if isinstance(c, Tree)]

# second pass: walk l backwards and yield each node the first time its id
# is encountered, so repeats recorded in l are skipped
seen = set()
for x in reversed(l):
if id(x) not in seen:
yield x
seen.add(id(x))
# --- new implementation (presumably added by this commit) ---
# queue is extended while the for loop iterates over it, so children
# appended below are visited by the same loop
queue = [self]
# maps id(node) -> node in first-visit order; also serves as the visited set
subtrees = OrderedDict()
for subtree in queue:
if id(subtree) in subtrees:
continue
subtrees[id(subtree)] = subtree
# children are appended in reversed order; presumably this keeps the final
# yield order the same as before, as the commit message states
queue += [c for c in reversed(subtree.children) if isinstance(c, Tree)]

# drop the reference to the work list before yielding starts
del queue
# yield in reverse of first-visit order, so self (inserted first) comes last
for subtree in reversed(list(subtrees.values())):
yield subtree

def find_pred(self, pred):
"Find all nodes where pred(tree) == True"


Loading…
Cancel
Save