Browse Source

convert to iterating via files instead of pieces.

This uses an index to quickly look up which pieces are part of a
file, and then checks that they are all valid. This should be faster,
as it is likely that the torrent has more pieces than files (few
large files, vs many, many small files).
main
John-Mark Gurney 2 years ago
parent
commit
0865595d3a
1 changed files with 27 additions and 12 deletions
  1. +27
    -12
      __init__.py

+ 27
- 12
__init__.py View File

@@ -21,8 +21,16 @@ class Storage:
self._buildindex()

def _filepaths(self):
'''Iterates over all the files in the torrent.

Each item is a tuple of:
array of file path components (undecoded)
a pathlib.PurePath for the file
a pathlib.Path for file on disk
'''

for curfile in self._files:
fname = pathlib.Path(
fname = pathlib.PurePath(
*(x.decode(self._encoding) for x in
curfile['path']))
curfilepath = self._rootpath / fname
@@ -34,7 +42,8 @@ class Storage:
yield curfilepath

def _buildindex(self):
self._index = []
self._pieceindex = []
self._fileindex = {}
files = self._filepaths()
left = 0
curfile = None
@@ -50,7 +59,9 @@ class Storage:

if left == 0:
current = []
self._index.append(current)
self._fileindex.setdefault(fname,
[]).append(len(self._pieceindex))
self._pieceindex.append(current)
left = self._piecelen

sz = min(curfile['length'] - curfileoff, left)
@@ -61,12 +72,15 @@ class Storage:
curfileoff += sz
left -= sz

def filepieces(self):
return self._fileindex.items()

def filesforpiece(self, idx):
for x in self._index[idx]:
for x in self._pieceindex[idx]:
yield x['file']

def apply_piece(self, idx, fun):
for i in self._index[idx]:
for i in self._pieceindex[idx]:
with open(i['file'], 'rb') as fp:
fp.seek(i['offset'])
fun(fp.read(i['size']))
@@ -83,7 +97,8 @@ def validate(torrent, basedir):

torrentdir = basedir / info['name'].decode(encoding)

stor = Storage(torrentdir, info['files'], info['piece length'], encoding)
stor = Storage(torrentdir, info['files'], info['piece length'],
encoding)

pieces = info['pieces']
piecescnt = len(pieces) // 20
@@ -102,10 +117,8 @@ def validate(torrent, basedir):
# if any piece of a file is bad, it's bad
allfiles = set(stor.allfiles())

badpieces = [ x for x, v in enumerate(valid) if not v ]

badfiles = reduce(set.__or__, (set(stor.filesforpiece(x)) for x in
badpieces), set())
badfiles = { torrentdir / x for x, y in stor.filepieces() if
not all(valid[i] for i in y) }

return allfiles - badfiles, badfiles

@@ -198,5 +211,7 @@ class _TestCases(unittest.TestCase):

val, inval = validate(self.torrent, self.basetempdir)

self.assertEqual(set(val), { sd / x for x in missingfiles.keys() if x not in badfiles })
self.assertEqual(set(inval), { sd / x for x in badfiles.keys() })
self.assertEqual(set(val), { sd / x for x in
missingfiles.keys() if x not in badfiles })
self.assertEqual(set(inval), { sd / x for x in
badfiles.keys() })

Loading…
Cancel
Save