Browse Source

convert to iterating via files instead of pieces.

This uses an index to quickly look up which pieces are part of a
file, and then checks that they are all valid. This should be faster,
as it is likely that the torrent has more pieces than files (few
large files, vs. many, many small files).
main
John-Mark Gurney 2 years ago
parent
commit
0865595d3a
1 changed files with 27 additions and 12 deletions
  1. +27
    -12
      __init__.py

+ 27
- 12
__init__.py View File

@@ -21,8 +21,16 @@ class Storage:
self._buildindex() self._buildindex()


def _filepaths(self): def _filepaths(self):
'''Iterates over all the files in the torrent.

Each item is a tuple of:
array of file path components (undecoded)
a pathlib.PurePath for the file
a pathlib.Path for file on disk
'''

for curfile in self._files: for curfile in self._files:
fname = pathlib.Path(
fname = pathlib.PurePath(
*(x.decode(self._encoding) for x in *(x.decode(self._encoding) for x in
curfile['path'])) curfile['path']))
curfilepath = self._rootpath / fname curfilepath = self._rootpath / fname
@@ -34,7 +42,8 @@ class Storage:
yield curfilepath yield curfilepath


def _buildindex(self): def _buildindex(self):
self._index = []
self._pieceindex = []
self._fileindex = {}
files = self._filepaths() files = self._filepaths()
left = 0 left = 0
curfile = None curfile = None
@@ -50,7 +59,9 @@ class Storage:


if left == 0: if left == 0:
current = [] current = []
self._index.append(current)
self._fileindex.setdefault(fname,
[]).append(len(self._pieceindex))
self._pieceindex.append(current)
left = self._piecelen left = self._piecelen


sz = min(curfile['length'] - curfileoff, left) sz = min(curfile['length'] - curfileoff, left)
@@ -61,12 +72,15 @@ class Storage:
curfileoff += sz curfileoff += sz
left -= sz left -= sz


def filepieces(self):
return self._fileindex.items()

def filesforpiece(self, idx): def filesforpiece(self, idx):
for x in self._index[idx]:
for x in self._pieceindex[idx]:
yield x['file'] yield x['file']


def apply_piece(self, idx, fun): def apply_piece(self, idx, fun):
for i in self._index[idx]:
for i in self._pieceindex[idx]:
with open(i['file'], 'rb') as fp: with open(i['file'], 'rb') as fp:
fp.seek(i['offset']) fp.seek(i['offset'])
fun(fp.read(i['size'])) fun(fp.read(i['size']))
@@ -83,7 +97,8 @@ def validate(torrent, basedir):


torrentdir = basedir / info['name'].decode(encoding) torrentdir = basedir / info['name'].decode(encoding)


stor = Storage(torrentdir, info['files'], info['piece length'], encoding)
stor = Storage(torrentdir, info['files'], info['piece length'],
encoding)


pieces = info['pieces'] pieces = info['pieces']
piecescnt = len(pieces) // 20 piecescnt = len(pieces) // 20
@@ -102,10 +117,8 @@ def validate(torrent, basedir):
# if any piece of a file is bad, it's bad # if any piece of a file is bad, it's bad
allfiles = set(stor.allfiles()) allfiles = set(stor.allfiles())


badpieces = [ x for x, v in enumerate(valid) if not v ]

badfiles = reduce(set.__or__, (set(stor.filesforpiece(x)) for x in
badpieces), set())
badfiles = { torrentdir / x for x, y in stor.filepieces() if
not all(valid[i] for i in y) }


return allfiles - badfiles, badfiles return allfiles - badfiles, badfiles


@@ -198,5 +211,7 @@ class _TestCases(unittest.TestCase):


val, inval = validate(self.torrent, self.basetempdir) val, inval = validate(self.torrent, self.basetempdir)


self.assertEqual(set(val), { sd / x for x in missingfiles.keys() if x not in badfiles })
self.assertEqual(set(inval), { sd / x for x in badfiles.keys() })
self.assertEqual(set(val), { sd / x for x in
missingfiles.keys() if x not in badfiles })
self.assertEqual(set(inval), { sd / x for x in
badfiles.keys() })

Loading…
Cancel
Save