Browse Source

support getting file hashes at same time as verification...

main
John-Mark Gurney 2 years ago
parent
commit
b5214e47a4
1 changed files with 92 additions and 11 deletions
  1. +92
    -11
      ui/medashare/btv/__init__.py

+ 92
- 11
ui/medashare/btv/__init__.py View File

@@ -2,7 +2,7 @@
from . import bencode from . import bencode
import fnmatch import fnmatch
from functools import reduce from functools import reduce
from hashlib import sha1
from hashlib import sha1, sha512
import importlib.resources import importlib.resources
import itertools import itertools
import os import os
@@ -56,12 +56,12 @@ class Storage:


Each item is a tuple of: Each item is a tuple of:
array of file path components (undecoded) array of file path components (undecoded)
a pathlib.PurePath for the file
a pathlib.PurePosixPath for the file
a pathlib.Path for file on disk a pathlib.Path for file on disk
''' '''


for curfile in self._files: for curfile in self._files:
fname = pathlib.PurePath(
fname = pathlib.PurePosixPath(
*(x.decode(_encoding) for x in *(x.decode(_encoding) for x in
curfile['path'])) curfile['path']))
curfilepath = self._rootpath / fname curfilepath = self._rootpath / fname
@@ -134,9 +134,21 @@ class Storage:
for i in self._pieceindex[idx]: for i in self._pieceindex[idx]:
with open(i['file'], 'rb') as fp: with open(i['file'], 'rb') as fp:
fp.seek(i['offset']) fp.seek(i['offset'])
fun(fp.read(i['size']))
fun(fp.read(i['size']), i.get('fname'),
i['offset'])

def validate_file(fname, with_file_hashes=None):
'''Take a torrent file fname, find the stored data (searching
subdirectories) and verify the torrent. Returns a pair of sets: the
first is all the files that are valid, the second is all the
invalid files.

The arg with_file_hashes, if specified, must be a hashlib-like
factory function. It will be applied on a per-file basis, and
a third value will be returned: a dict with the file name as
key and the digest as the value for each file.
'''


def validate_file(fname):
fname = pathlib.Path(fname) fname = pathlib.Path(fname)


with open(fname, 'rb') as fp: with open(fname, 'rb') as fp:
@@ -148,16 +160,22 @@ def validate_file(fname):
tordir = dirname.parent tordir = dirname.parent


try: try:
return validate(torrent, tordir)
return validate(torrent, tordir, with_file_hashes)
except FileNotFoundError as e: except FileNotFoundError as e:
continue continue
else: else:
raise FileNotFoundError('unable to find directory for %s' % (repr(fname.name))) raise FileNotFoundError('unable to find directory for %s' % (repr(fname.name)))


def validate(torrent, basedir):
def validate(torrent, basedir, with_file_hashes=None):
'''Take a decode torrent file, where it was stored in basedir, '''Take a decode torrent file, where it was stored in basedir,
verify the torrent. Returns a pair of set, the first is all the verify the torrent. Returns a pair of set, the first is all the
files that are valid, the second are all the invalid files.'''
files that are valid, the second is all the invalid files.

The arg with_file_hashes, if specified, must be a hashlib-like
factory function. It will be applied on a per-file basis, and
a third value will be returned: a dict with the file name as
key and the digest as the value.
'''


info = torrent['info'] info = torrent['info']


@@ -168,6 +186,20 @@ def validate(torrent, basedir):
files = info.get('files', None) files = info.get('files', None)
stor = Storage(torrentdir, files, info['piece length']) stor = Storage(torrentdir, files, info['piece length'])


file_hashes = dict()

def apply_fun(data, fname, offset):
if with_file_hashes is not None:
hashobj, curoff = file_hashes.setdefault(fname,
(with_file_hashes(), 0))

if curoff == offset:
hashobj.update(data)
file_hashes[fname] = (hashobj, offset +
len(data))

hash.update(data)

pieces = info['pieces'] pieces = info['pieces']
piecescnt = len(pieces) // 20 piecescnt = len(pieces) // 20
valid = [ None ] * piecescnt valid = [ None ] * piecescnt
@@ -175,13 +207,20 @@ def validate(torrent, basedir):
20)): 20)):
hash = sha1() hash = sha1()


stor.apply_piece(num, hash.update)
stor.apply_piece(num, apply_fun)


if hash.digest() == i: if hash.digest() == i:
valid[num] = True valid[num] = True
else: else:
valid[num] = False valid[num] = False


if files is None:
filesizes = { pathlib.PurePosixPath(info['name'].decode(
_encoding)): info['length'] }
else:
filesizes = { pathlib.PurePosixPath(*(x.decode(_encoding) for
x in o['path'])): o['length'] for o in files }

if files is None: if files is None:
# single file # single file
f, e = set([ torrentdir ]), set() f, e = set([ torrentdir ]), set()
@@ -189,7 +228,13 @@ def validate(torrent, basedir):
if not all(valid): if not all(valid):
f, e = e, f f, e = e, f


return f,e
if with_file_hashes:
file_hashes = { torrentdir: hashobj.digest() for fname, (hashobj,
off) in file_hashes.items() if info['length'] == off and
torrentdir in f }
return f, e, file_hashes

return f, e


# if any piece of a file is bad, it's bad # if any piece of a file is bad, it's bad
allfiles = set(stor.allfiles()) allfiles = set(stor.allfiles())
@@ -197,7 +242,15 @@ def validate(torrent, basedir):
badfiles = { torrentdir / x for x, y in stor.filepieces() if badfiles = { torrentdir / x for x, y in stor.filepieces() if
not all(valid[i] for i in y) } not all(valid[i] for i in y) }


return allfiles - badfiles, badfiles
r = (allfiles - badfiles, badfiles,)

file_hashes = { torrentdir / fname: hashobj.digest() for fname, (hashobj,
off) in file_hashes.items() if filesizes[fname] == off and
(torrentdir / fname) in r[0] }

if with_file_hashes is not None:
r += (file_hashes, )
return r


class _TestCases(unittest.TestCase): class _TestCases(unittest.TestCase):
dirname = 'somedir' dirname = 'somedir'
@@ -320,6 +373,21 @@ class _TestCases(unittest.TestCase):
self.assertFalse(bad) self.assertFalse(bad)
self.assertEqual(good, { sd / 'filed.txt' }) self.assertEqual(good, { sd / 'filed.txt' })


good, bad, hashes = validate_file(tor, with_file_hashes=sha512)

self.assertFalse(bad)
self.assertEqual(good, { sd / 'filed.txt' })
self.assertEqual(hashes, { sd / 'filed.txt': bytes.fromhex('7831bd05e23877e08a97362bab2ad7bcc7d08d8f841f42e8dee545781792b987aa7637f12cec399e261f798c10d3475add0db7de2643af86a346b6b451a69ec4'), })

with open(sd / 'filed.txt', 'w') as fp:
fp.write('weoifj')

good, bad, hashes = validate_file(tor, with_file_hashes=sha512)

self.assertEqual(bad, { sd / 'filed.txt' })
self.assertFalse(good)
self.assertEqual(hashes, {})

def test_verification(self): def test_verification(self):
# Testing for "missing" files # Testing for "missing" files
# piece size 2 (aka 4 bytes) # piece size 2 (aka 4 bytes)
@@ -345,3 +413,16 @@ class _TestCases(unittest.TestCase):
missingfiles.keys() if x not in self.badfiles }) missingfiles.keys() if x not in self.badfiles })
self.assertEqual(set(inval), { sd / x for x in self.assertEqual(set(inval), { sd / x for x in
self.badfiles.keys() }) self.badfiles.keys() })

val, inval, hashdict = validate(self.torrent, self.basetempdir,
with_file_hashes=sha512)

self.assertEqual(set(val), { sd / x for x in
missingfiles.keys() if x not in self.badfiles })
self.assertEqual(set(inval), { sd / x for x in
self.badfiles.keys() })
self.assertEqual(hashdict, {
sd / 'fileb.txt': bytes.fromhex('cc06808cbbee0510331aa97974132e8dc296aeb795be229d064bae784b0a87a5cf4281d82e8c99271b75db2148f08a026c1a60ed9cabdb8cac6d24242dac4063'),
sd / 'filed.txt': bytes.fromhex('7831bd05e23877e08a97362bab2ad7bcc7d08d8f841f42e8dee545781792b987aa7637f12cec399e261f798c10d3475add0db7de2643af86a346b6b451a69ec4'),
sd / 'filef/filef.txt': bytes.fromhex('be688838ca8686e5c90689bf2ab585cef1137c999b48c70b92f67a5c34dc15697b5d11c982ed6d71be1e1e7f7b4e0733884aa97c3f7a339a8ed03577cf74be09'),
})

Loading…
Cancel
Save