Browse Source

support getting file hashes at same time as verification...

main
John-Mark Gurney 2 years ago
parent
commit
b5214e47a4
1 changed files with 92 additions and 11 deletions
  1. +92
    -11
      ui/medashare/btv/__init__.py

+ 92
- 11
ui/medashare/btv/__init__.py View File

@@ -2,7 +2,7 @@
from . import bencode from . import bencode
import fnmatch import fnmatch
from functools import reduce from functools import reduce
from hashlib import sha1
from hashlib import sha1, sha512
import importlib.resources import importlib.resources
import itertools import itertools
import os import os
@@ -56,12 +56,12 @@ class Storage:


Each item is a tuple of: Each item is a tuple of:
array of file path components (undecoded) array of file path components (undecoded)
a pathlib.PurePath for the file
a pathlib.PurePosixPath for the file
a pathlib.Path for file on disk a pathlib.Path for file on disk
''' '''


for curfile in self._files: for curfile in self._files:
fname = pathlib.PurePath(
fname = pathlib.PurePosixPath(
*(x.decode(_encoding) for x in *(x.decode(_encoding) for x in
curfile['path'])) curfile['path']))
curfilepath = self._rootpath / fname curfilepath = self._rootpath / fname
@@ -134,9 +134,21 @@ class Storage:
for i in self._pieceindex[idx]: for i in self._pieceindex[idx]:
with open(i['file'], 'rb') as fp: with open(i['file'], 'rb') as fp:
fp.seek(i['offset']) fp.seek(i['offset'])
fun(fp.read(i['size']))
fun(fp.read(i['size']), i.get('fname'),
i['offset'])

def validate_file(fname, with_file_hashes=None):
'''Take a torrent file fname, find the stored data (searching
subdirectories) and verify the torrent. Returns a pair of sets: the
first is all the files that are valid, the second is all the
invalid files.

The arg with_file_hashes, if specified, must be a hashlib-like
factory function. It will be applied on a per-file basis, and
a third value will be returned: a dict with the file name as
key and the digest as the value for each file.
'''


def validate_file(fname):
fname = pathlib.Path(fname) fname = pathlib.Path(fname)


with open(fname, 'rb') as fp: with open(fname, 'rb') as fp:
@@ -148,16 +160,22 @@ def validate_file(fname):
tordir = dirname.parent tordir = dirname.parent


try: try:
return validate(torrent, tordir)
return validate(torrent, tordir, with_file_hashes)
except FileNotFoundError as e: except FileNotFoundError as e:
continue continue
else: else:
raise FileNotFoundError('unable to find directory for %s' % (repr(fname.name))) raise FileNotFoundError('unable to find directory for %s' % (repr(fname.name)))


def validate(torrent, basedir):
def validate(torrent, basedir, with_file_hashes=None):
'''Take a decode torrent file, where it was stored in basedir, '''Take a decode torrent file, where it was stored in basedir,
verify the torrent. Returns a pair of set, the first is all the verify the torrent. Returns a pair of set, the first is all the
files that are valid, the second are all the invalid files.'''
files that are valid, the second is all the invalid files.

The arg with_file_hashes, if specified, must be a hashlib-like
factory function. It will be applied on a per-file basis, and
a third value will be returned: a dict with the file name as
key and the digest as the value.
'''


info = torrent['info'] info = torrent['info']


@@ -168,6 +186,20 @@ def validate(torrent, basedir):
files = info.get('files', None) files = info.get('files', None)
stor = Storage(torrentdir, files, info['piece length']) stor = Storage(torrentdir, files, info['piece length'])


file_hashes = dict()

def apply_fun(data, fname, offset):
if with_file_hashes is not None:
hashobj, curoff = file_hashes.setdefault(fname,
(with_file_hashes(), 0))

if curoff == offset:
hashobj.update(data)
file_hashes[fname] = (hashobj, offset +
len(data))

hash.update(data)

pieces = info['pieces'] pieces = info['pieces']
piecescnt = len(pieces) // 20 piecescnt = len(pieces) // 20
valid = [ None ] * piecescnt valid = [ None ] * piecescnt
@@ -175,13 +207,20 @@ def validate(torrent, basedir):
20)): 20)):
hash = sha1() hash = sha1()


stor.apply_piece(num, hash.update)
stor.apply_piece(num, apply_fun)


if hash.digest() == i: if hash.digest() == i:
valid[num] = True valid[num] = True
else: else:
valid[num] = False valid[num] = False


if files is None:
filesizes = { pathlib.PurePosixPath(info['name'].decode(
_encoding)): info['length'] }
else:
filesizes = { pathlib.PurePosixPath(*(x.decode(_encoding) for
x in o['path'])): o['length'] for o in files }

if files is None: if files is None:
# single file # single file
f, e = set([ torrentdir ]), set() f, e = set([ torrentdir ]), set()
@@ -189,7 +228,13 @@ def validate(torrent, basedir):
if not all(valid): if not all(valid):
f, e = e, f f, e = e, f


return f,e
if with_file_hashes:
file_hashes = { torrentdir: hashobj.digest() for fname, (hashobj,
off) in file_hashes.items() if info['length'] == off and
torrentdir in f }
return f, e, file_hashes

return f, e


# if any piece of a file is bad, it's bad # if any piece of a file is bad, it's bad
allfiles = set(stor.allfiles()) allfiles = set(stor.allfiles())
@@ -197,7 +242,15 @@ def validate(torrent, basedir):
badfiles = { torrentdir / x for x, y in stor.filepieces() if badfiles = { torrentdir / x for x, y in stor.filepieces() if
not all(valid[i] for i in y) } not all(valid[i] for i in y) }


return allfiles - badfiles, badfiles
r = (allfiles - badfiles, badfiles,)

file_hashes = { torrentdir / fname: hashobj.digest() for fname, (hashobj,
off) in file_hashes.items() if filesizes[fname] == off and
(torrentdir / fname) in r[0] }

if with_file_hashes is not None:
r += (file_hashes, )
return r


class _TestCases(unittest.TestCase): class _TestCases(unittest.TestCase):
dirname = 'somedir' dirname = 'somedir'
@@ -320,6 +373,21 @@ class _TestCases(unittest.TestCase):
self.assertFalse(bad) self.assertFalse(bad)
self.assertEqual(good, { sd / 'filed.txt' }) self.assertEqual(good, { sd / 'filed.txt' })


good, bad, hashes = validate_file(tor, with_file_hashes=sha512)

self.assertFalse(bad)
self.assertEqual(good, { sd / 'filed.txt' })
self.assertEqual(hashes, { sd / 'filed.txt': bytes.fromhex('7831bd05e23877e08a97362bab2ad7bcc7d08d8f841f42e8dee545781792b987aa7637f12cec399e261f798c10d3475add0db7de2643af86a346b6b451a69ec4'), })

with open(sd / 'filed.txt', 'w') as fp:
fp.write('weoifj')

good, bad, hashes = validate_file(tor, with_file_hashes=sha512)

self.assertEqual(bad, { sd / 'filed.txt' })
self.assertFalse(good)
self.assertEqual(hashes, {})

def test_verification(self): def test_verification(self):
# Testing for "missing" files # Testing for "missing" files
# piece size 2 (aka 4 bytes) # piece size 2 (aka 4 bytes)
@@ -345,3 +413,16 @@ class _TestCases(unittest.TestCase):
missingfiles.keys() if x not in self.badfiles }) missingfiles.keys() if x not in self.badfiles })
self.assertEqual(set(inval), { sd / x for x in self.assertEqual(set(inval), { sd / x for x in
self.badfiles.keys() }) self.badfiles.keys() })

val, inval, hashdict = validate(self.torrent, self.basetempdir,
with_file_hashes=sha512)

self.assertEqual(set(val), { sd / x for x in
missingfiles.keys() if x not in self.badfiles })
self.assertEqual(set(inval), { sd / x for x in
self.badfiles.keys() })
self.assertEqual(hashdict, {
sd / 'fileb.txt': bytes.fromhex('cc06808cbbee0510331aa97974132e8dc296aeb795be229d064bae784b0a87a5cf4281d82e8c99271b75db2148f08a026c1a60ed9cabdb8cac6d24242dac4063'),
sd / 'filed.txt': bytes.fromhex('7831bd05e23877e08a97362bab2ad7bcc7d08d8f841f42e8dee545781792b987aa7637f12cec399e261f798c10d3475add0db7de2643af86a346b6b451a69ec4'),
sd / 'filef/filef.txt': bytes.fromhex('be688838ca8686e5c90689bf2ab585cef1137c999b48c70b92f67a5c34dc15697b5d11c982ed6d71be1e1e7f7b4e0733884aa97c3f7a339a8ed03577cf74be09'),
})

Loading…
Cancel
Save