From b5214e47a4400634b02c1c43e4e0d4320026d330 Mon Sep 17 00:00:00 2001 From: John-Mark Gurney Date: Sat, 17 Sep 2022 21:20:07 -0700 Subject: [PATCH] support getting file hashes at same time as verification... --- ui/medashare/btv/__init__.py | 103 +++++++++++++++++++++++++++++++---- 1 file changed, 92 insertions(+), 11 deletions(-) diff --git a/ui/medashare/btv/__init__.py b/ui/medashare/btv/__init__.py index 8c1b9ae..a92f748 100644 --- a/ui/medashare/btv/__init__.py +++ b/ui/medashare/btv/__init__.py @@ -2,7 +2,7 @@ from . import bencode import fnmatch from functools import reduce -from hashlib import sha1 +from hashlib import sha1, sha512 import importlib.resources import itertools import os @@ -56,12 +56,12 @@ class Storage: Each item is a tuple of: array of file path components (undecoded) - a pathlib.PurePath for the file + a pathlib.PurePosixPath for the file a pathlib.Path for file on disk ''' for curfile in self._files: - fname = pathlib.PurePath( + fname = pathlib.PurePosixPath( *(x.decode(_encoding) for x in curfile['path'])) curfilepath = self._rootpath / fname @@ -134,9 +134,21 @@ class Storage: for i in self._pieceindex[idx]: with open(i['file'], 'rb') as fp: fp.seek(i['offset']) - fun(fp.read(i['size'])) + fun(fp.read(i['size']), i.get('fname'), + i['offset']) + +def validate_file(fname, with_file_hashes=None): + '''Take a torrent file fname, find the stored data (searching + subdirectories and verify the torrent. Returns a pair of set, the + first is all the files that are valid, the second are all the + invalid files. + + The arg with_file_hashes, if specified, must be a hashlib like + factory function. It will be processed on a per file basis, and + a third argument will be returned as a dict w/ the file name as + key and the digest as the value of each file.. + ''' -def validate_file(fname): fname = pathlib.Path(fname) with open(fname, 'rb') as fp: @@ -148,16 +160,22 @@ def validate_file(fname): tordir = dirname.parent try: - return validate(torrent, tordir) + return validate(torrent, tordir, with_file_hashes) except FileNotFoundError as e: continue else: raise FileNotFoundError('unable to find directory for %s' % (repr(fname.name))) -def validate(torrent, basedir): +def validate(torrent, basedir, with_file_hashes=None): '''Take a decode torrent file, where it was stored in basedir, verify the torrent. Returns a pair of set, the first is all the - files that are valid, the second are all the invalid files.''' + files that are valid, the second are all the invalid files. + + The arg with_file_hashes, if specified, must be a hashlib like + factory function. It will be processed on a per file basis, and + a third argument will be returned as a dict w/ the file name as + key and the digest as the value. + ''' info = torrent['info'] @@ -168,6 +186,20 @@ def validate(torrent, basedir): files = info.get('files', None) stor = Storage(torrentdir, files, info['piece length']) + file_hashes = dict() + + def apply_fun(data, fname, offset): + if with_file_hashes is not None: + hashobj, curoff = file_hashes.setdefault(fname, + (with_file_hashes(), 0)) + + if curoff == offset: + hashobj.update(data) + file_hashes[fname] = (hashobj, offset + + len(data)) + + hash.update(data) + pieces = info['pieces'] piecescnt = len(pieces) // 20 valid = [ None ] * piecescnt @@ -175,13 +207,20 @@ def validate(torrent, basedir): 20)): hash = sha1() - stor.apply_piece(num, hash.update) + stor.apply_piece(num, apply_fun) if hash.digest() == i: valid[num] = True else: valid[num] = False + if files is None: + filesizes = { pathlib.PurePosixPath(info['name'].decode( + _encoding)): info['length'] } + else: + filesizes = { pathlib.PurePosixPath(*(x.decode(_encoding) for + x in o['path'])): o['length'] for o in files } + if files is None: # single file f, e = set([ torrentdir ]), set() @@ -189,7 +228,13 @@ def validate(torrent, basedir): if not all(valid): f, e = e, f - return f,e + if with_file_hashes: + file_hashes = { torrentdir: hashobj.digest() for fname, (hashobj, + off) in file_hashes.items() if info['length'] == off and + torrentdir in f } + return f, e, file_hashes + + return f, e # if any piece of a file is bad, it's bad allfiles = set(stor.allfiles()) @@ -197,7 +242,15 @@ def validate(torrent, basedir): badfiles = { torrentdir / x for x, y in stor.filepieces() if not all(valid[i] for i in y) } - return allfiles - badfiles, badfiles + r = (allfiles - badfiles, badfiles,) + + file_hashes = { torrentdir / fname: hashobj.digest() for fname, (hashobj, + off) in file_hashes.items() if filesizes[fname] == off and + (torrentdir / fname) in r[0] } + + if with_file_hashes is not None: + r += (file_hashes, ) + return r class _TestCases(unittest.TestCase): dirname = 'somedir' @@ -320,6 +373,21 @@ class _TestCases(unittest.TestCase): self.assertFalse(bad) self.assertEqual(good, { sd / 'filed.txt' }) + good, bad, hashes = validate_file(tor, with_file_hashes=sha512) + + self.assertFalse(bad) + self.assertEqual(good, { sd / 'filed.txt' }) + self.assertEqual(hashes, { sd / 'filed.txt': bytes.fromhex('7831bd05e23877e08a97362bab2ad7bcc7d08d8f841f42e8dee545781792b987aa7637f12cec399e261f798c10d3475add0db7de2643af86a346b6b451a69ec4'), }) + + with open(sd / 'filed.txt', 'w') as fp: + fp.write('weoifj') + + good, bad, hashes = validate_file(tor, with_file_hashes=sha512) + + self.assertEqual(bad, { sd / 'filed.txt' }) + self.assertFalse(good) + self.assertEqual(hashes, {}) + def test_verification(self): # Testing for "missing" files # piece size 2 (aka 4 bytes) @@ -345,3 +413,16 @@ class _TestCases(unittest.TestCase): missingfiles.keys() if x not in self.badfiles }) self.assertEqual(set(inval), { sd / x for x in self.badfiles.keys() }) + + val, inval, hashdict = validate(self.torrent, self.basetempdir, + with_file_hashes=sha512) + + self.assertEqual(set(val), { sd / x for x in + missingfiles.keys() if x not in self.badfiles }) + self.assertEqual(set(inval), { sd / x for x in + self.badfiles.keys() }) + self.assertEqual(hashdict, { + sd / 'fileb.txt': bytes.fromhex('cc06808cbbee0510331aa97974132e8dc296aeb795be229d064bae784b0a87a5cf4281d82e8c99271b75db2148f08a026c1a60ed9cabdb8cac6d24242dac4063'), + sd / 'filed.txt': bytes.fromhex('7831bd05e23877e08a97362bab2ad7bcc7d08d8f841f42e8dee545781792b987aa7637f12cec399e261f798c10d3475add0db7de2643af86a346b6b451a69ec4'), + sd / 'filef/filef.txt': bytes.fromhex('be688838ca8686e5c90689bf2ab585cef1137c999b48c70b92f67a5c34dc15697b5d11c982ed6d71be1e1e7f7b4e0733884aa97c3f7a339a8ed03577cf74be09'), + })