Browse Source

support getting file hashes at the same time as verification...

main
John-Mark Gurney 2 years ago
parent
commit
b5214e47a4
1 changed files with 92 additions and 11 deletions
  1. +92
    -11
      ui/medashare/btv/__init__.py

+ 92
- 11
ui/medashare/btv/__init__.py View File

@@ -2,7 +2,7 @@
from . import bencode
import fnmatch
from functools import reduce
from hashlib import sha1
from hashlib import sha1, sha512
import importlib.resources
import itertools
import os
@@ -56,12 +56,12 @@ class Storage:

Each item is a tuple of:
array of file path components (undecoded)
a pathlib.PurePath for the file
a pathlib.PurePosixPath for the file
a pathlib.Path for file on disk
'''

for curfile in self._files:
fname = pathlib.PurePath(
fname = pathlib.PurePosixPath(
*(x.decode(_encoding) for x in
curfile['path']))
curfilepath = self._rootpath / fname
@@ -134,9 +134,21 @@ class Storage:
for i in self._pieceindex[idx]:
with open(i['file'], 'rb') as fp:
fp.seek(i['offset'])
fun(fp.read(i['size']))
fun(fp.read(i['size']), i.get('fname'),
i['offset'])

def validate_file(fname, with_file_hashes=None):
'''Take a torrent file fname, find the stored data (searching
subdirectories) and verify the torrent. Returns a pair of sets: the
first is all the files that are valid, the second is all the
invalid files.

The arg with_file_hashes, if specified, must be a hashlib-like
factory function. It will be applied on a per-file basis, and
a third value will be returned: a dict with the file name as
key and the digest as the value for each file.
'''

def validate_file(fname):
fname = pathlib.Path(fname)

with open(fname, 'rb') as fp:
@@ -148,16 +160,22 @@ def validate_file(fname):
tordir = dirname.parent

try:
return validate(torrent, tordir)
return validate(torrent, tordir, with_file_hashes)
except FileNotFoundError as e:
continue
else:
raise FileNotFoundError('unable to find directory for %s' % (repr(fname.name)))

def validate(torrent, basedir):
def validate(torrent, basedir, with_file_hashes=None):
'''Take a decoded torrent file, where it was stored in basedir,
verify the torrent. Returns a pair of set, the first is all the
files that are valid, the second are all the invalid files.'''
files that are valid, the second are all the invalid files.

The arg with_file_hashes, if specified, must be a hashlib-like
factory function. It will be applied on a per-file basis, and
a third value will be returned: a dict with the file name as
key and the digest as the value.
'''

info = torrent['info']

@@ -168,6 +186,20 @@ def validate(torrent, basedir):
files = info.get('files', None)
stor = Storage(torrentdir, files, info['piece length'])

file_hashes = dict()

def apply_fun(data, fname, offset):
if with_file_hashes is not None:
hashobj, curoff = file_hashes.setdefault(fname,
(with_file_hashes(), 0))

if curoff == offset:
hashobj.update(data)
file_hashes[fname] = (hashobj, offset +
len(data))

hash.update(data)

pieces = info['pieces']
piecescnt = len(pieces) // 20
valid = [ None ] * piecescnt
@@ -175,13 +207,20 @@ def validate(torrent, basedir):
20)):
hash = sha1()

stor.apply_piece(num, hash.update)
stor.apply_piece(num, apply_fun)

if hash.digest() == i:
valid[num] = True
else:
valid[num] = False

if files is None:
filesizes = { pathlib.PurePosixPath(info['name'].decode(
_encoding)): info['length'] }
else:
filesizes = { pathlib.PurePosixPath(*(x.decode(_encoding) for
x in o['path'])): o['length'] for o in files }

if files is None:
# single file
f, e = set([ torrentdir ]), set()
@@ -189,7 +228,13 @@ def validate(torrent, basedir):
if not all(valid):
f, e = e, f

return f,e
if with_file_hashes:
file_hashes = { torrentdir: hashobj.digest() for fname, (hashobj,
off) in file_hashes.items() if info['length'] == off and
torrentdir in f }
return f, e, file_hashes

return f, e

# if any piece of a file is bad, it's bad
allfiles = set(stor.allfiles())
@@ -197,7 +242,15 @@ def validate(torrent, basedir):
badfiles = { torrentdir / x for x, y in stor.filepieces() if
not all(valid[i] for i in y) }

return allfiles - badfiles, badfiles
r = (allfiles - badfiles, badfiles,)

file_hashes = { torrentdir / fname: hashobj.digest() for fname, (hashobj,
off) in file_hashes.items() if filesizes[fname] == off and
(torrentdir / fname) in r[0] }

if with_file_hashes is not None:
r += (file_hashes, )
return r

class _TestCases(unittest.TestCase):
dirname = 'somedir'
@@ -320,6 +373,21 @@ class _TestCases(unittest.TestCase):
self.assertFalse(bad)
self.assertEqual(good, { sd / 'filed.txt' })

good, bad, hashes = validate_file(tor, with_file_hashes=sha512)

self.assertFalse(bad)
self.assertEqual(good, { sd / 'filed.txt' })
self.assertEqual(hashes, { sd / 'filed.txt': bytes.fromhex('7831bd05e23877e08a97362bab2ad7bcc7d08d8f841f42e8dee545781792b987aa7637f12cec399e261f798c10d3475add0db7de2643af86a346b6b451a69ec4'), })

with open(sd / 'filed.txt', 'w') as fp:
fp.write('weoifj')

good, bad, hashes = validate_file(tor, with_file_hashes=sha512)

self.assertEqual(bad, { sd / 'filed.txt' })
self.assertFalse(good)
self.assertEqual(hashes, {})

def test_verification(self):
# Testing for "missing" files
# piece size 2 (aka 4 bytes)
@@ -345,3 +413,16 @@ class _TestCases(unittest.TestCase):
missingfiles.keys() if x not in self.badfiles })
self.assertEqual(set(inval), { sd / x for x in
self.badfiles.keys() })

val, inval, hashdict = validate(self.torrent, self.basetempdir,
with_file_hashes=sha512)

self.assertEqual(set(val), { sd / x for x in
missingfiles.keys() if x not in self.badfiles })
self.assertEqual(set(inval), { sd / x for x in
self.badfiles.keys() })
self.assertEqual(hashdict, {
sd / 'fileb.txt': bytes.fromhex('cc06808cbbee0510331aa97974132e8dc296aeb795be229d064bae784b0a87a5cf4281d82e8c99271b75db2148f08a026c1a60ed9cabdb8cac6d24242dac4063'),
sd / 'filed.txt': bytes.fromhex('7831bd05e23877e08a97362bab2ad7bcc7d08d8f841f42e8dee545781792b987aa7637f12cec399e261f798c10d3475add0db7de2643af86a346b6b451a69ec4'),
sd / 'filef/filef.txt': bytes.fromhex('be688838ca8686e5c90689bf2ab585cef1137c999b48c70b92f67a5c34dc15697b5d11c982ed6d71be1e1e7f7b4e0733884aa97c3f7a339a8ed03577cf74be09'),
})

Loading…
Cancel
Save