|
@@ -2,7 +2,7 @@ |
|
|
from . import bencode |
|
|
from . import bencode |
|
|
import fnmatch |
|
|
import fnmatch |
|
|
from functools import reduce |
|
|
from functools import reduce |
|
|
from hashlib import sha1 |
|
|
|
|
|
|
|
|
from hashlib import sha1, sha512 |
|
|
import importlib.resources |
|
|
import importlib.resources |
|
|
import itertools |
|
|
import itertools |
|
|
import os |
|
|
import os |
|
@@ -56,12 +56,12 @@ class Storage: |
|
|
|
|
|
|
|
|
Each item is a tuple of: |
|
|
Each item is a tuple of: |
|
|
array of file path components (undecoded) |
|
|
array of file path components (undecoded) |
|
|
a pathlib.PurePath for the file |
|
|
|
|
|
|
|
|
a pathlib.PurePosixPath for the file |
|
|
a pathlib.Path for file on disk |
|
|
a pathlib.Path for file on disk |
|
|
''' |
|
|
''' |
|
|
|
|
|
|
|
|
for curfile in self._files: |
|
|
for curfile in self._files: |
|
|
fname = pathlib.PurePath( |
|
|
|
|
|
|
|
|
fname = pathlib.PurePosixPath( |
|
|
*(x.decode(_encoding) for x in |
|
|
*(x.decode(_encoding) for x in |
|
|
curfile['path'])) |
|
|
curfile['path'])) |
|
|
curfilepath = self._rootpath / fname |
|
|
curfilepath = self._rootpath / fname |
|
@@ -134,9 +134,21 @@ class Storage: |
|
|
for i in self._pieceindex[idx]: |
|
|
for i in self._pieceindex[idx]: |
|
|
with open(i['file'], 'rb') as fp: |
|
|
with open(i['file'], 'rb') as fp: |
|
|
fp.seek(i['offset']) |
|
|
fp.seek(i['offset']) |
|
|
fun(fp.read(i['size'])) |
|
|
|
|
|
|
|
|
fun(fp.read(i['size']), i.get('fname'), |
|
|
|
|
|
i['offset']) |
|
|
|
|
|
|
|
|
|
|
|
def validate_file(fname, with_file_hashes=None): |
|
|
|
|
|
'''Take a torrent file fname, find the stored data (searching |
|
|
|
|
|
subdirectories) and verify the torrent. Returns a pair of sets, the |
|
|
|
|
|
first is all the files that are valid, the second are all the |
|
|
|
|
|
invalid files. |
|
|
|
|
|
|
|
|
|
|
|
The arg with_file_hashes, if specified, must be a hashlib like |
|
|
|
|
|
factory function. It will be processed on a per file basis, and |
|
|
|
|
|
a third element will be returned as a dict w/ the file name as |
|
|
|
|
|
key and the digest as the value of each file. |
|
|
|
|
|
''' |
|
|
|
|
|
|
|
|
def validate_file(fname): |
|
|
|
|
|
fname = pathlib.Path(fname) |
|
|
fname = pathlib.Path(fname) |
|
|
|
|
|
|
|
|
with open(fname, 'rb') as fp: |
|
|
with open(fname, 'rb') as fp: |
|
@@ -148,16 +160,22 @@ def validate_file(fname): |
|
|
tordir = dirname.parent |
|
|
tordir = dirname.parent |
|
|
|
|
|
|
|
|
try: |
|
|
try: |
|
|
return validate(torrent, tordir) |
|
|
|
|
|
|
|
|
return validate(torrent, tordir, with_file_hashes) |
|
|
except FileNotFoundError as e: |
|
|
except FileNotFoundError as e: |
|
|
continue |
|
|
continue |
|
|
else: |
|
|
else: |
|
|
raise FileNotFoundError('unable to find directory for %s' % (repr(fname.name))) |
|
|
raise FileNotFoundError('unable to find directory for %s' % (repr(fname.name))) |
|
|
|
|
|
|
|
|
def validate(torrent, basedir): |
|
|
|
|
|
|
|
|
def validate(torrent, basedir, with_file_hashes=None): |
|
|
'''Take a decode torrent file, where it was stored in basedir, |
|
|
'''Take a decode torrent file, where it was stored in basedir, |
|
|
verify the torrent. Returns a pair of set, the first is all the |
|
|
verify the torrent. Returns a pair of set, the first is all the |
|
|
files that are valid, the second are all the invalid files.''' |
|
|
|
|
|
|
|
|
files that are valid, the second are all the invalid files. |
|
|
|
|
|
|
|
|
|
|
|
The arg with_file_hashes, if specified, must be a hashlib like |
|
|
|
|
|
factory function. It will be processed on a per file basis, and |
|
|
|
|
|
a third element will be returned as a dict w/ the file name as |
|
|
|
|
|
key and the digest as the value. |
|
|
|
|
|
''' |
|
|
|
|
|
|
|
|
info = torrent['info'] |
|
|
info = torrent['info'] |
|
|
|
|
|
|
|
@@ -168,6 +186,20 @@ def validate(torrent, basedir): |
|
|
files = info.get('files', None) |
|
|
files = info.get('files', None) |
|
|
stor = Storage(torrentdir, files, info['piece length']) |
|
|
stor = Storage(torrentdir, files, info['piece length']) |
|
|
|
|
|
|
|
|
|
|
|
file_hashes = dict() |
|
|
|
|
|
|
|
|
|
|
|
def apply_fun(data, fname, offset): |
|
|
|
|
|
if with_file_hashes is not None: |
|
|
|
|
|
hashobj, curoff = file_hashes.setdefault(fname, |
|
|
|
|
|
(with_file_hashes(), 0)) |
|
|
|
|
|
|
|
|
|
|
|
if curoff == offset: |
|
|
|
|
|
hashobj.update(data) |
|
|
|
|
|
file_hashes[fname] = (hashobj, offset + |
|
|
|
|
|
len(data)) |
|
|
|
|
|
|
|
|
|
|
|
hash.update(data) |
|
|
|
|
|
|
|
|
pieces = info['pieces'] |
|
|
pieces = info['pieces'] |
|
|
piecescnt = len(pieces) // 20 |
|
|
piecescnt = len(pieces) // 20 |
|
|
valid = [ None ] * piecescnt |
|
|
valid = [ None ] * piecescnt |
|
@@ -175,13 +207,20 @@ def validate(torrent, basedir): |
|
|
20)): |
|
|
20)): |
|
|
hash = sha1() |
|
|
hash = sha1() |
|
|
|
|
|
|
|
|
stor.apply_piece(num, hash.update) |
|
|
|
|
|
|
|
|
stor.apply_piece(num, apply_fun) |
|
|
|
|
|
|
|
|
if hash.digest() == i: |
|
|
if hash.digest() == i: |
|
|
valid[num] = True |
|
|
valid[num] = True |
|
|
else: |
|
|
else: |
|
|
valid[num] = False |
|
|
valid[num] = False |
|
|
|
|
|
|
|
|
|
|
|
if files is None: |
|
|
|
|
|
filesizes = { pathlib.PurePosixPath(info['name'].decode( |
|
|
|
|
|
_encoding)): info['length'] } |
|
|
|
|
|
else: |
|
|
|
|
|
filesizes = { pathlib.PurePosixPath(*(x.decode(_encoding) for |
|
|
|
|
|
x in o['path'])): o['length'] for o in files } |
|
|
|
|
|
|
|
|
if files is None: |
|
|
if files is None: |
|
|
# single file |
|
|
# single file |
|
|
f, e = set([ torrentdir ]), set() |
|
|
f, e = set([ torrentdir ]), set() |
|
@@ -189,7 +228,13 @@ def validate(torrent, basedir): |
|
|
if not all(valid): |
|
|
if not all(valid): |
|
|
f, e = e, f |
|
|
f, e = e, f |
|
|
|
|
|
|
|
|
return f,e |
|
|
|
|
|
|
|
|
if with_file_hashes: |
|
|
|
|
|
file_hashes = { torrentdir: hashobj.digest() for fname, (hashobj, |
|
|
|
|
|
off) in file_hashes.items() if info['length'] == off and |
|
|
|
|
|
torrentdir in f } |
|
|
|
|
|
return f, e, file_hashes |
|
|
|
|
|
|
|
|
|
|
|
return f, e |
|
|
|
|
|
|
|
|
# if any piece of a file is bad, it's bad |
|
|
# if any piece of a file is bad, it's bad |
|
|
allfiles = set(stor.allfiles()) |
|
|
allfiles = set(stor.allfiles()) |
|
@@ -197,7 +242,15 @@ def validate(torrent, basedir): |
|
|
badfiles = { torrentdir / x for x, y in stor.filepieces() if |
|
|
badfiles = { torrentdir / x for x, y in stor.filepieces() if |
|
|
not all(valid[i] for i in y) } |
|
|
not all(valid[i] for i in y) } |
|
|
|
|
|
|
|
|
return allfiles - badfiles, badfiles |
|
|
|
|
|
|
|
|
r = (allfiles - badfiles, badfiles,) |
|
|
|
|
|
|
|
|
|
|
|
file_hashes = { torrentdir / fname: hashobj.digest() for fname, (hashobj, |
|
|
|
|
|
off) in file_hashes.items() if filesizes[fname] == off and |
|
|
|
|
|
(torrentdir / fname) in r[0] } |
|
|
|
|
|
|
|
|
|
|
|
if with_file_hashes is not None: |
|
|
|
|
|
r += (file_hashes, ) |
|
|
|
|
|
return r |
|
|
|
|
|
|
|
|
class _TestCases(unittest.TestCase): |
|
|
class _TestCases(unittest.TestCase): |
|
|
dirname = 'somedir' |
|
|
dirname = 'somedir' |
|
@@ -320,6 +373,21 @@ class _TestCases(unittest.TestCase): |
|
|
self.assertFalse(bad) |
|
|
self.assertFalse(bad) |
|
|
self.assertEqual(good, { sd / 'filed.txt' }) |
|
|
self.assertEqual(good, { sd / 'filed.txt' }) |
|
|
|
|
|
|
|
|
|
|
|
good, bad, hashes = validate_file(tor, with_file_hashes=sha512) |
|
|
|
|
|
|
|
|
|
|
|
self.assertFalse(bad) |
|
|
|
|
|
self.assertEqual(good, { sd / 'filed.txt' }) |
|
|
|
|
|
self.assertEqual(hashes, { sd / 'filed.txt': bytes.fromhex('7831bd05e23877e08a97362bab2ad7bcc7d08d8f841f42e8dee545781792b987aa7637f12cec399e261f798c10d3475add0db7de2643af86a346b6b451a69ec4'), }) |
|
|
|
|
|
|
|
|
|
|
|
with open(sd / 'filed.txt', 'w') as fp: |
|
|
|
|
|
fp.write('weoifj') |
|
|
|
|
|
|
|
|
|
|
|
good, bad, hashes = validate_file(tor, with_file_hashes=sha512) |
|
|
|
|
|
|
|
|
|
|
|
self.assertEqual(bad, { sd / 'filed.txt' }) |
|
|
|
|
|
self.assertFalse(good) |
|
|
|
|
|
self.assertEqual(hashes, {}) |
|
|
|
|
|
|
|
|
def test_verification(self): |
|
|
def test_verification(self): |
|
|
# Testing for "missing" files |
|
|
# Testing for "missing" files |
|
|
# piece size 2 (aka 4 bytes) |
|
|
# piece size 2 (aka 4 bytes) |
|
@@ -345,3 +413,16 @@ class _TestCases(unittest.TestCase): |
|
|
missingfiles.keys() if x not in self.badfiles }) |
|
|
missingfiles.keys() if x not in self.badfiles }) |
|
|
self.assertEqual(set(inval), { sd / x for x in |
|
|
self.assertEqual(set(inval), { sd / x for x in |
|
|
self.badfiles.keys() }) |
|
|
self.badfiles.keys() }) |
|
|
|
|
|
|
|
|
|
|
|
val, inval, hashdict = validate(self.torrent, self.basetempdir, |
|
|
|
|
|
with_file_hashes=sha512) |
|
|
|
|
|
|
|
|
|
|
|
self.assertEqual(set(val), { sd / x for x in |
|
|
|
|
|
missingfiles.keys() if x not in self.badfiles }) |
|
|
|
|
|
self.assertEqual(set(inval), { sd / x for x in |
|
|
|
|
|
self.badfiles.keys() }) |
|
|
|
|
|
self.assertEqual(hashdict, { |
|
|
|
|
|
sd / 'fileb.txt': bytes.fromhex('cc06808cbbee0510331aa97974132e8dc296aeb795be229d064bae784b0a87a5cf4281d82e8c99271b75db2148f08a026c1a60ed9cabdb8cac6d24242dac4063'), |
|
|
|
|
|
sd / 'filed.txt': bytes.fromhex('7831bd05e23877e08a97362bab2ad7bcc7d08d8f841f42e8dee545781792b987aa7637f12cec399e261f798c10d3475add0db7de2643af86a346b6b451a69ec4'), |
|
|
|
|
|
sd / 'filef/filef.txt': bytes.fromhex('be688838ca8686e5c90689bf2ab585cef1137c999b48c70b92f67a5c34dc15697b5d11c982ed6d71be1e1e7f7b4e0733884aa97c3f7a339a8ed03577cf74be09'), |
|
|
|
|
|
}) |