MetaData Sharing
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 

326 lines
7.6 KiB

  1. from . import bencode
  2. import fnmatch
  3. from functools import reduce
  4. from hashlib import sha1
  5. import importlib.resources
  6. import itertools
  7. import os
  8. import pathlib
  9. import shutil
  10. import sys
  11. import tempfile
  12. import unittest
# Encoding used to decode the byte strings taken from the torrent
# (the info name and the per-file path components).
_encoding = 'utf-8'

# Names exported as the public API of this module.
__all__ = [ 'validate', 'validate_file' ]
  15. _escapes = '*?[]'
  16. def glob_escape(s):
  17. return ''.join(x if x not in _escapes else '[%s]' % x for x in s)
  18. def roundup(x, y):
  19. '''Round up x to the next multiple of y.'''
  20. return (x + y - 1) // y
  21. class Storage:
  22. '''A class to help read pieces of a torrent.
  23. '''
  24. def __init__(self, rootpath, files, piecelen):
  25. '''
  26. rootpath - path to the dir of torrent files are in
  27. files - the files dictionary from the torrent info key
  28. piecelen - piece length from the torren info key
  29. If files is None, then rootpath points at the single file.
  30. '''
  31. self._rootpath = pathlib.Path(rootpath)
  32. self._files = files
  33. self._piecelen = piecelen
  34. if files is None:
  35. # get length
  36. sz = self._rootpath.stat().st_size
  37. piececnt = roundup(sz, piecelen)
  38. self._pieceindex = [ [ dict(file=self._rootpath, offset=x * piecelen, size=piecelen if x < piececnt - 1 else sz - piececnt * x) ] for x in range(piececnt) ]
  39. else:
  40. self._buildindex()
  41. def _filepaths(self):
  42. '''Iterates over all the files in the torrent.
  43. Each item is a tuple of:
  44. array of file path components (undecoded)
  45. a pathlib.PurePath for the file
  46. a pathlib.Path for file on disk
  47. '''
  48. for curfile in self._files:
  49. fname = pathlib.PurePath(
  50. *(x.decode(_encoding) for x in
  51. curfile['path']))
  52. curfilepath = self._rootpath / fname
  53. yield curfile, fname, curfilepath
  54. def allfiles(self):
  55. '''Iterator that returns each on disk path name for
  56. each file.'''
  57. for x, y, curfilepath in self._filepaths():
  58. yield curfilepath
  59. def _buildindex(self):
  60. '''Internal function to build the needed indexes for
  61. pieces and files.'''
  62. self._pieceindex = []
  63. self._fileindex = {}
  64. files = self._filepaths()
  65. left = 0
  66. curfile = None
  67. while True:
  68. if curfile is None or curfileoff == curfile['length']:
  69. # next file
  70. try:
  71. curfile, fname, curfilepath = next(files)
  72. except StopIteration:
  73. break
  74. curfileoff = 0
  75. if left == 0:
  76. current = []
  77. self._fileindex.setdefault(fname,
  78. []).append(len(self._pieceindex))
  79. self._pieceindex.append(current)
  80. left = self._piecelen
  81. sz = min(curfile['length'] - curfileoff, left)
  82. current.append(dict(file=curfilepath, fname=fname,
  83. offset=curfileoff, size=sz))
  84. curfileoff += sz
  85. left -= sz
  86. def filepieces(self):
  87. '''Iterator that returns a pair, first item is the subpath
  88. to a file (that is relative to the torrent dir), and the
  89. pieces that cover the file.'''
  90. return self._fileindex.items()
  91. def filesforpiece(self, idx):
  92. '''Return a list of files that are covered by piece idx.'''
  93. for x in self._pieceindex[idx]:
  94. yield x['file']
  95. def apply_piece(self, idx, fun):
  96. '''Read the parts of piece idx, and call fun w/ each part.
  97. This is to hash the parts, e.g.
  98. hash = sha1()
  99. stor.apply_piece(num, hash.update)
  100. hash now contains the digest for the piece.'''
  101. for i in self._pieceindex[idx]:
  102. with open(i['file'], 'rb') as fp:
  103. fp.seek(i['offset'])
  104. fun(fp.read(i['size']))
  105. def validate_file(fname):
  106. fname = pathlib.Path(fname)
  107. with open(fname, 'rb') as fp:
  108. torrent = bencode.bdecode(fp.read())
  109. finddname = glob_escape(torrent['info']['name'].decode(_encoding))
  110. dirname = list(fname.parent.rglob(finddname))[0]
  111. tordir = dirname.parent
  112. return validate(torrent, tordir)
  113. def validate(torrent, basedir):
  114. '''Take a decode torrent file, where it was stored in basedir,
  115. verify the torrent. Returns a pair of set, the first is all the
  116. files that are valid, the second are all the invalid files.'''
  117. info = torrent['info']
  118. basedir = pathlib.Path(basedir)
  119. torrentdir = basedir / info['name'].decode(_encoding)
  120. files = info.get('files', None)
  121. stor = Storage(torrentdir, files, info['piece length'])
  122. pieces = info['pieces']
  123. piecescnt = len(pieces) // 20
  124. valid = [ None ] * piecescnt
  125. for num, i in enumerate(pieces[x:x+20] for x in range(0, len(pieces),
  126. 20)):
  127. hash = sha1()
  128. stor.apply_piece(num, hash.update)
  129. if hash.digest() == i:
  130. valid[num] = True
  131. else:
  132. valid[num] = False
  133. if files is None:
  134. # single file
  135. f, e = set([ torrentdir ]), set()
  136. if not all(valid):
  137. f, e = e, f
  138. return f,e
  139. # if any piece of a file is bad, it's bad
  140. allfiles = set(stor.allfiles())
  141. badfiles = { torrentdir / x for x, y in stor.filepieces() if
  142. not all(valid[i] for i in y) }
  143. return allfiles - badfiles, badfiles
  144. class _TestCases(unittest.TestCase):
  145. dirname = 'somedir'
  146. # file contents for somedir.torrent
  147. origfiledata = {
  148. 'filea.txt': b'foo\n',
  149. 'fileb.txt': b'bar\n',
  150. 'filec.txt': b'bleha\n',
  151. 'filed.txt': b'somehow\n',
  152. 'filee.txt': b'nowab\n',
  153. 'filef/filef.txt': b'\n',
  154. }
  155. # some munging to make some files bad
  156. badfiles = {
  157. 'filea.txt': b'',
  158. 'filec.txt': b'\x00\x00\x00\x00a\n',
  159. 'filee.txt': b'no',
  160. }
  161. def setUp(self):
  162. d = pathlib.Path(tempfile.mkdtemp()).resolve()
  163. self.basetempdir = d
  164. fixtures = importlib.resources.files(__name__) / 'fixtures'
  165. tor = fixtures / 'somedir.torrent'
  166. with tor.open('rb') as fp:
  167. self.torrent = bencode.bdecode(fp.read())
  168. self.fixtures = fixtures
  169. self.oldcwd = os.getcwd()
  170. os.chdir(d)
  171. def tearDown(self):
  172. shutil.rmtree(self.basetempdir)
  173. os.chdir(self.oldcwd)
  174. @staticmethod
  175. def make_files(dname, fdict):
  176. dname = pathlib.Path(dname)
  177. for k, v in fdict.items():
  178. k = dname / pathlib.PurePosixPath(k)
  179. k.parent.mkdir(parents=True, exist_ok=True)
  180. with open(k, 'wb') as fp:
  181. fp.write(v)
  182. def test_completeverif(self):
  183. tf = self.basetempdir / 'a.torrent'
  184. with open(tf, 'wb') as fp:
  185. fp.write(bencode.bencode(self.torrent))
  186. sd = self.basetempdir / 'anotherdir' / self.dirname
  187. sd.parent.mkdir()
  188. sd.mkdir()
  189. self.make_files(sd, self.origfiledata)
  190. good, bad = validate_file(tf)
  191. self.assertFalse(bad)
  192. # that utf-8 encoded names work
  193. sd = self.basetempdir / 'thai'
  194. sd.mkdir()
  195. self.make_files(sd, { 'thai - สวัสดี.txt': b'hello\n'
  196. })
  197. tor = importlib.resources.files(__name__)
  198. tor = tor / 'fixtures' / 'thai.torrent'
  199. with tor.open('rb') as fp:
  200. torrent = bencode.bdecode(fp.read())
  201. good, bad = validate(torrent, self.basetempdir)
  202. self.assertFalse(bad)
  203. def test_escapeglob(self):
  204. for i in [
  205. '*', '?', '[', '[]', '*?[][[*?',
  206. ]:
  207. self.assertTrue(fnmatch.fnmatch(i, glob_escape(i)))
  208. def test_validate_file_single(self):
  209. sd = self.basetempdir / 'anotherdir'
  210. sd.mkdir()
  211. self.make_files(sd, self.origfiledata)
  212. shutil.copy(self.fixtures / 'filed.txt.torrent', self.basetempdir)
  213. tor = self.basetempdir / 'filed.txt.torrent'
  214. good, bad = validate_file(tor)
  215. self.assertFalse(bad)
  216. self.assertEqual(good, { sd / 'filed.txt' })
  217. def test_verification(self):
  218. # Testing for "missing" files
  219. # piece size 2 (aka 4 bytes)
  220. # empty file of 4 bytes 'foo\n'
  221. # complete file of 4 bytes 'bar\n'
  222. # partial missing file, 6 bytes, last two correct 'bleha\n'
  223. # complete file of 8 bytes (multiple pieces) 'somehow\n'
  224. # partial missing file, starting w/ 2 bytes, length 6 'nowab\n'
  225. # complete file (length 1) '\n'
  226. missingfiles = self.origfiledata.copy()
  227. missingfiles.update(self.badfiles)
  228. sd = self.basetempdir / self.dirname
  229. sd.mkdir()
  230. self.make_files(sd, missingfiles)
  231. val, inval = validate(self.torrent, self.basetempdir)
  232. self.assertEqual(set(val), { sd / x for x in
  233. missingfiles.keys() if x not in self.badfiles })
  234. self.assertEqual(set(inval), { sd / x for x in
  235. self.badfiles.keys() })