MetaData Sharing
 
 
 
 

272 lines
6.3 KiB

  1. from . import bencode
  2. from functools import reduce
  3. from hashlib import sha1
  4. import importlib.resources
  5. import itertools
  6. import os
  7. import pathlib
  8. import shutil
  9. import sys
  10. import tempfile
  11. import unittest
  12. _encoding = 'utf-8'
  13. __all__ = [ 'validate', 'validate_file' ]
  class Storage:
      '''Map the pieces of a multi-file torrent onto the files on disk.

      The files are walked in order and their bytes are assigned to
      consecutive pieces of piecelen bytes, so one piece may contain
      parts of several files and one file may be spread over several
      pieces.  Both directions of that mapping are indexed when the
      object is created.
      '''

      def __init__(self, rootpath, files, piecelen):
          '''
          rootpath - path to the dir the torrent's files are in
          files - the files entry from the torrent info key; it is
              iterated, and each item is read for its 'path' (sequence
              of encoded path components) and 'length' keys
          piecelen - piece length from the torrent info key

          Raises ValueError if piecelen is not a positive number.
          '''

          # A zero (or negative) piece length can never be filled, and
          # would make the index construction loop forever.
          if piecelen <= 0:
              raise ValueError(
                  'piece length must be positive, got %r' % (piecelen,))

          self._rootpath = pathlib.Path(rootpath)
          self._files = files
          self._piecelen = piecelen

          self._buildindex()

      def _filepaths(self):
          '''Iterates over all the files in the torrent.

          Each item is a tuple of:
          the file's entry from the files list (unmodified)
          a pathlib.PurePath for the file, built from the decoded
              'path' components (relative to the torrent dir)
          a pathlib.Path for the file on disk (under rootpath)
          '''

          for curfile in self._files:
              fname = pathlib.PurePath(
                  *(x.decode(_encoding) for x in curfile['path']))
              curfilepath = self._rootpath / fname

              yield curfile, fname, curfilepath

      def allfiles(self):
          '''Iterator that returns each on disk path name for
          each file.'''

          for _, _, curfilepath in self._filepaths():
              yield curfilepath

      def _buildindex(self):
          '''Internal function to build the needed indexes for
          pieces and files.

          self._pieceindex - list indexed by piece number; each item
              is the list of parts making up that piece, each part a
              dict with the keys file, fname, offset and size.
          self._fileindex - dict mapping a file's relative path to the
              ascending list of piece numbers that contain any part
              of that file.
          '''

          self._pieceindex = []
          self._fileindex = {}

          left = 0        # bytes not yet assigned in the current piece
          current = None  # part list of the current piece

          for curfile, fname, curfilepath in self._filepaths():
              length = curfile['length']
              covering = self._fileindex.setdefault(fname, [])
              curfileoff = 0

              # Runs at least once per file, so a file always ends up
              # with a part (possibly of size 0) in some piece.
              while True:
                  if left == 0:
                      # previous piece is full, start the next one
                      current = []
                      self._pieceindex.append(current)
                      left = self._piecelen

                  # Record the piece for every file that has a part in
                  # it, not only for the file that begins the piece;
                  # otherwise a file starting in the middle of a piece
                  # would not be tied to that piece at all.
                  pieceno = len(self._pieceindex) - 1
                  if not covering or covering[-1] != pieceno:
                      covering.append(pieceno)

                  sz = min(length - curfileoff, left)

                  current.append(dict(file=curfilepath, fname=fname,
                      offset=curfileoff, size=sz))

                  curfileoff += sz
                  left -= sz

                  if curfileoff == length:
                      break

      def filepieces(self):
          '''Iterator that returns a pair, first item is the subpath
          to a file (that is relative to the torrent dir), and the
          pieces that cover the file.'''

          return self._fileindex.items()

      def filesforpiece(self, idx):
          '''Yield the on disk path of each file that has a part in
          piece idx, in the order the parts appear in the piece.'''

          for part in self._pieceindex[idx]:
              yield part['file']

      def apply_piece(self, idx, fun):
          '''Read the parts of piece idx, and call fun w/ each part.

          This is to hash the parts, e.g.
          hash = sha1()
          stor.apply_piece(num, hash.update)

          hash now contains the digest for the piece.

          Each part's file is opened for reading, so errors from
          open() (such as a file that does not exist) propagate to
          the caller.  A file shorter than expected simply yields
          fewer bytes for its part.'''

          for part in self._pieceindex[idx]:
              with open(part['file'], 'rb') as fp:
                  fp.seek(part['offset'])
                  fun(fp.read(part['size']))
  def validate_file(fname):
      '''Validate the data belonging to the torrent file fname.

      The torrent file is decoded, and the directory containing it is
      searched recursively for an entry whose name equals the
      torrent's info name.  The first entry found is taken to be the
      torrent's data, and its parent directory is passed to validate()
      as the base directory.

      Returns what validate() returns: a pair of sets, the first is
      all the files that are valid, the second all the invalid files.

      Raises IndexError if no entry with the torrent's name is found.
      '''

      # local import: only needed to escape the search pattern below
      import glob

      fname = pathlib.Path(fname)

      with open(fname, 'rb') as fp:
          torrent = bencode.bdecode(fp.read())

      name = torrent['info']['name'].decode(_encoding)

      # The name is data, not a pattern: escape it so that names
      # containing glob metacharacters ('[', '*', '?') are matched
      # literally instead of being interpreted by rglob().
      matches = iter(fname.parent.rglob(glob.escape(name)))

      # Only the first hit is used, so stop the directory walk there
      # instead of collecting every match.
      try:
          dirname = next(matches)
      except StopIteration:
          raise IndexError('no entry named %r found under %s' %
              (name, fname.parent)) from None

      tordir = dirname.parent

      return validate(torrent, tordir)
  def validate(torrent, basedir):
      '''Verify the data of a decoded torrent stored under basedir.

      The torrent's files are expected in the directory named by the
      info 'name' key inside basedir.  The info 'pieces' value is
      treated as a run of 20 byte SHA-1 digests, one per piece, and
      each piece read from disk is hashed and compared against its
      digest.

      Returns a pair of sets, the first is all the files that are
      valid, the second are all the invalid files.  Both contain the
      on disk paths of the files.'''

      meta = torrent['info']

      torrentdir = pathlib.Path(basedir) / meta['name'].decode(_encoding)

      stor = Storage(torrentdir, meta['files'], meta['piece length'])

      digests = meta['pieces']
      valid = [ None ] * (len(digests) // 20)

      for num, start in enumerate(range(0, len(digests), 20)):
          hasher = sha1()

          stor.apply_piece(num, hasher.update)

          valid[num] = hasher.digest() == digests[start:start + 20]

      allfiles = set(stor.allfiles())

      # a file is bad as soon as one of the pieces covering it is bad
      badfiles = set()
      for relpath, covering in stor.filepieces():
          if any(not valid[i] for i in covering):
              badfiles.add(torrentdir / relpath)

      return allfiles - badfiles, badfiles
  class _TestCases(unittest.TestCase):
      '''Tests for validate() and validate_file().

      Each test runs with a fresh temporary directory as the current
      working directory, and uses torrent files shipped in the
      package's fixtures directory.
      '''

      # name of the directory the files of somedir.torrent live in
      dirname = 'somedir'

      # file contents for somedir.torrent
      origfiledata = {
          'filea.txt': b'foo\n',
          'fileb.txt': b'bar\n',
          'filec.txt': b'bleha\n',
          'filed.txt': b'somehow\n',
          'filee.txt': b'nowab\n',
          'filef/filef.txt': b'\n',
      }

      # some munging to make some files bad
      badfiles = {
          'filea.txt': b'',
          'filec.txt': b'\x00\x00\x00\x00a\n',
          'filee.txt': b'no',
      }

      def setUp(self):
          # Load the fixture before creating anything on disk:
          # tearDown does not run when setUp raises, so a failure here
          # must not leave a temporary directory behind.
          tor = importlib.resources.files(__name__)
          tor = tor / 'fixtures' / 'somedir.torrent'
          with tor.open('rb') as fp:
              self.torrent = bencode.bdecode(fp.read())

          d = pathlib.Path(tempfile.mkdtemp()).resolve()
          self.basetempdir = d

          self.oldcwd = os.getcwd()

          os.chdir(d)

      def tearDown(self):
          # Leave the temporary directory before deleting it; removing
          # the current working directory fails on some platforms.
          os.chdir(self.oldcwd)

          shutil.rmtree(self.basetempdir)

      @staticmethod
      def make_files(dname, fdict):
          '''Create the files described by fdict under dname.

          fdict maps a '/' separated relative file name to the bytes
          to write; missing parent directories are created.'''

          dname = pathlib.Path(dname)
          for k, v in fdict.items():
              k = dname / pathlib.PurePosixPath(k)
              k.parent.mkdir(parents=True, exist_ok=True)
              with open(k, 'wb') as fp:
                  fp.write(v)

      def test_completeverif(self):
          # a complete torrent located via its torrent file, with the
          # data one directory level below the torrent file
          tf = self.basetempdir / 'a.torrent'
          with open(tf, 'wb') as fp:
              fp.write(bencode.bencode(self.torrent))

          sd = self.basetempdir / 'anotherdir' / self.dirname
          sd.parent.mkdir()
          sd.mkdir()

          self.make_files(sd, self.origfiledata)

          good, bad = validate_file(tf)

          self.assertFalse(bad)

          # that utf-8 encoded names work
          sd = self.basetempdir / 'thai'
          sd.mkdir()

          self.make_files(sd, { 'thai - สวัสดี.txt': b'hello\n'
              })

          tor = importlib.resources.files(__name__)
          tor = tor / 'fixtures' / 'thai.torrent'
          with tor.open('rb') as fp:
              torrent = bencode.bdecode(fp.read())

          good, bad = validate(torrent, self.basetempdir)

          self.assertFalse(bad)

      def test_verification(self):
          # Testing for "missing" files
          # piece size 2 (aka 4 bytes)
          # empty file of 4 bytes 'foo\n'
          # complete file of 4 bytes 'bar\n'
          # partial missing file, 6 bytes, last two correct 'bleha\n'
          # complete file of 8 bytes (multiple pieces) 'somehow\n'
          # partial missing file, starting w/ 2 bytes, length 6 'nowab\n'
          # complete file (length 1) '\n'

          missingfiles = self.origfiledata.copy()

          missingfiles.update(self.badfiles)

          sd = self.basetempdir / self.dirname
          sd.mkdir()

          self.make_files(sd, missingfiles)

          val, inval = validate(self.torrent, self.basetempdir)

          # every file that was not munged must be reported valid
          self.assertEqual(set(val), { sd / x for x in
              missingfiles.keys() if x not in self.badfiles })
          # and exactly the munged ones invalid
          self.assertEqual(set(inval), { sd / x for x in
              self.badfiles.keys() })