MetaData Sharing
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 

251 lines
5.9 KiB

  1. from . import bencode
  2. from functools import reduce
  3. from hashlib import sha1
  4. import importlib.resources
  5. import itertools
  6. import os
  7. import pathlib
  8. import shutil
  9. import sys
  10. import tempfile
  11. import unittest
  12. _encoding = 'utf-8'
  13. __all__ = [ 'validate' ]
  14. class Storage:
  15. '''A class to help read pieces of a torrent.
  16. '''
  17. def __init__(self, rootpath, files, piecelen):
  18. '''
  19. rootpath - path to the dir of torrent files are in
  20. files - the files dictionary from the torrent info key
  21. piecelen - piece length from the torren info key
  22. '''
  23. self._rootpath = pathlib.Path(rootpath)
  24. self._files = files
  25. self._piecelen = piecelen
  26. self._buildindex()
  27. def _filepaths(self):
  28. '''Iterates over all the files in the torrent.
  29. Each item is a tuple of:
  30. array of file path components (undecoded)
  31. a pathlib.PurePath for the file
  32. a pathlib.Path for file on disk
  33. '''
  34. for curfile in self._files:
  35. fname = pathlib.PurePath(
  36. *(x.decode(_encoding) for x in
  37. curfile['path']))
  38. curfilepath = self._rootpath / fname
  39. yield curfile, fname, curfilepath
  40. def allfiles(self):
  41. '''Iterator that returns each on disk path name for
  42. each file.'''
  43. for x, y, curfilepath in self._filepaths():
  44. yield curfilepath
  45. def _buildindex(self):
  46. '''Internal function to build the needed indexes for
  47. pieces and files.'''
  48. self._pieceindex = []
  49. self._fileindex = {}
  50. files = self._filepaths()
  51. left = 0
  52. curfile = None
  53. while True:
  54. if curfile is None or curfileoff == curfile['length']:
  55. # next file
  56. try:
  57. curfile, fname, curfilepath = next(files)
  58. except StopIteration:
  59. break
  60. curfileoff = 0
  61. if left == 0:
  62. current = []
  63. self._fileindex.setdefault(fname,
  64. []).append(len(self._pieceindex))
  65. self._pieceindex.append(current)
  66. left = self._piecelen
  67. sz = min(curfile['length'] - curfileoff, left)
  68. current.append(dict(file=curfilepath, fname=fname,
  69. offset=curfileoff, size=sz))
  70. curfileoff += sz
  71. left -= sz
  72. def filepieces(self):
  73. '''Iterator that returns a pair, first item is the subpath
  74. to a file (that is relative to the torrent dir), and the
  75. pieces that cover the file.'''
  76. return self._fileindex.items()
  77. def filesforpiece(self, idx):
  78. '''Return a list of files that are covered by piece idx.'''
  79. for x in self._pieceindex[idx]:
  80. yield x['file']
  81. def apply_piece(self, idx, fun):
  82. '''Read the parts of piece idx, and call fun w/ each part.
  83. This is to hash the parts, e.g.
  84. hash = sha1()
  85. stor.apply_piece(num, hash.update)
  86. hash now contains the digest for the piece.'''
  87. for i in self._pieceindex[idx]:
  88. with open(i['file'], 'rb') as fp:
  89. fp.seek(i['offset'])
  90. fun(fp.read(i['size']))
  91. def validate(torrent, basedir):
  92. '''Take a decode torrent file, where it was stored in basedir,
  93. verify the torrent. Returns a pair of set, the first is all the
  94. files that are valid, the second are all the invalid files.'''
  95. info = torrent['info']
  96. basedir = pathlib.Path(basedir)
  97. torrentdir = basedir / info['name'].decode(_encoding)
  98. stor = Storage(torrentdir, info['files'], info['piece length'])
  99. pieces = info['pieces']
  100. piecescnt = len(pieces) // 20
  101. valid = [ None ] * piecescnt
  102. for num, i in enumerate(pieces[x:x+20] for x in range(0, len(pieces),
  103. 20)):
  104. hash = sha1()
  105. stor.apply_piece(num, hash.update)
  106. if hash.digest() == i:
  107. valid[num] = True
  108. else:
  109. valid[num] = False
  110. # if any piece of a file is bad, it's bad
  111. allfiles = set(stor.allfiles())
  112. badfiles = { torrentdir / x for x, y in stor.filepieces() if
  113. not all(valid[i] for i in y) }
  114. return allfiles - badfiles, badfiles
  115. class _TestCases(unittest.TestCase):
  116. dirname = 'somedir'
  117. # file contents for somedir.torrent
  118. origfiledata = {
  119. 'filea.txt': b'foo\n',
  120. 'fileb.txt': b'bar\n',
  121. 'filec.txt': b'bleha\n',
  122. 'filed.txt': b'somehow\n',
  123. 'filee.txt': b'nowab\n',
  124. 'filef/filef.txt': b'\n',
  125. }
  126. # some munging to make some files bad
  127. badfiles = {
  128. 'filea.txt': b'',
  129. 'filec.txt': b'\x00\x00\x00\x00a\n',
  130. 'filee.txt': b'no',
  131. }
  132. def setUp(self):
  133. d = pathlib.Path(tempfile.mkdtemp()).resolve()
  134. tor = importlib.resources.files(__name__)
  135. tor = tor / 'fixtures' / 'somedir.torrent'
  136. with tor.open('rb') as fp:
  137. self.torrent = bencode.bdecode(fp.read())
  138. self.basetempdir = d
  139. self.oldcwd = os.getcwd()
  140. os.chdir(d)
  141. def tearDown(self):
  142. shutil.rmtree(self.basetempdir)
  143. os.chdir(self.oldcwd)
  144. @staticmethod
  145. def make_files(dname, fdict):
  146. dname = pathlib.Path(dname)
  147. for k, v in fdict.items():
  148. k = dname / pathlib.PurePosixPath(k)
  149. k.parent.mkdir(parents=True, exist_ok=True)
  150. with open(k, 'wb') as fp:
  151. fp.write(v)
  152. def test_completeverif(self):
  153. sd = self.basetempdir / self.dirname
  154. sd.mkdir()
  155. self.make_files(sd, self.origfiledata)
  156. validate(self.torrent, self.basetempdir)
  157. # that utf-8 encoded names work
  158. sd = self.basetempdir / 'thai'
  159. sd.mkdir()
  160. self.make_files(sd, { 'thai - สวัสดี.txt': b'hello\n'
  161. })
  162. tor = importlib.resources.files(__name__)
  163. tor = tor / 'fixtures' / 'thai.torrent'
  164. with tor.open('rb') as fp:
  165. torrent = bencode.bdecode(fp.read())
  166. validate(torrent, self.basetempdir)
  167. def test_verification(self):
  168. # Testing for "missing" files
  169. # piece size 2 (aka 4 bytes)
  170. # empty file of 4 bytes 'foo\n'
  171. # complete file of 4 bytes 'bar\n'
  172. # partial missing file, 6 bytes, last two correct 'bleha\n'
  173. # complete file of 8 bytes (multiple pieces) 'somehow\n'
  174. # partial missing file, starting w/ 2 bytes, length 6 'nowab\n'
  175. # complete file (length 1) '\n'
  176. missingfiles = self.origfiledata.copy()
  177. missingfiles.update(self.badfiles)
  178. sd = self.basetempdir / self.dirname
  179. sd.mkdir()
  180. self.make_files(sd, missingfiles)
  181. val, inval = validate(self.torrent, self.basetempdir)
  182. self.assertEqual(set(val), { sd / x for x in
  183. missingfiles.keys() if x not in self.badfiles })
  184. self.assertEqual(set(inval), { sd / x for x in
  185. self.badfiles.keys() })