MetaData Sharing
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 

217 lines
4.9 KiB

  1. from . import bencode
  2. from functools import reduce
  3. from hashlib import sha1
  4. import importlib.resources
  5. import itertools
  6. import os
  7. import pathlib
  8. import shutil
  9. import sys
  10. import tempfile
  11. import unittest
  12. _encoding = 'utf-8'
  13. class Storage:
  14. def __init__(self, rootpath, files, piecelen):
  15. self._rootpath = pathlib.Path(rootpath)
  16. self._files = files
  17. self._piecelen = piecelen
  18. self._buildindex()
  19. def _filepaths(self):
  20. '''Iterates over all the files in the torrent.
  21. Each item is a tuple of:
  22. array of file path components (undecoded)
  23. a pathlib.PurePath for the file
  24. a pathlib.Path for file on disk
  25. '''
  26. for curfile in self._files:
  27. fname = pathlib.PurePath(
  28. *(x.decode(_encoding) for x in
  29. curfile['path']))
  30. curfilepath = self._rootpath / fname
  31. yield curfile, fname, curfilepath
  32. def allfiles(self):
  33. for x, y, curfilepath in self._filepaths():
  34. yield curfilepath
  35. def _buildindex(self):
  36. self._pieceindex = []
  37. self._fileindex = {}
  38. files = self._filepaths()
  39. left = 0
  40. curfile = None
  41. while True:
  42. if curfile is None or curfileoff == curfile['length']:
  43. # next file
  44. try:
  45. curfile, fname, curfilepath = next(files)
  46. except StopIteration:
  47. break
  48. curfileoff = 0
  49. if left == 0:
  50. current = []
  51. self._fileindex.setdefault(fname,
  52. []).append(len(self._pieceindex))
  53. self._pieceindex.append(current)
  54. left = self._piecelen
  55. sz = min(curfile['length'] - curfileoff, left)
  56. current.append(dict(file=curfilepath, fname=fname,
  57. offset=curfileoff, size=sz))
  58. curfileoff += sz
  59. left -= sz
  60. def filepieces(self):
  61. return self._fileindex.items()
  62. def filesforpiece(self, idx):
  63. for x in self._pieceindex[idx]:
  64. yield x['file']
  65. def apply_piece(self, idx, fun):
  66. for i in self._pieceindex[idx]:
  67. with open(i['file'], 'rb') as fp:
  68. fp.seek(i['offset'])
  69. fun(fp.read(i['size']))
  70. def validate(torrent, basedir):
  71. info = torrent['info']
  72. basedir = pathlib.Path(basedir)
  73. torrentdir = basedir / info['name'].decode(_encoding)
  74. stor = Storage(torrentdir, info['files'], info['piece length'])
  75. pieces = info['pieces']
  76. piecescnt = len(pieces) // 20
  77. valid = [ None ] * piecescnt
  78. for num, i in enumerate(pieces[x:x+20] for x in range(0, len(pieces),
  79. 20)):
  80. hash = sha1()
  81. stor.apply_piece(num, hash.update)
  82. if hash.digest() == i:
  83. valid[num] = True
  84. else:
  85. valid[num] = False
  86. # if any piece of a file is bad, it's bad
  87. allfiles = set(stor.allfiles())
  88. badfiles = { torrentdir / x for x, y in stor.filepieces() if
  89. not all(valid[i] for i in y) }
  90. return allfiles - badfiles, badfiles
class _TestCases(unittest.TestCase):
    '''Exercises Storage/validate against the torrents under fixtures/.'''

    # top-level directory name used by somedir.torrent
    dirname = 'somedir'

    # file contents for somedir.torrent
    origfiledata = {
        'filea.txt': b'foo\n',
        'fileb.txt': b'bar\n',
        'filec.txt': b'bleha\n',
        'filed.txt': b'somehow\n',
        'filee.txt': b'nowab\n',
        'filef/filef.txt': b'\n',
    }

    # some munging to make some files bad
    badfiles = {
        'filea.txt': b'',
        'filec.txt': b'\x00\x00\x00\x00a\n',
        'filee.txt': b'no',
    }
  108. def setUp(self):
  109. d = pathlib.Path(tempfile.mkdtemp()).resolve()
  110. tor = importlib.resources.files(__name__)
  111. tor = tor / 'fixtures' / 'somedir.torrent'
  112. with tor.open('rb') as fp:
  113. self.torrent = bencode.bdecode(fp.read())
  114. self.basetempdir = d
  115. self.oldcwd = os.getcwd()
  116. os.chdir(d)
  117. def tearDown(self):
  118. shutil.rmtree(self.basetempdir)
  119. os.chdir(self.oldcwd)
  120. @staticmethod
  121. def make_files(dname, fdict):
  122. dname = pathlib.Path(dname)
  123. for k, v in fdict.items():
  124. k = dname / pathlib.PurePosixPath(k)
  125. k.parent.mkdir(parents=True, exist_ok=True)
  126. with open(k, 'wb') as fp:
  127. fp.write(v)
  128. def test_completeverif(self):
  129. sd = self.basetempdir / self.dirname
  130. sd.mkdir()
  131. self.make_files(sd, self.origfiledata)
  132. validate(self.torrent, self.basetempdir)
  133. # encoded names
  134. sd = self.basetempdir / 'thai'
  135. sd.mkdir()
  136. self.make_files(sd, { 'thai - สวัสดี.txt': b'hello\n'
  137. })
  138. tor = importlib.resources.files(__name__)
  139. tor = tor / 'fixtures' / 'thai.torrent'
  140. with tor.open('rb') as fp:
  141. torrent = bencode.bdecode(fp.read())
  142. validate(torrent, self.basetempdir)
  143. def test_verification(self):
  144. # Testing for "missing" files
  145. # piece size 2 (aka 4 bytes)
  146. # empty file of 4 bytes 'foo\n'
  147. # complete file of 4 bytes 'bar\n'
  148. # partial missing file, 6 bytes, last two correct 'bleha\n'
  149. # complete file of 8 bytes (multiple pieces) 'somehow\n'
  150. # partial missing file, starting w/ 2 bytes, length 6 'nowab\n'
  151. # complete file (length 1) '\n'
  152. missingfiles = self.origfiledata.copy()
  153. missingfiles.update(self.badfiles)
  154. sd = self.basetempdir / self.dirname
  155. sd.mkdir()
  156. self.make_files(sd, missingfiles)
  157. val, inval = validate(self.torrent, self.basetempdir)
  158. self.assertEqual(set(val), { sd / x for x in
  159. missingfiles.keys() if x not in self.badfiles })
  160. self.assertEqual(set(inval), { sd / x for x in
  161. self.badfiles.keys() })