MetaData Sharing
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 

203 lines
4.6 KiB

  1. from . import bencode
  2. from functools import reduce
  3. from hashlib import sha1
  4. import importlib.resources
  5. import itertools
  6. import os
  7. import pathlib
  8. import shutil
  9. import sys
  10. import tempfile
  11. import unittest
  12. class Storage:
  13. def __init__(self, rootpath, files, piecelen, encoding='us-ascii'):
  14. self._rootpath = pathlib.Path(rootpath)
  15. self._files = files
  16. self._piecelen = piecelen
  17. self._encoding = encoding
  18. self._buildindex()
  19. def _filepaths(self):
  20. for curfile in self._files:
  21. fname = pathlib.Path(
  22. *(x.decode(self._encoding) for x in
  23. curfile['path']))
  24. curfilepath = self._rootpath / fname
  25. yield curfile, fname, curfilepath
  26. def allfiles(self):
  27. for x, y, curfilepath in self._filepaths():
  28. yield curfilepath
  29. def _buildindex(self):
  30. self._index = []
  31. files = self._filepaths()
  32. left = 0
  33. curfile = None
  34. while True:
  35. if curfile is None or curfileoff == curfile['length']:
  36. # next file
  37. try:
  38. curfile, fname, curfilepath = next(files)
  39. except StopIteration:
  40. break
  41. curfileoff = 0
  42. if left == 0:
  43. current = []
  44. self._index.append(current)
  45. left = self._piecelen
  46. sz = min(curfile['length'] - curfileoff, left)
  47. current.append(dict(file=curfilepath, fname=fname,
  48. offset=curfileoff, size=sz))
  49. curfileoff += sz
  50. left -= sz
  51. def filesforpiece(self, idx):
  52. for x in self._index[idx]:
  53. yield x['file']
  54. def apply_piece(self, idx, fun):
  55. for i in self._index[idx]:
  56. with open(i['file'], 'rb') as fp:
  57. fp.seek(i['offset'])
  58. fun(fp.read(i['size']))
  59. def validate(torrent, basedir):
  60. info = torrent['info']
  61. basedir = pathlib.Path(basedir)
  62. try:
  63. encoding = torrent['encoding'].decode('us-ascii')
  64. except KeyError:
  65. encoding = 'us-ascii'
  66. torrentdir = basedir / info['name'].decode(encoding)
  67. stor = Storage(torrentdir, info['files'], info['piece length'], encoding)
  68. pieces = info['pieces']
  69. piecescnt = len(pieces) // 20
  70. valid = [ None ] * piecescnt
  71. for num, i in enumerate(pieces[x:x+20] for x in range(0, len(pieces),
  72. 20)):
  73. hash = sha1()
  74. stor.apply_piece(num, hash.update)
  75. if hash.digest() == i:
  76. valid[num] = True
  77. else:
  78. valid[num] = False
  79. # if any piece of a file is bad, it's bad
  80. allfiles = set(stor.allfiles())
  81. badpieces = [ x for x, v in enumerate(valid) if not v ]
  82. badfiles = reduce(set.__or__, (set(stor.filesforpiece(x)) for x in
  83. badpieces), set())
  84. return allfiles - badfiles, badfiles
  85. class _TestCases(unittest.TestCase):
  86. dirname = 'somedir'
  87. origfiledata = {
  88. 'filea.txt': b'foo\n',
  89. 'fileb.txt': b'bar\n',
  90. 'filec.txt': b'bleha\n',
  91. 'filed.txt': b'somehow\n',
  92. 'filee.txt': b'nowab\n',
  93. 'filef/filef.txt': b'\n',
  94. }
  95. def setUp(self):
  96. d = pathlib.Path(tempfile.mkdtemp()).resolve()
  97. tor = importlib.resources.files(__name__)
  98. tor = tor / 'fixtures' / 'somedir.torrent'
  99. with tor.open('rb') as fp:
  100. self.torrent = bencode.bdecode(fp.read())
  101. self.basetempdir = d
  102. self.oldcwd = os.getcwd()
  103. os.chdir(d)
  104. def tearDown(self):
  105. shutil.rmtree(self.basetempdir)
  106. os.chdir(self.oldcwd)
  107. @staticmethod
  108. def make_files(dname, fdict):
  109. dname = pathlib.Path(dname)
  110. for k, v in fdict.items():
  111. k = dname / pathlib.PurePosixPath(k)
  112. k.parent.mkdir(parents=True, exist_ok=True)
  113. with open(k, 'wb') as fp:
  114. fp.write(v)
  115. def test_completeverif(self):
  116. sd = self.basetempdir / self.dirname
  117. sd.mkdir()
  118. self.make_files(sd, self.origfiledata)
  119. validate(self.torrent, self.basetempdir)
  120. # encoded names
  121. sd = self.basetempdir / 'thai'
  122. sd.mkdir()
  123. self.make_files(sd, { 'thai - สวัสดี.txt': b'hello\n'
  124. })
  125. tor = importlib.resources.files(__name__)
  126. tor = tor / 'fixtures' / 'thai.torrent'
  127. with tor.open('rb') as fp:
  128. torrent = bencode.bdecode(fp.read())
  129. validate(torrent, self.basetempdir)
  130. def test_verification(self):
  131. # Testing for "missing" files
  132. # piece size 2 (aka 4 bytes)
  133. # empty file of 4 bytes 'foo\n'
  134. # complete file of 4 bytes 'bar\n'
  135. # partial missing file, 6 bytes, last two correct 'bleha\n'
  136. # complete file of 8 bytes (multiple pieces) 'somehow\n'
  137. # partial missing file, starting w/ 2 bytes, length 6 'nowab\n'
  138. # complete file (length 1) '\n'
  139. missingfiles = self.origfiledata.copy()
  140. badfiles = {
  141. 'filea.txt': b'',
  142. 'filec.txt': b'\x00\x00\x00\x00a\n',
  143. 'filee.txt': b'no',
  144. }
  145. missingfiles.update(badfiles)
  146. sd = self.basetempdir / self.dirname
  147. sd.mkdir()
  148. self.make_files(sd, missingfiles)
  149. val, inval = validate(self.torrent, self.basetempdir)
  150. self.assertEqual(set(val), { sd / x for x in missingfiles.keys() if x not in badfiles })
  151. self.assertEqual(set(inval), { sd / x for x in badfiles.keys() })