You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 

793 lines
26 KiB

  1. # Copyright (c) 2011, SmartFile <btimby@smartfile.com>
  2. # All rights reserved.
  3. #
  4. # Redistribution and use in source and binary forms, with or without
  5. # modification, are permitted provided that the following conditions are met:
  6. # * Redistributions of source code must retain the above copyright
  7. # notice, this list of conditions and the following disclaimer.
  8. # * Redistributions in binary form must reproduce the above copyright
  9. # notice, this list of conditions and the following disclaimer in the
  10. # documentation and/or other materials provided with the distribution.
  11. # * Neither the name of the organization nor the
  12. # names of its contributors may be used to endorse or promote products
  13. # derived from this software without specific prior written permission.
  14. #
  15. # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
  16. # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  17. # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  18. # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
  19. # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  20. # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  21. # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  22. # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  23. # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  24. # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  25. import os
  26. import stat
  27. import sys
  28. import time
  29. import warnings
  30. from libarchive import _libarchive
  31. from io import StringIO
  32. PY3 = sys.version_info[0] >= 3
  33. # Suggested block size for libarchive. Libarchive may adjust it.
  34. BLOCK_SIZE = 10240
  35. MTIME_FORMAT = ""
  36. # Default encoding scheme.
  37. ENCODING = "utf-8"
  38. # Functions to initialize read/write for various libarchive supported formats and filters.
  39. FORMATS = {
  40. None: (_libarchive.archive_read_support_format_all, None),
  41. "tar": (
  42. _libarchive.archive_read_support_format_tar,
  43. _libarchive.archive_write_set_format_ustar,
  44. ),
  45. "pax": (
  46. _libarchive.archive_read_support_format_tar,
  47. _libarchive.archive_write_set_format_pax,
  48. ),
  49. "gnu": (
  50. _libarchive.archive_read_support_format_gnutar,
  51. _libarchive.archive_write_set_format_gnutar,
  52. ),
  53. "zip": (
  54. _libarchive.archive_read_support_format_zip,
  55. _libarchive.archive_write_set_format_zip,
  56. ),
  57. "rar": (_libarchive.archive_read_support_format_rar, None),
  58. "7zip": (_libarchive.archive_read_support_format_7zip, None),
  59. "ar": (_libarchive.archive_read_support_format_ar, None),
  60. "cab": (_libarchive.archive_read_support_format_cab, None),
  61. "cpio": (
  62. _libarchive.archive_read_support_format_cpio,
  63. _libarchive.archive_write_set_format_cpio_newc,
  64. ),
  65. "iso": (
  66. _libarchive.archive_read_support_format_iso9660,
  67. _libarchive.archive_write_set_format_iso9660,
  68. ),
  69. "lha": (_libarchive.archive_read_support_format_lha, None),
  70. "xar": (
  71. _libarchive.archive_read_support_format_xar,
  72. _libarchive.archive_write_set_format_xar,
  73. ),
  74. }
  75. FILTERS = {
  76. None: (
  77. _libarchive.archive_read_support_filter_all,
  78. _libarchive.archive_write_add_filter_none,
  79. ),
  80. "gz": (
  81. _libarchive.archive_read_support_filter_gzip,
  82. _libarchive.archive_write_add_filter_gzip,
  83. ),
  84. "bz2": (
  85. _libarchive.archive_read_support_filter_bzip2,
  86. _libarchive.archive_write_add_filter_bzip2,
  87. ),
  88. }
  89. # Map file extensions to formats and filters. To support quick detection.
  90. FORMAT_EXTENSIONS = {
  91. ".tar": "tar",
  92. ".zip": "zip",
  93. ".rar": "rar",
  94. ".7z": "7zip",
  95. ".ar": "ar",
  96. ".cab": "cab",
  97. ".rpm": "cpio",
  98. ".cpio": "cpio",
  99. ".iso": "iso",
  100. ".lha": "lha",
  101. ".xar": "xar",
  102. }
  103. FILTER_EXTENSIONS = {
  104. ".gz": "gz",
  105. ".bz2": "bz2",
  106. }
  107. class EOF(Exception):
  108. """Raised by ArchiveInfo.from_archive() when unable to read the next
  109. archive header."""
  110. pass
  111. def version():
  112. """Returns the version of the libarchive library."""
  113. return _libarchive.archive_version_string().split()[1]
  114. def get_error(archive):
  115. """Retrieves the last error description for the given archive instance."""
  116. return _libarchive.archive_error_string(archive)
  117. def call_and_check(func, archive, *args):
  118. """Executes a libarchive function and raises an exception when appropriate."""
  119. ret = func(*args)
  120. if ret == _libarchive.ARCHIVE_OK:
  121. return
  122. elif ret == _libarchive.ARCHIVE_WARN:
  123. warnings.warn(
  124. "Warning executing function: %s." % get_error(archive), RuntimeWarning
  125. )
  126. elif ret == _libarchive.ARCHIVE_EOF:
  127. raise EOF()
  128. else:
  129. raise Exception(
  130. "Problem executing function, message is: %s." % get_error(archive)
  131. )
  132. def get_func(name, items, index):
  133. item = items.get(name, None)
  134. if item is None:
  135. return None
  136. return item[index]
  137. def guess_format(filename):
  138. if isinstance(filename, int):
  139. filename = ext = ""
  140. else:
  141. filename, ext = os.path.splitext(filename)
  142. filter = FILTER_EXTENSIONS.get(ext)
  143. if filter:
  144. filename, ext = os.path.splitext(filename)
  145. format = FORMAT_EXTENSIONS.get(ext)
  146. return format, filter
  147. def is_archive_name(filename, formats=None):
  148. """Quick check to see if the given file has an extension indiciating that it is
  149. an archive. The format parameter can be used to limit what archive format is acceptable.
  150. If omitted, all supported archive formats will be checked.
  151. This function will return the name of the most likely archive format, None if the file is
  152. unlikely to be an archive."""
  153. if formats is None:
  154. formats = list(FORMAT_EXTENSIONS.values())
  155. format, filter = guess_format(filename)
  156. if format in formats:
  157. return format
  158. def is_archive(f, formats=(None,), filters=(None,)):
  159. """Check to see if the given file is actually an archive. The format parameter
  160. can be used to specify which archive format is acceptable. If ommitted, all supported
  161. archive formats will be checked. It opens the file using libarchive. If no error is
  162. received, the file was successfully detected by the libarchive bidding process.
  163. This procedure is quite costly, so you should avoid calling it unless you are reasonably
  164. sure that the given file is an archive. In other words, you may wish to filter large
  165. numbers of file names using is_archive_name() before double-checking the positives with
  166. this function.
  167. This function will return True if the file can be opened as an archive using the given
  168. format(s)/filter(s)."""
  169. need_close = False
  170. if isinstance(f, str):
  171. f = open(f, "rb")
  172. need_close = True
  173. a = _libarchive.archive_read_new()
  174. for format in formats:
  175. format = get_func(format, FORMATS, 0)
  176. if format is None:
  177. return False
  178. format(a)
  179. for filter in filters:
  180. filter = get_func(filter, FILTERS, 0)
  181. if filter is None:
  182. return False
  183. filter(a)
  184. try:
  185. try:
  186. call_and_check(
  187. _libarchive.archive_read_open_fd, a, a, f.fileno(), BLOCK_SIZE
  188. )
  189. return True
  190. except:
  191. return False
  192. finally:
  193. _libarchive.archive_read_close(a)
  194. _libarchive.archive_read_free(a)
  195. if need_close:
  196. f.close()
  197. class EntryReadStream(object):
  198. """A file-like object for reading an entry from the archive."""
  199. def __init__(self, archive, size):
  200. self.archive = archive
  201. self.closed = False
  202. self.size = size
  203. self.bytes = 0
  204. def __enter__(self):
  205. return self
  206. def __exit__(self, *args):
  207. return
  208. def __iter__(self):
  209. if self.closed:
  210. return
  211. while True:
  212. data = self.read(BLOCK_SIZE)
  213. if not data:
  214. break
  215. yield data
  216. def __len__(self):
  217. return self.size
  218. def tell(self):
  219. return self.bytes
  220. def read(self, bytes=-1):
  221. if self.closed:
  222. return
  223. if self.bytes == self.size:
  224. # EOF already reached.
  225. return
  226. if bytes < 0:
  227. bytes = self.size - self.bytes
  228. elif self.bytes + bytes > self.size:
  229. # Limit read to remaining bytes
  230. bytes = self.size - self.bytes
  231. # Read requested bytes
  232. data = _libarchive.archive_read_data_into_str(self.archive._a, bytes)
  233. self.bytes += len(data)
  234. return data
  235. def close(self):
  236. if self.closed:
  237. return
  238. # Call archive.close() with _defer True to let it know we have been
  239. # closed and it is now safe to actually close.
  240. self.archive.close(_defer=True)
  241. self.archive = None
  242. self.closed = True
  243. class EntryWriteStream(object):
  244. """A file-like object for writing an entry to an archive.
  245. If the size is known ahead of time and provided, then the file contents
  246. are not buffered but flushed directly to the archive. If size is omitted,
  247. then the file contents are buffered and flushed in the close() method."""
  248. def __init__(self, archive, pathname, size=None):
  249. self.archive = archive
  250. self.entry = Entry(pathname=pathname, mtime=time.time(), mode=stat.S_IFREG)
  251. if size is None:
  252. self.buffer = StringIO()
  253. else:
  254. self.buffer = None
  255. self.entry.size = size
  256. self.entry.to_archive(self.archive)
  257. self.bytes = 0
  258. self.closed = False
  259. def __enter__(self):
  260. return self
  261. def __exit__(self, *args):
  262. self.close()
  263. def __del__(self):
  264. self.close()
  265. def __len__(self):
  266. return self.bytes
  267. def tell(self):
  268. return self.bytes
  269. def write(self, data):
  270. if self.closed:
  271. raise Exception("Cannot write to closed stream.")
  272. if self.buffer:
  273. self.buffer.write(data)
  274. else:
  275. _libarchive.archive_write_data_from_str(
  276. self.archive._a, data.encode(ENCODING)
  277. )
  278. self.bytes += len(data)
  279. def close(self):
  280. if self.closed:
  281. return
  282. if self.buffer:
  283. self.entry.size = self.buffer.tell()
  284. self.entry.to_archive(self.archive)
  285. _libarchive.archive_write_data_from_str(
  286. self.archive._a, self.buffer.getvalue().encode(ENCODING)
  287. )
  288. _libarchive.archive_write_finish_entry(self.archive._a)
  289. # Call archive.close() with _defer True to let it know we have been
  290. # closed and it is now safe to actually close.
  291. self.archive.close(_defer=True)
  292. self.archive = None
  293. self.closed = True
  294. class Entry(object):
  295. """An entry within an archive. Represents the header data and it's location within the archive."""
  296. def __init__(
  297. self,
  298. pathname=None,
  299. size=None,
  300. mtime=None,
  301. mode=None,
  302. hpos=None,
  303. encoding=ENCODING,
  304. ):
  305. # , symlink=None
  306. self.pathname = pathname
  307. self.size = size
  308. self.mtime = mtime
  309. self.mode = mode
  310. self.hpos = hpos
  311. self.encoding = encoding
  312. self.symlink = ""
  313. @property
  314. def header_position(self):
  315. return self.hpos
  316. @classmethod
  317. def from_archive(cls, archive, encoding=ENCODING):
  318. """Instantiates an Entry class and sets all the properties from an archive header."""
  319. e = _libarchive.archive_entry_new()
  320. try:
  321. call_and_check(
  322. _libarchive.archive_read_next_header2, archive._a, archive._a, e
  323. )
  324. mode = _libarchive.archive_entry_filetype(e)
  325. mode |= _libarchive.archive_entry_perm(e)
  326. if PY3:
  327. pathname = _libarchive.archive_entry_pathname(e)
  328. else:
  329. pathname = _libarchive.archive_entry_pathname(e).decode(encoding)
  330. entry = cls(
  331. pathname=pathname,
  332. size=_libarchive.archive_entry_size(e),
  333. mtime=_libarchive.archive_entry_mtime(e),
  334. mode=mode,
  335. hpos=archive.header_position,
  336. )
  337. if entry.issym():
  338. symLinkPath = _libarchive.archive_entry_symlink(e)
  339. entry.symlink = symLinkPath
  340. finally:
  341. _libarchive.archive_entry_free(e)
  342. return entry
  343. @classmethod
  344. def from_file(cls, f, entry=None, encoding=ENCODING):
  345. """Instantiates an Entry class and sets all the properties from a file on the file system.
  346. f can be a file-like object or a path."""
  347. if entry is None:
  348. entry = cls(encoding=encoding)
  349. if entry.pathname is None:
  350. if isinstance(f, str):
  351. st = os.stat(f)
  352. entry.pathname = f
  353. entry.size = st.st_size
  354. entry.mtime = st.st_mtime
  355. entry.mode = st.st_mode
  356. elif hasattr(f, "fileno"):
  357. st = os.fstat(f.fileno())
  358. entry.pathname = getattr(f, "name", None)
  359. entry.size = st.st_size
  360. entry.mtime = st.st_mtime
  361. entry.mode = st.st_mode
  362. else:
  363. entry.pathname = getattr(f, "pathname", None)
  364. entry.size = getattr(f, "size", 0)
  365. entry.mtime = getattr(f, "mtime", time.time())
  366. entry.mode = stat.S_IFREG
  367. return entry
  368. def to_archive(self, archive):
  369. """Creates an archive header and writes it to the given archive."""
  370. e = _libarchive.archive_entry_new()
  371. try:
  372. if PY3:
  373. _libarchive.archive_entry_set_pathname(e, self.pathname)
  374. else:
  375. _libarchive.archive_entry_set_pathname(
  376. e, self.pathname.encode(self.encoding)
  377. )
  378. _libarchive.archive_entry_set_filetype(e, stat.S_IFMT(self.mode))
  379. _libarchive.archive_entry_set_perm(e, stat.S_IMODE(self.mode))
  380. _libarchive.archive_entry_set_size(e, self.size)
  381. _libarchive.archive_entry_set_mtime(e, self.mtime, 0)
  382. if stat.S_ISLNK(self.mode):
  383. _libarchive.archive_entry_set_symlink(e, self.symlink)
  384. call_and_check(_libarchive.archive_write_header, archive._a, archive._a, e)
  385. finally:
  386. _libarchive.archive_entry_free(e)
  387. def isdir(self):
  388. return stat.S_ISDIR(self.mode)
  389. def isfile(self):
  390. return stat.S_ISREG(self.mode)
  391. def issym(self):
  392. return stat.S_ISLNK(self.mode)
  393. def isfifo(self):
  394. return stat.S_ISFIFO(self.mode)
  395. def ischr(self):
  396. return stat.S_ISCHR(self.mode)
  397. def isblk(self):
  398. return stat.S_ISBLK(self.mode)
  399. class Archive(object):
  400. """A low-level archive reader which provides forward-only iteration. Consider
  401. this a light-weight pythonic libarchive wrapper."""
  402. def __init__(
  403. self,
  404. f,
  405. mode="r",
  406. format=None,
  407. filter=None,
  408. entry_class=Entry,
  409. encoding=ENCODING,
  410. blocksize=BLOCK_SIZE,
  411. password=None,
  412. ):
  413. assert mode in ("r", "w", "wb", "a"), 'Mode should be "r", "w", "wb", or "a".'
  414. self._stream = None
  415. self.encoding = encoding
  416. self.blocksize = blocksize
  417. self.password = password
  418. if isinstance(f, str):
  419. self.filename = f
  420. f = open(f, mode)
  421. # Only close it if we opened it...
  422. self._defer_close = True
  423. elif hasattr(f, "fileno"):
  424. self.filename = getattr(f, "name", None)
  425. # Leave the fd alone, caller should manage it...
  426. self._defer_close = False
  427. else:
  428. raise Exception("Provided file is not path or open file.")
  429. self.f = f
  430. self.mode = mode
  431. # Guess the format/filter from file name (if not provided)
  432. if self.filename:
  433. if format is None:
  434. format = guess_format(self.filename)[0]
  435. if filter is None:
  436. filter = guess_format(self.filename)[1]
  437. self.format = format
  438. self.filter = filter
  439. # The class to use for entries.
  440. self.entry_class = entry_class
  441. # Select filter/format functions.
  442. if self.mode == "r":
  443. self.format_func = get_func(self.format, FORMATS, 0)
  444. if self.format_func is None:
  445. raise Exception("Unsupported format %s" % format)
  446. self.filter_func = get_func(self.filter, FILTERS, 0)
  447. if self.filter_func is None:
  448. raise Exception("Unsupported filter %s" % filter)
  449. else:
  450. # TODO: how to support appending?
  451. if self.format is None:
  452. raise Exception("You must specify a format for writing.")
  453. self.format_func = get_func(self.format, FORMATS, 1)
  454. if self.format_func is None:
  455. raise Exception("Unsupported format %s" % format)
  456. self.filter_func = get_func(self.filter, FILTERS, 1)
  457. if self.filter_func is None:
  458. raise Exception("Unsupported filter %s" % filter)
  459. # Open the archive, apply filter/format functions.
  460. self.init()
  461. def __iter__(self):
  462. while True:
  463. try:
  464. yield self.entry_class.from_archive(self, encoding=self.encoding)
  465. except EOF:
  466. break
  467. def __enter__(self):
  468. return self
  469. def __exit__(self, type, value, traceback):
  470. self.denit()
  471. def __del__(self):
  472. self.close()
  473. def set_initial_options(self):
  474. pass
  475. def init(self):
  476. if self.mode == "r":
  477. self._a = _libarchive.archive_read_new()
  478. else:
  479. self._a = _libarchive.archive_write_new()
  480. self.format_func(self._a)
  481. self.filter_func(self._a)
  482. self.set_initial_options()
  483. if self.mode == "r":
  484. if self.password:
  485. if isinstance(self.password, list):
  486. for pwd in self.password:
  487. self.add_passphrase(pwd)
  488. else:
  489. self.add_passphrase(self.password)
  490. call_and_check(
  491. _libarchive.archive_read_open_fd,
  492. self._a,
  493. self._a,
  494. self.f.fileno(),
  495. self.blocksize,
  496. )
  497. else:
  498. if self.password:
  499. self.set_passphrase(self.password)
  500. call_and_check(
  501. _libarchive.archive_write_open_fd, self._a, self._a, self.f.fileno()
  502. )
  503. def denit(self):
  504. """Closes and deallocates the archive reader/writer."""
  505. if getattr(self, "_a", None) is None:
  506. return
  507. try:
  508. if self.mode == "r":
  509. _libarchive.archive_read_close(self._a)
  510. _libarchive.archive_read_free(self._a)
  511. elif self.mode == "w":
  512. _libarchive.archive_write_close(self._a)
  513. _libarchive.archive_write_free(self._a)
  514. finally:
  515. # We only want one try at this...
  516. self._a = None
  517. def close(self, _defer=False):
  518. # _defer == True is how a stream can notify Archive that the stream is
  519. # now closed. Calling it directly in not recommended.
  520. if _defer:
  521. # This call came from our open stream.
  522. self._stream = None
  523. if not self._defer_close:
  524. # We are not yet ready to close.
  525. return
  526. if self._stream is not None:
  527. # We have a stream open! don't close, but remember we were asked to.
  528. self._defer_close = True
  529. return
  530. self.denit()
  531. # If there is a file attached...
  532. if hasattr(self, "f"):
  533. # Make sure it is not already closed...
  534. if getattr(self.f, "closed", False):
  535. return
  536. # Flush it if not read-only...
  537. if hasattr(self.f, "mode") and self.f.mode != "r" and self.f.mode != "rb":
  538. if hasattr(self.f, "flush"):
  539. self.f.flush()
  540. if hasattr(self.f, "fileno"):
  541. os.fsync(self.f.fileno())
  542. # and then close it, if we opened it...
  543. if getattr(self, "_close", None):
  544. self.f.close()
  545. @property
  546. def header_position(self):
  547. """The position within the file."""
  548. return _libarchive.archive_read_header_position(self._a)
  549. def iterpaths(self):
  550. for entry in self:
  551. yield entry.pathname
  552. def read(self, size):
  553. """Read current archive entry contents into string."""
  554. return _libarchive.archive_read_data_into_str(self._a, size)
  555. def readpath(self, f):
  556. """Write current archive entry contents to file. f can be a file-like object or
  557. a path."""
  558. if isinstance(f, str):
  559. basedir = os.path.dirname(f)
  560. if not os.path.exists(basedir):
  561. os.makedirs(basedir)
  562. f = open(f, "w")
  563. return _libarchive.archive_read_data_into_fd(self._a, f.fileno())
  564. def readstream(self, size):
  565. """Returns a file-like object for reading current archive entry contents."""
  566. self._stream = EntryReadStream(self, size)
  567. return self._stream
  568. def write(self, member, data=None):
  569. """Writes a string buffer to the archive as the given entry."""
  570. if isinstance(member, str):
  571. member = self.entry_class(pathname=member, encoding=self.encoding)
  572. member.mode = stat.S_IFREG
  573. member.mtime = time.time()
  574. if data:
  575. member.size = len(data)
  576. member.to_archive(self)
  577. if data:
  578. if PY3:
  579. if isinstance(data, bytes):
  580. result = _libarchive.archive_write_data_from_str(self._a, data)
  581. else:
  582. result = _libarchive.archive_write_data_from_str(
  583. self._a, data.encode(self.encoding)
  584. )
  585. else:
  586. result = _libarchive.archive_write_data_from_str(self._a, data)
  587. _libarchive.archive_write_finish_entry(self._a)
  588. def writepath(self, f, pathname=None, folder=False):
  589. """Writes a file to the archive. f can be a file-like object or a path. Uses
  590. write() to do the actual writing."""
  591. member = self.entry_class.from_file(f, encoding=self.encoding)
  592. if isinstance(f, str):
  593. if os.path.isfile(f):
  594. f = open(f, "r")
  595. if pathname:
  596. member.pathname = pathname
  597. if folder and not member.isdir():
  598. member.mode = stat.S_IFDIR
  599. if hasattr(f, "read"):
  600. # TODO: optimize this to write directly from f to archive.
  601. self.write(member, data=f.read())
  602. else:
  603. self.write(member)
  604. def writestream(self, pathname, size=None):
  605. """Returns a file-like object for writing a new entry."""
  606. self._stream = EntryWriteStream(self, pathname, size)
  607. return self._stream
  608. def printlist(self, s=sys.stdout):
  609. for entry in self:
  610. s.write(entry.size)
  611. s.write("\t")
  612. s.write(entry.mtime.strftime(MTIME_FORMAT))
  613. s.write("\t")
  614. s.write(entry.pathname)
  615. s.flush()
  616. def add_passphrase(self, password):
  617. """Adds a password to the archive."""
  618. _libarchive.archive_read_add_passphrase(self._a, password)
  619. def set_passphrase(self, password):
  620. """Sets a password for the archive."""
  621. _libarchive.archive_write_set_passphrase(self._a, password)
  622. class SeekableArchive(Archive):
  623. """A class that provides random-access to archive entries. It does this by using one
  624. or many Archive instances to seek to the correct location. The best performance will
  625. occur when reading archive entries in the order in which they appear in the archive.
  626. Reading out of order will cause the archive to be closed and opened each time a
  627. reverse seek is needed."""
  628. def __init__(self, f, **kwargs):
  629. self._stream = None
  630. # Convert file to open file. We need this to reopen the archive.
  631. mode = kwargs.setdefault("mode", "r")
  632. if isinstance(f, str):
  633. f = open(f, mode)
  634. super(SeekableArchive, self).__init__(f, **kwargs)
  635. self.entries = []
  636. self.eof = False
  637. def __iter__(self):
  638. for entry in self.entries:
  639. yield entry
  640. if not self.eof:
  641. try:
  642. for entry in super(SeekableArchive, self).__iter__():
  643. self.entries.append(entry)
  644. yield entry
  645. except StopIteration:
  646. self.eof = True
  647. def reopen(self):
  648. """Seeks the underlying fd to 0 position, then opens the archive. If the archive
  649. is already open, this will effectively re-open it (rewind to the beginning)."""
  650. self.denit()
  651. self.f.seek(0)
  652. self.init()
  653. def getentry(self, pathname):
  654. """Take a name or entry object and returns an entry object."""
  655. for entry in self:
  656. if entry.pathname == pathname:
  657. return entry
  658. raise KeyError(pathname)
  659. def seek(self, entry):
  660. """Seeks the archive to the requested entry. Will reopen if necessary."""
  661. move = entry.header_position - self.header_position
  662. if move != 0:
  663. if move < 0:
  664. # can't move back, re-open archive:
  665. self.reopen()
  666. # move to proper position in stream
  667. for curr in super(SeekableArchive, self).__iter__():
  668. if curr.header_position == entry.header_position:
  669. break
  670. def read(self, member):
  671. """Return the requested archive entry contents as a string."""
  672. entry = self.getentry(member)
  673. self.seek(entry)
  674. return super(SeekableArchive, self).read(entry.size)
  675. def readpath(self, member, f):
  676. entry = self.getentry(member)
  677. self.seek(entry)
  678. return super(SeekableArchive, self).readpath(f)
  679. def readstream(self, member):
  680. """Returns a file-like object for reading requested archive entry contents."""
  681. entry = self.getentry(member)
  682. self.seek(entry)
  683. self._stream = EntryReadStream(self, entry.size)
  684. return self._stream