|
@@ -36,116 +36,81 @@ PY3 = sys.version_info[0] >= 3 |
|
|
# Suggested block size for libarchive. Libarchive may adjust it. |
|
|
# Suggested block size for libarchive. Libarchive may adjust it. |
|
|
BLOCK_SIZE = 10240 |
|
|
BLOCK_SIZE = 10240 |
|
|
|
|
|
|
|
|
MTIME_FORMAT = "" |
|
|
|
|
|
|
|
|
MTIME_FORMAT = '' |
|
|
|
|
|
|
|
|
# Default encoding scheme. |
|
|
# Default encoding scheme. |
|
|
ENCODING = "utf-8" |
|
|
|
|
|
|
|
|
ENCODING = 'utf-8' |
|
|
|
|
|
|
|
|
# Functions to initialize read/write for various libarchive supported formats and filters. |
|
|
# Functions to initialize read/write for various libarchive supported formats and filters. |
|
|
FORMATS = { |
|
|
FORMATS = { |
|
|
None: (_libarchive.archive_read_support_format_all, None), |
|
|
None: (_libarchive.archive_read_support_format_all, None), |
|
|
"tar": ( |
|
|
|
|
|
_libarchive.archive_read_support_format_tar, |
|
|
|
|
|
_libarchive.archive_write_set_format_ustar, |
|
|
|
|
|
), |
|
|
|
|
|
"pax": ( |
|
|
|
|
|
_libarchive.archive_read_support_format_tar, |
|
|
|
|
|
_libarchive.archive_write_set_format_pax, |
|
|
|
|
|
), |
|
|
|
|
|
"gnu": ( |
|
|
|
|
|
_libarchive.archive_read_support_format_gnutar, |
|
|
|
|
|
_libarchive.archive_write_set_format_gnutar, |
|
|
|
|
|
), |
|
|
|
|
|
"zip": ( |
|
|
|
|
|
_libarchive.archive_read_support_format_zip, |
|
|
|
|
|
_libarchive.archive_write_set_format_zip, |
|
|
|
|
|
), |
|
|
|
|
|
"rar": (_libarchive.archive_read_support_format_rar, None), |
|
|
|
|
|
"7zip": (_libarchive.archive_read_support_format_7zip, None), |
|
|
|
|
|
"ar": (_libarchive.archive_read_support_format_ar, None), |
|
|
|
|
|
"cab": (_libarchive.archive_read_support_format_cab, None), |
|
|
|
|
|
"cpio": ( |
|
|
|
|
|
_libarchive.archive_read_support_format_cpio, |
|
|
|
|
|
_libarchive.archive_write_set_format_cpio_newc, |
|
|
|
|
|
), |
|
|
|
|
|
"iso": ( |
|
|
|
|
|
_libarchive.archive_read_support_format_iso9660, |
|
|
|
|
|
_libarchive.archive_write_set_format_iso9660, |
|
|
|
|
|
), |
|
|
|
|
|
"lha": (_libarchive.archive_read_support_format_lha, None), |
|
|
|
|
|
"xar": ( |
|
|
|
|
|
_libarchive.archive_read_support_format_xar, |
|
|
|
|
|
_libarchive.archive_write_set_format_xar, |
|
|
|
|
|
), |
|
|
|
|
|
|
|
|
'tar': (_libarchive.archive_read_support_format_tar, _libarchive.archive_write_set_format_ustar), |
|
|
|
|
|
'pax': (_libarchive.archive_read_support_format_tar, _libarchive.archive_write_set_format_pax), |
|
|
|
|
|
'gnu': (_libarchive.archive_read_support_format_gnutar, _libarchive.archive_write_set_format_gnutar), |
|
|
|
|
|
'zip': (_libarchive.archive_read_support_format_zip, _libarchive.archive_write_set_format_zip), |
|
|
|
|
|
'rar': (_libarchive.archive_read_support_format_rar, None), |
|
|
|
|
|
'7zip': (_libarchive.archive_read_support_format_7zip, None), |
|
|
|
|
|
'ar': (_libarchive.archive_read_support_format_ar, None), |
|
|
|
|
|
'cab': (_libarchive.archive_read_support_format_cab, None), |
|
|
|
|
|
'cpio': (_libarchive.archive_read_support_format_cpio, _libarchive.archive_write_set_format_cpio_newc), |
|
|
|
|
|
'iso': (_libarchive.archive_read_support_format_iso9660, _libarchive.archive_write_set_format_iso9660), |
|
|
|
|
|
'lha': (_libarchive.archive_read_support_format_lha, None), |
|
|
|
|
|
'xar': (_libarchive.archive_read_support_format_xar, _libarchive.archive_write_set_format_xar), |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
FILTERS = { |
|
|
FILTERS = { |
|
|
None: ( |
|
|
|
|
|
_libarchive.archive_read_support_filter_all, |
|
|
|
|
|
_libarchive.archive_write_add_filter_none, |
|
|
|
|
|
), |
|
|
|
|
|
"gz": ( |
|
|
|
|
|
_libarchive.archive_read_support_filter_gzip, |
|
|
|
|
|
_libarchive.archive_write_add_filter_gzip, |
|
|
|
|
|
), |
|
|
|
|
|
"bz2": ( |
|
|
|
|
|
_libarchive.archive_read_support_filter_bzip2, |
|
|
|
|
|
_libarchive.archive_write_add_filter_bzip2, |
|
|
|
|
|
), |
|
|
|
|
|
|
|
|
None: (_libarchive.archive_read_support_filter_all, _libarchive.archive_write_add_filter_none), |
|
|
|
|
|
'gz': (_libarchive.archive_read_support_filter_gzip, _libarchive.archive_write_add_filter_gzip), |
|
|
|
|
|
'bz2': (_libarchive.archive_read_support_filter_bzip2, _libarchive.archive_write_add_filter_bzip2), |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
# Map file extensions to formats and filters. To support quick detection. |
|
|
# Map file extensions to formats and filters. To support quick detection. |
|
|
FORMAT_EXTENSIONS = { |
|
|
FORMAT_EXTENSIONS = { |
|
|
".tar": "tar", |
|
|
|
|
|
".zip": "zip", |
|
|
|
|
|
".rar": "rar", |
|
|
|
|
|
".7z": "7zip", |
|
|
|
|
|
".ar": "ar", |
|
|
|
|
|
".cab": "cab", |
|
|
|
|
|
".rpm": "cpio", |
|
|
|
|
|
".cpio": "cpio", |
|
|
|
|
|
".iso": "iso", |
|
|
|
|
|
".lha": "lha", |
|
|
|
|
|
".xar": "xar", |
|
|
|
|
|
|
|
|
'.tar': 'tar', |
|
|
|
|
|
'.zip': 'zip', |
|
|
|
|
|
'.rar': 'rar', |
|
|
|
|
|
'.7z': '7zip', |
|
|
|
|
|
'.ar': 'ar', |
|
|
|
|
|
'.cab': 'cab', |
|
|
|
|
|
'.rpm': 'cpio', |
|
|
|
|
|
'.cpio': 'cpio', |
|
|
|
|
|
'.iso': 'iso', |
|
|
|
|
|
'.lha': 'lha', |
|
|
|
|
|
'.xar': 'xar', |
|
|
} |
|
|
} |
|
|
FILTER_EXTENSIONS = { |
|
|
FILTER_EXTENSIONS = { |
|
|
".gz": "gz", |
|
|
|
|
|
".bz2": "bz2", |
|
|
|
|
|
|
|
|
'.gz': 'gz', |
|
|
|
|
|
'.bz2': 'bz2', |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class EOF(Exception): |
|
|
class EOF(Exception): |
|
|
"""Raised by ArchiveInfo.from_archive() when unable to read the next |
|
|
|
|
|
archive header.""" |
|
|
|
|
|
|
|
|
'''Raised by ArchiveInfo.from_archive() when unable to read the next |
|
|
|
|
|
archive header.''' |
|
|
|
|
|
|
|
|
pass |
|
|
pass |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def version(): |
|
|
def version(): |
|
|
"""Returns the version of the libarchive library.""" |
|
|
|
|
|
|
|
|
'''Returns the version of the libarchive library.''' |
|
|
return _libarchive.archive_version_string().split()[1] |
|
|
return _libarchive.archive_version_string().split()[1] |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_error(archive): |
|
|
def get_error(archive): |
|
|
"""Retrieves the last error description for the given archive instance.""" |
|
|
|
|
|
|
|
|
'''Retrieves the last error description for the given archive instance.''' |
|
|
return _libarchive.archive_error_string(archive) |
|
|
return _libarchive.archive_error_string(archive) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def call_and_check(func, archive, *args): |
|
|
def call_and_check(func, archive, *args): |
|
|
"""Executes a libarchive function and raises an exception when appropriate.""" |
|
|
|
|
|
|
|
|
'''Executes a libarchive function and raises an exception when appropriate.''' |
|
|
ret = func(*args) |
|
|
ret = func(*args) |
|
|
if ret == _libarchive.ARCHIVE_OK: |
|
|
if ret == _libarchive.ARCHIVE_OK: |
|
|
return |
|
|
return |
|
|
elif ret == _libarchive.ARCHIVE_WARN: |
|
|
elif ret == _libarchive.ARCHIVE_WARN: |
|
|
warnings.warn( |
|
|
|
|
|
"Warning executing function: %s." % get_error(archive), RuntimeWarning |
|
|
|
|
|
) |
|
|
|
|
|
|
|
|
warnings.warn('Warning executing function: %s.' % get_error(archive), RuntimeWarning) |
|
|
elif ret == _libarchive.ARCHIVE_EOF: |
|
|
elif ret == _libarchive.ARCHIVE_EOF: |
|
|
raise EOF() |
|
|
raise EOF() |
|
|
else: |
|
|
else: |
|
|
raise Exception( |
|
|
|
|
|
"Problem executing function, message is: %s." % get_error(archive) |
|
|
|
|
|
) |
|
|
|
|
|
|
|
|
raise Exception('Problem executing function, message is: %s.' % get_error(archive)) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_func(name, items, index): |
|
|
def get_func(name, items, index): |
|
@@ -157,7 +122,7 @@ def get_func(name, items, index): |
|
|
|
|
|
|
|
|
def guess_format(filename): |
|
|
def guess_format(filename): |
|
|
if isinstance(filename, int): |
|
|
if isinstance(filename, int): |
|
|
filename = ext = "" |
|
|
|
|
|
|
|
|
filename = ext = '' |
|
|
else: |
|
|
else: |
|
|
filename, ext = os.path.splitext(filename) |
|
|
filename, ext = os.path.splitext(filename) |
|
|
filter = FILTER_EXTENSIONS.get(ext) |
|
|
filter = FILTER_EXTENSIONS.get(ext) |
|
@@ -168,12 +133,12 @@ def guess_format(filename): |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def is_archive_name(filename, formats=None): |
|
|
def is_archive_name(filename, formats=None): |
|
|
"""Quick check to see if the given file has an extension indiciating that it is |
|
|
|
|
|
|
|
|
'''Quick check to see if the given file has an extension indiciating that it is |
|
|
an archive. The format parameter can be used to limit what archive format is acceptable. |
|
|
an archive. The format parameter can be used to limit what archive format is acceptable. |
|
|
If omitted, all supported archive formats will be checked. |
|
|
If omitted, all supported archive formats will be checked. |
|
|
|
|
|
|
|
|
This function will return the name of the most likely archive format, None if the file is |
|
|
This function will return the name of the most likely archive format, None if the file is |
|
|
unlikely to be an archive.""" |
|
|
|
|
|
|
|
|
unlikely to be an archive.''' |
|
|
if formats is None: |
|
|
if formats is None: |
|
|
formats = list(FORMAT_EXTENSIONS.values()) |
|
|
formats = list(FORMAT_EXTENSIONS.values()) |
|
|
format, filter = guess_format(filename) |
|
|
format, filter = guess_format(filename) |
|
@@ -182,7 +147,7 @@ def is_archive_name(filename, formats=None): |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def is_archive(f, formats=(None,), filters=(None,)): |
|
|
def is_archive(f, formats=(None,), filters=(None,)): |
|
|
"""Check to see if the given file is actually an archive. The format parameter |
|
|
|
|
|
|
|
|
'''Check to see if the given file is actually an archive. The format parameter |
|
|
can be used to specify which archive format is acceptable. If ommitted, all supported |
|
|
can be used to specify which archive format is acceptable. If ommitted, all supported |
|
|
archive formats will be checked. It opens the file using libarchive. If no error is |
|
|
archive formats will be checked. It opens the file using libarchive. If no error is |
|
|
received, the file was successfully detected by the libarchive bidding process. |
|
|
received, the file was successfully detected by the libarchive bidding process. |
|
@@ -193,10 +158,10 @@ def is_archive(f, formats=(None,), filters=(None,)): |
|
|
this function. |
|
|
this function. |
|
|
|
|
|
|
|
|
This function will return True if the file can be opened as an archive using the given |
|
|
This function will return True if the file can be opened as an archive using the given |
|
|
format(s)/filter(s).""" |
|
|
|
|
|
|
|
|
format(s)/filter(s).''' |
|
|
need_close = False |
|
|
need_close = False |
|
|
if isinstance(f, str): |
|
|
if isinstance(f, str): |
|
|
f = open(f, "rb") |
|
|
|
|
|
|
|
|
f = open(f, 'rb') |
|
|
need_close = True |
|
|
need_close = True |
|
|
a = _libarchive.archive_read_new() |
|
|
a = _libarchive.archive_read_new() |
|
|
for format in formats: |
|
|
for format in formats: |
|
@@ -211,9 +176,7 @@ def is_archive(f, formats=(None,), filters=(None,)): |
|
|
filter(a) |
|
|
filter(a) |
|
|
try: |
|
|
try: |
|
|
try: |
|
|
try: |
|
|
call_and_check( |
|
|
|
|
|
_libarchive.archive_read_open_fd, a, a, f.fileno(), BLOCK_SIZE |
|
|
|
|
|
) |
|
|
|
|
|
|
|
|
call_and_check(_libarchive.archive_read_open_fd, a, a, f.fileno(), BLOCK_SIZE) |
|
|
return True |
|
|
return True |
|
|
except: |
|
|
except: |
|
|
return False |
|
|
return False |
|
@@ -225,7 +188,7 @@ def is_archive(f, formats=(None,), filters=(None,)): |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class EntryReadStream(object): |
|
|
class EntryReadStream(object): |
|
|
"""A file-like object for reading an entry from the archive.""" |
|
|
|
|
|
|
|
|
'''A file-like object for reading an entry from the archive.''' |
|
|
|
|
|
|
|
|
def __init__(self, archive, size): |
|
|
def __init__(self, archive, size): |
|
|
self.archive = archive |
|
|
self.archive = archive |
|
@@ -281,11 +244,11 @@ class EntryReadStream(object): |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class EntryWriteStream(object): |
|
|
class EntryWriteStream(object): |
|
|
"""A file-like object for writing an entry to an archive. |
|
|
|
|
|
|
|
|
'''A file-like object for writing an entry to an archive. |
|
|
|
|
|
|
|
|
If the size is known ahead of time and provided, then the file contents |
|
|
If the size is known ahead of time and provided, then the file contents |
|
|
are not buffered but flushed directly to the archive. If size is omitted, |
|
|
are not buffered but flushed directly to the archive. If size is omitted, |
|
|
then the file contents are buffered and flushed in the close() method.""" |
|
|
|
|
|
|
|
|
then the file contents are buffered and flushed in the close() method.''' |
|
|
|
|
|
|
|
|
def __init__(self, archive, pathname, size=None): |
|
|
def __init__(self, archive, pathname, size=None): |
|
|
self.archive = archive |
|
|
self.archive = archive |
|
@@ -316,13 +279,11 @@ class EntryWriteStream(object): |
|
|
|
|
|
|
|
|
def write(self, data): |
|
|
def write(self, data): |
|
|
if self.closed: |
|
|
if self.closed: |
|
|
raise Exception("Cannot write to closed stream.") |
|
|
|
|
|
|
|
|
raise Exception('Cannot write to closed stream.') |
|
|
if self.buffer: |
|
|
if self.buffer: |
|
|
self.buffer.write(data) |
|
|
self.buffer.write(data) |
|
|
else: |
|
|
else: |
|
|
_libarchive.archive_write_data_from_str( |
|
|
|
|
|
self.archive._a, data.encode(ENCODING) |
|
|
|
|
|
) |
|
|
|
|
|
|
|
|
_libarchive.archive_write_data_from_str(self.archive._a, data.encode(ENCODING)) |
|
|
self.bytes += len(data) |
|
|
self.bytes += len(data) |
|
|
|
|
|
|
|
|
def close(self): |
|
|
def close(self): |
|
@@ -331,9 +292,7 @@ class EntryWriteStream(object): |
|
|
if self.buffer: |
|
|
if self.buffer: |
|
|
self.entry.size = self.buffer.tell() |
|
|
self.entry.size = self.buffer.tell() |
|
|
self.entry.to_archive(self.archive) |
|
|
self.entry.to_archive(self.archive) |
|
|
_libarchive.archive_write_data_from_str( |
|
|
|
|
|
self.archive._a, self.buffer.getvalue().encode(ENCODING) |
|
|
|
|
|
) |
|
|
|
|
|
|
|
|
_libarchive.archive_write_data_from_str(self.archive._a, self.buffer.getvalue().encode(ENCODING)) |
|
|
_libarchive.archive_write_finish_entry(self.archive._a) |
|
|
_libarchive.archive_write_finish_entry(self.archive._a) |
|
|
|
|
|
|
|
|
# Call archive.close() with _defer True to let it know we have been |
|
|
# Call archive.close() with _defer True to let it know we have been |
|
@@ -344,17 +303,9 @@ class EntryWriteStream(object): |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class Entry(object): |
|
|
class Entry(object): |
|
|
"""An entry within an archive. Represents the header data and it's location within the archive.""" |
|
|
|
|
|
|
|
|
'''An entry within an archive. Represents the header data and it's location within the archive.''' |
|
|
|
|
|
|
|
|
def __init__( |
|
|
|
|
|
self, |
|
|
|
|
|
pathname=None, |
|
|
|
|
|
size=None, |
|
|
|
|
|
mtime=None, |
|
|
|
|
|
mode=None, |
|
|
|
|
|
hpos=None, |
|
|
|
|
|
encoding=ENCODING, |
|
|
|
|
|
): |
|
|
|
|
|
|
|
|
def __init__(self, pathname=None, size=None, mtime=None, mode=None, hpos=None, encoding=ENCODING): |
|
|
|
|
|
|
|
|
# , symlink=None |
|
|
# , symlink=None |
|
|
self.pathname = pathname |
|
|
self.pathname = pathname |
|
@@ -372,12 +323,10 @@ class Entry(object): |
|
|
|
|
|
|
|
|
@classmethod |
|
|
@classmethod |
|
|
def from_archive(cls, archive, encoding=ENCODING): |
|
|
def from_archive(cls, archive, encoding=ENCODING): |
|
|
"""Instantiates an Entry class and sets all the properties from an archive header.""" |
|
|
|
|
|
|
|
|
'''Instantiates an Entry class and sets all the properties from an archive header.''' |
|
|
e = _libarchive.archive_entry_new() |
|
|
e = _libarchive.archive_entry_new() |
|
|
try: |
|
|
try: |
|
|
call_and_check( |
|
|
|
|
|
_libarchive.archive_read_next_header2, archive._a, archive._a, e |
|
|
|
|
|
) |
|
|
|
|
|
|
|
|
call_and_check(_libarchive.archive_read_next_header2, archive._a, archive._a, e) |
|
|
mode = _libarchive.archive_entry_filetype(e) |
|
|
mode = _libarchive.archive_entry_filetype(e) |
|
|
mode |= _libarchive.archive_entry_perm(e) |
|
|
mode |= _libarchive.archive_entry_perm(e) |
|
|
|
|
|
|
|
@@ -404,8 +353,8 @@ class Entry(object): |
|
|
|
|
|
|
|
|
@classmethod |
|
|
@classmethod |
|
|
def from_file(cls, f, entry=None, encoding=ENCODING): |
|
|
def from_file(cls, f, entry=None, encoding=ENCODING): |
|
|
"""Instantiates an Entry class and sets all the properties from a file on the file system. |
|
|
|
|
|
f can be a file-like object or a path.""" |
|
|
|
|
|
|
|
|
'''Instantiates an Entry class and sets all the properties from a file on the file system. |
|
|
|
|
|
f can be a file-like object or a path.''' |
|
|
if entry is None: |
|
|
if entry is None: |
|
|
entry = cls(encoding=encoding) |
|
|
entry = cls(encoding=encoding) |
|
|
if entry.pathname is None: |
|
|
if entry.pathname is None: |
|
@@ -415,30 +364,29 @@ class Entry(object): |
|
|
entry.size = st.st_size |
|
|
entry.size = st.st_size |
|
|
entry.mtime = st.st_mtime |
|
|
entry.mtime = st.st_mtime |
|
|
entry.mode = st.st_mode |
|
|
entry.mode = st.st_mode |
|
|
elif hasattr(f, "fileno"): |
|
|
|
|
|
|
|
|
elif hasattr(f, 'fileno'): |
|
|
st = os.fstat(f.fileno()) |
|
|
st = os.fstat(f.fileno()) |
|
|
entry.pathname = getattr(f, "name", None) |
|
|
|
|
|
|
|
|
entry.pathname = getattr(f, 'name', None) |
|
|
entry.size = st.st_size |
|
|
entry.size = st.st_size |
|
|
entry.mtime = st.st_mtime |
|
|
entry.mtime = st.st_mtime |
|
|
entry.mode = st.st_mode |
|
|
entry.mode = st.st_mode |
|
|
else: |
|
|
else: |
|
|
entry.pathname = getattr(f, "pathname", None) |
|
|
|
|
|
entry.size = getattr(f, "size", 0) |
|
|
|
|
|
entry.mtime = getattr(f, "mtime", time.time()) |
|
|
|
|
|
|
|
|
entry.pathname = getattr(f, 'pathname', None) |
|
|
|
|
|
entry.size = getattr(f, 'size', 0) |
|
|
|
|
|
entry.mtime = getattr(f, 'mtime', time.time()) |
|
|
entry.mode = stat.S_IFREG |
|
|
entry.mode = stat.S_IFREG |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
return entry |
|
|
return entry |
|
|
|
|
|
|
|
|
def to_archive(self, archive): |
|
|
def to_archive(self, archive): |
|
|
"""Creates an archive header and writes it to the given archive.""" |
|
|
|
|
|
|
|
|
'''Creates an archive header and writes it to the given archive.''' |
|
|
e = _libarchive.archive_entry_new() |
|
|
e = _libarchive.archive_entry_new() |
|
|
try: |
|
|
try: |
|
|
if PY3: |
|
|
if PY3: |
|
|
_libarchive.archive_entry_set_pathname(e, self.pathname) |
|
|
_libarchive.archive_entry_set_pathname(e, self.pathname) |
|
|
else: |
|
|
else: |
|
|
_libarchive.archive_entry_set_pathname( |
|
|
|
|
|
e, self.pathname.encode(self.encoding) |
|
|
|
|
|
) |
|
|
|
|
|
|
|
|
_libarchive.archive_entry_set_pathname(e, self.pathname.encode(self.encoding)) |
|
|
_libarchive.archive_entry_set_filetype(e, stat.S_IFMT(self.mode)) |
|
|
_libarchive.archive_entry_set_filetype(e, stat.S_IFMT(self.mode)) |
|
|
_libarchive.archive_entry_set_perm(e, stat.S_IMODE(self.mode)) |
|
|
_libarchive.archive_entry_set_perm(e, stat.S_IMODE(self.mode)) |
|
|
_libarchive.archive_entry_set_size(e, self.size) |
|
|
_libarchive.archive_entry_set_size(e, self.size) |
|
@@ -472,13 +420,13 @@ class Entry(object): |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class Archive(object): |
|
|
class Archive(object): |
|
|
"""A low-level archive reader which provides forward-only iteration. Consider |
|
|
|
|
|
this a light-weight pythonic libarchive wrapper.""" |
|
|
|
|
|
|
|
|
'''A low-level archive reader which provides forward-only iteration. Consider |
|
|
|
|
|
this a light-weight pythonic libarchive wrapper.''' |
|
|
|
|
|
|
|
|
def __init__( |
|
|
def __init__( |
|
|
self, |
|
|
self, |
|
|
f, |
|
|
f, |
|
|
mode="r", |
|
|
|
|
|
|
|
|
mode='r', |
|
|
format=None, |
|
|
format=None, |
|
|
filter=None, |
|
|
filter=None, |
|
|
entry_class=Entry, |
|
|
entry_class=Entry, |
|
@@ -486,7 +434,7 @@ class Archive(object): |
|
|
blocksize=BLOCK_SIZE, |
|
|
blocksize=BLOCK_SIZE, |
|
|
password=None, |
|
|
password=None, |
|
|
): |
|
|
): |
|
|
assert mode in ("r", "w", "wb", "a"), 'Mode should be "r", "w", "wb", or "a".' |
|
|
|
|
|
|
|
|
assert mode in ('r', 'w', 'wb', 'a'), 'Mode should be "r", "w", "wb", or "a".' |
|
|
self._stream = None |
|
|
self._stream = None |
|
|
self.encoding = encoding |
|
|
self.encoding = encoding |
|
|
self.blocksize = blocksize |
|
|
self.blocksize = blocksize |
|
@@ -496,12 +444,12 @@ class Archive(object): |
|
|
f = open(f, mode) |
|
|
f = open(f, mode) |
|
|
# Only close it if we opened it... |
|
|
# Only close it if we opened it... |
|
|
self._defer_close = True |
|
|
self._defer_close = True |
|
|
elif hasattr(f, "fileno"): |
|
|
|
|
|
self.filename = getattr(f, "name", None) |
|
|
|
|
|
|
|
|
elif hasattr(f, 'fileno'): |
|
|
|
|
|
self.filename = getattr(f, 'name', None) |
|
|
# Leave the fd alone, caller should manage it... |
|
|
# Leave the fd alone, caller should manage it... |
|
|
self._defer_close = False |
|
|
self._defer_close = False |
|
|
else: |
|
|
else: |
|
|
raise Exception("Provided file is not path or open file.") |
|
|
|
|
|
|
|
|
raise Exception('Provided file is not path or open file.') |
|
|
self.f = f |
|
|
self.f = f |
|
|
self.mode = mode |
|
|
self.mode = mode |
|
|
# Guess the format/filter from file name (if not provided) |
|
|
# Guess the format/filter from file name (if not provided) |
|
@@ -515,23 +463,23 @@ class Archive(object): |
|
|
# The class to use for entries. |
|
|
# The class to use for entries. |
|
|
self.entry_class = entry_class |
|
|
self.entry_class = entry_class |
|
|
# Select filter/format functions. |
|
|
# Select filter/format functions. |
|
|
if self.mode == "r": |
|
|
|
|
|
|
|
|
if self.mode == 'r': |
|
|
self.format_func = get_func(self.format, FORMATS, 0) |
|
|
self.format_func = get_func(self.format, FORMATS, 0) |
|
|
if self.format_func is None: |
|
|
if self.format_func is None: |
|
|
raise Exception("Unsupported format %s" % format) |
|
|
|
|
|
|
|
|
raise Exception('Unsupported format %s' % format) |
|
|
self.filter_func = get_func(self.filter, FILTERS, 0) |
|
|
self.filter_func = get_func(self.filter, FILTERS, 0) |
|
|
if self.filter_func is None: |
|
|
if self.filter_func is None: |
|
|
raise Exception("Unsupported filter %s" % filter) |
|
|
|
|
|
|
|
|
raise Exception('Unsupported filter %s' % filter) |
|
|
else: |
|
|
else: |
|
|
# TODO: how to support appending? |
|
|
# TODO: how to support appending? |
|
|
if self.format is None: |
|
|
if self.format is None: |
|
|
raise Exception("You must specify a format for writing.") |
|
|
|
|
|
|
|
|
raise Exception('You must specify a format for writing.') |
|
|
self.format_func = get_func(self.format, FORMATS, 1) |
|
|
self.format_func = get_func(self.format, FORMATS, 1) |
|
|
if self.format_func is None: |
|
|
if self.format_func is None: |
|
|
raise Exception("Unsupported format %s" % format) |
|
|
|
|
|
|
|
|
raise Exception('Unsupported format %s' % format) |
|
|
self.filter_func = get_func(self.filter, FILTERS, 1) |
|
|
self.filter_func = get_func(self.filter, FILTERS, 1) |
|
|
if self.filter_func is None: |
|
|
if self.filter_func is None: |
|
|
raise Exception("Unsupported filter %s" % filter) |
|
|
|
|
|
|
|
|
raise Exception('Unsupported filter %s' % filter) |
|
|
# Open the archive, apply filter/format functions. |
|
|
# Open the archive, apply filter/format functions. |
|
|
self.init() |
|
|
self.init() |
|
|
|
|
|
|
|
@@ -551,47 +499,31 @@ class Archive(object): |
|
|
def __del__(self): |
|
|
def __del__(self): |
|
|
self.close() |
|
|
self.close() |
|
|
|
|
|
|
|
|
def set_initial_options(self): |
|
|
|
|
|
pass |
|
|
|
|
|
|
|
|
|
|
|
def init(self): |
|
|
def init(self): |
|
|
if self.mode == "r": |
|
|
|
|
|
|
|
|
if self.mode == 'r': |
|
|
self._a = _libarchive.archive_read_new() |
|
|
self._a = _libarchive.archive_read_new() |
|
|
else: |
|
|
else: |
|
|
self._a = _libarchive.archive_write_new() |
|
|
self._a = _libarchive.archive_write_new() |
|
|
self.format_func(self._a) |
|
|
self.format_func(self._a) |
|
|
self.filter_func(self._a) |
|
|
self.filter_func(self._a) |
|
|
self.set_initial_options() |
|
|
|
|
|
if self.mode == "r": |
|
|
|
|
|
|
|
|
if self.mode == 'r': |
|
|
if self.password: |
|
|
if self.password: |
|
|
if isinstance(self.password, list): |
|
|
|
|
|
for pwd in self.password: |
|
|
|
|
|
self.add_passphrase(pwd) |
|
|
|
|
|
else: |
|
|
|
|
|
self.add_passphrase(self.password) |
|
|
|
|
|
call_and_check( |
|
|
|
|
|
_libarchive.archive_read_open_fd, |
|
|
|
|
|
self._a, |
|
|
|
|
|
self._a, |
|
|
|
|
|
self.f.fileno(), |
|
|
|
|
|
self.blocksize, |
|
|
|
|
|
) |
|
|
|
|
|
|
|
|
self.add_passphrase(self.password) |
|
|
|
|
|
call_and_check(_libarchive.archive_read_open_fd, self._a, self._a, self.f.fileno(), self.blocksize) |
|
|
else: |
|
|
else: |
|
|
if self.password: |
|
|
if self.password: |
|
|
self.set_passphrase(self.password) |
|
|
self.set_passphrase(self.password) |
|
|
call_and_check( |
|
|
|
|
|
_libarchive.archive_write_open_fd, self._a, self._a, self.f.fileno() |
|
|
|
|
|
) |
|
|
|
|
|
|
|
|
call_and_check(_libarchive.archive_write_open_fd, self._a, self._a, self.f.fileno()) |
|
|
|
|
|
|
|
|
def denit(self): |
|
|
def denit(self): |
|
|
"""Closes and deallocates the archive reader/writer.""" |
|
|
|
|
|
if getattr(self, "_a", None) is None: |
|
|
|
|
|
|
|
|
'''Closes and deallocates the archive reader/writer.''' |
|
|
|
|
|
if getattr(self, '_a', None) is None: |
|
|
return |
|
|
return |
|
|
try: |
|
|
try: |
|
|
if self.mode == "r": |
|
|
|
|
|
|
|
|
if self.mode == 'r': |
|
|
_libarchive.archive_read_close(self._a) |
|
|
_libarchive.archive_read_close(self._a) |
|
|
_libarchive.archive_read_free(self._a) |
|
|
_libarchive.archive_read_free(self._a) |
|
|
elif self.mode == "w": |
|
|
|
|
|
|
|
|
elif self.mode == 'w': |
|
|
_libarchive.archive_write_close(self._a) |
|
|
_libarchive.archive_write_close(self._a) |
|
|
_libarchive.archive_write_free(self._a) |
|
|
_libarchive.archive_write_free(self._a) |
|
|
finally: |
|
|
finally: |
|
@@ -613,23 +545,23 @@ class Archive(object): |
|
|
return |
|
|
return |
|
|
self.denit() |
|
|
self.denit() |
|
|
# If there is a file attached... |
|
|
# If there is a file attached... |
|
|
if hasattr(self, "f"): |
|
|
|
|
|
|
|
|
if hasattr(self, 'f'): |
|
|
# Make sure it is not already closed... |
|
|
# Make sure it is not already closed... |
|
|
if getattr(self.f, "closed", False): |
|
|
|
|
|
|
|
|
if getattr(self.f, 'closed', False): |
|
|
return |
|
|
return |
|
|
# Flush it if not read-only... |
|
|
# Flush it if not read-only... |
|
|
if hasattr(self.f, "mode") and self.f.mode != "r" and self.f.mode != "rb": |
|
|
|
|
|
|
|
|
if hasattr(self.f, "mode") and self.f.mode != 'r' and self.f.mode != 'rb': |
|
|
if hasattr(self.f, "flush"): |
|
|
if hasattr(self.f, "flush"): |
|
|
self.f.flush() |
|
|
self.f.flush() |
|
|
if hasattr(self.f, "fileno"): |
|
|
if hasattr(self.f, "fileno"): |
|
|
os.fsync(self.f.fileno()) |
|
|
os.fsync(self.f.fileno()) |
|
|
# and then close it, if we opened it... |
|
|
# and then close it, if we opened it... |
|
|
if getattr(self, "_close", None): |
|
|
|
|
|
|
|
|
if getattr(self, '_close', None): |
|
|
self.f.close() |
|
|
self.f.close() |
|
|
|
|
|
|
|
|
@property |
|
|
@property |
|
|
def header_position(self): |
|
|
def header_position(self): |
|
|
"""The position within the file.""" |
|
|
|
|
|
|
|
|
'''The position within the file.''' |
|
|
return _libarchive.archive_read_header_position(self._a) |
|
|
return _libarchive.archive_read_header_position(self._a) |
|
|
|
|
|
|
|
|
def iterpaths(self): |
|
|
def iterpaths(self): |
|
@@ -637,33 +569,30 @@ class Archive(object): |
|
|
yield entry.pathname |
|
|
yield entry.pathname |
|
|
|
|
|
|
|
|
def read(self, size): |
|
|
def read(self, size): |
|
|
"""Read current archive entry contents into string.""" |
|
|
|
|
|
|
|
|
'''Read current archive entry contents into string.''' |
|
|
return _libarchive.archive_read_data_into_str(self._a, size) |
|
|
return _libarchive.archive_read_data_into_str(self._a, size) |
|
|
|
|
|
|
|
|
def readpath(self, f): |
|
|
def readpath(self, f): |
|
|
"""Write current archive entry contents to file. f can be a file-like object or |
|
|
|
|
|
a path.""" |
|
|
|
|
|
|
|
|
'''Write current archive entry contents to file. f can be a file-like object or |
|
|
|
|
|
a path.''' |
|
|
if isinstance(f, str): |
|
|
if isinstance(f, str): |
|
|
basedir = os.path.dirname(f) |
|
|
basedir = os.path.dirname(f) |
|
|
if not os.path.exists(basedir): |
|
|
if not os.path.exists(basedir): |
|
|
os.makedirs(basedir) |
|
|
os.makedirs(basedir) |
|
|
f = open(f, "w") |
|
|
|
|
|
|
|
|
f = open(f, 'w') |
|
|
return _libarchive.archive_read_data_into_fd(self._a, f.fileno()) |
|
|
return _libarchive.archive_read_data_into_fd(self._a, f.fileno()) |
|
|
|
|
|
|
|
|
def readstream(self, size): |
|
|
def readstream(self, size): |
|
|
"""Returns a file-like object for reading current archive entry contents.""" |
|
|
|
|
|
|
|
|
'''Returns a file-like object for reading current archive entry contents.''' |
|
|
self._stream = EntryReadStream(self, size) |
|
|
self._stream = EntryReadStream(self, size) |
|
|
return self._stream |
|
|
return self._stream |
|
|
|
|
|
|
|
|
def write(self, member, data=None): |
|
|
def write(self, member, data=None): |
|
|
"""Writes a string buffer to the archive as the given entry.""" |
|
|
|
|
|
|
|
|
'''Writes a string buffer to the archive as the given entry.''' |
|
|
if isinstance(member, str): |
|
|
if isinstance(member, str): |
|
|
member = self.entry_class(pathname=member, encoding=self.encoding) |
|
|
member = self.entry_class(pathname=member, encoding=self.encoding) |
|
|
member.mode = stat.S_IFREG |
|
|
|
|
|
member.mtime = time.time() |
|
|
|
|
|
if data: |
|
|
if data: |
|
|
member.size = len(data) |
|
|
member.size = len(data) |
|
|
|
|
|
|
|
|
member.to_archive(self) |
|
|
member.to_archive(self) |
|
|
|
|
|
|
|
|
if data: |
|
|
if data: |
|
@@ -671,65 +600,63 @@ class Archive(object): |
|
|
if isinstance(data, bytes): |
|
|
if isinstance(data, bytes): |
|
|
result = _libarchive.archive_write_data_from_str(self._a, data) |
|
|
result = _libarchive.archive_write_data_from_str(self._a, data) |
|
|
else: |
|
|
else: |
|
|
result = _libarchive.archive_write_data_from_str( |
|
|
|
|
|
self._a, data.encode(self.encoding) |
|
|
|
|
|
) |
|
|
|
|
|
|
|
|
result = _libarchive.archive_write_data_from_str(self._a, data.encode(self.encoding)) |
|
|
else: |
|
|
else: |
|
|
result = _libarchive.archive_write_data_from_str(self._a, data) |
|
|
result = _libarchive.archive_write_data_from_str(self._a, data) |
|
|
_libarchive.archive_write_finish_entry(self._a) |
|
|
_libarchive.archive_write_finish_entry(self._a) |
|
|
|
|
|
|
|
|
def writepath(self, f, pathname=None, folder=False): |
|
|
def writepath(self, f, pathname=None, folder=False): |
|
|
"""Writes a file to the archive. f can be a file-like object or a path. Uses |
|
|
|
|
|
write() to do the actual writing.""" |
|
|
|
|
|
|
|
|
'''Writes a file to the archive. f can be a file-like object or a path. Uses |
|
|
|
|
|
write() to do the actual writing.''' |
|
|
member = self.entry_class.from_file(f, encoding=self.encoding) |
|
|
member = self.entry_class.from_file(f, encoding=self.encoding) |
|
|
if isinstance(f, str): |
|
|
if isinstance(f, str): |
|
|
if os.path.isfile(f): |
|
|
if os.path.isfile(f): |
|
|
f = open(f, "r") |
|
|
|
|
|
|
|
|
f = open(f, 'r') |
|
|
if pathname: |
|
|
if pathname: |
|
|
member.pathname = pathname |
|
|
member.pathname = pathname |
|
|
if folder and not member.isdir(): |
|
|
if folder and not member.isdir(): |
|
|
member.mode = stat.S_IFDIR |
|
|
member.mode = stat.S_IFDIR |
|
|
|
|
|
|
|
|
if hasattr(f, "read"): |
|
|
|
|
|
|
|
|
if hasattr(f, 'read'): |
|
|
# TODO: optimize this to write directly from f to archive. |
|
|
# TODO: optimize this to write directly from f to archive. |
|
|
self.write(member, data=f.read()) |
|
|
self.write(member, data=f.read()) |
|
|
else: |
|
|
else: |
|
|
self.write(member) |
|
|
self.write(member) |
|
|
|
|
|
|
|
|
def writestream(self, pathname, size=None): |
|
|
def writestream(self, pathname, size=None): |
|
|
"""Returns a file-like object for writing a new entry.""" |
|
|
|
|
|
|
|
|
'''Returns a file-like object for writing a new entry.''' |
|
|
self._stream = EntryWriteStream(self, pathname, size) |
|
|
self._stream = EntryWriteStream(self, pathname, size) |
|
|
return self._stream |
|
|
return self._stream |
|
|
|
|
|
|
|
|
def printlist(self, s=sys.stdout): |
|
|
def printlist(self, s=sys.stdout): |
|
|
for entry in self: |
|
|
for entry in self: |
|
|
s.write(entry.size) |
|
|
s.write(entry.size) |
|
|
s.write("\t") |
|
|
|
|
|
|
|
|
s.write('\t') |
|
|
s.write(entry.mtime.strftime(MTIME_FORMAT)) |
|
|
s.write(entry.mtime.strftime(MTIME_FORMAT)) |
|
|
s.write("\t") |
|
|
|
|
|
|
|
|
s.write('\t') |
|
|
s.write(entry.pathname) |
|
|
s.write(entry.pathname) |
|
|
s.flush() |
|
|
s.flush() |
|
|
|
|
|
|
|
|
def add_passphrase(self, password): |
|
|
def add_passphrase(self, password): |
|
|
"""Adds a password to the archive.""" |
|
|
|
|
|
|
|
|
'''Adds a password to the archive.''' |
|
|
_libarchive.archive_read_add_passphrase(self._a, password) |
|
|
_libarchive.archive_read_add_passphrase(self._a, password) |
|
|
|
|
|
|
|
|
def set_passphrase(self, password): |
|
|
def set_passphrase(self, password): |
|
|
"""Sets a password for the archive.""" |
|
|
|
|
|
|
|
|
'''Sets a password for the archive.''' |
|
|
_libarchive.archive_write_set_passphrase(self._a, password) |
|
|
_libarchive.archive_write_set_passphrase(self._a, password) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class SeekableArchive(Archive): |
|
|
class SeekableArchive(Archive): |
|
|
"""A class that provides random-access to archive entries. It does this by using one |
|
|
|
|
|
|
|
|
'''A class that provides random-access to archive entries. It does this by using one |
|
|
or many Archive instances to seek to the correct location. The best performance will |
|
|
or many Archive instances to seek to the correct location. The best performance will |
|
|
occur when reading archive entries in the order in which they appear in the archive. |
|
|
occur when reading archive entries in the order in which they appear in the archive. |
|
|
Reading out of order will cause the archive to be closed and opened each time a |
|
|
Reading out of order will cause the archive to be closed and opened each time a |
|
|
reverse seek is needed.""" |
|
|
|
|
|
|
|
|
reverse seek is needed.''' |
|
|
|
|
|
|
|
|
def __init__(self, f, **kwargs): |
|
|
def __init__(self, f, **kwargs): |
|
|
self._stream = None |
|
|
self._stream = None |
|
|
# Convert file to open file. We need this to reopen the archive. |
|
|
# Convert file to open file. We need this to reopen the archive. |
|
|
mode = kwargs.setdefault("mode", "r") |
|
|
|
|
|
|
|
|
mode = kwargs.setdefault('mode', 'r') |
|
|
if isinstance(f, str): |
|
|
if isinstance(f, str): |
|
|
f = open(f, mode) |
|
|
f = open(f, mode) |
|
|
super(SeekableArchive, self).__init__(f, **kwargs) |
|
|
super(SeekableArchive, self).__init__(f, **kwargs) |
|
@@ -748,21 +675,21 @@ class SeekableArchive(Archive): |
|
|
self.eof = True |
|
|
self.eof = True |
|
|
|
|
|
|
|
|
def reopen(self): |
|
|
def reopen(self): |
|
|
"""Seeks the underlying fd to 0 position, then opens the archive. If the archive |
|
|
|
|
|
is already open, this will effectively re-open it (rewind to the beginning).""" |
|
|
|
|
|
|
|
|
'''Seeks the underlying fd to 0 position, then opens the archive. If the archive |
|
|
|
|
|
is already open, this will effectively re-open it (rewind to the beginning).''' |
|
|
self.denit() |
|
|
self.denit() |
|
|
self.f.seek(0) |
|
|
self.f.seek(0) |
|
|
self.init() |
|
|
self.init() |
|
|
|
|
|
|
|
|
def getentry(self, pathname): |
|
|
def getentry(self, pathname): |
|
|
"""Take a name or entry object and returns an entry object.""" |
|
|
|
|
|
|
|
|
'''Take a name or entry object and returns an entry object.''' |
|
|
for entry in self: |
|
|
for entry in self: |
|
|
if entry.pathname == pathname: |
|
|
if entry.pathname == pathname: |
|
|
return entry |
|
|
return entry |
|
|
raise KeyError(pathname) |
|
|
raise KeyError(pathname) |
|
|
|
|
|
|
|
|
def seek(self, entry): |
|
|
def seek(self, entry): |
|
|
"""Seeks the archive to the requested entry. Will reopen if necessary.""" |
|
|
|
|
|
|
|
|
'''Seeks the archive to the requested entry. Will reopen if necessary.''' |
|
|
move = entry.header_position - self.header_position |
|
|
move = entry.header_position - self.header_position |
|
|
if move != 0: |
|
|
if move != 0: |
|
|
if move < 0: |
|
|
if move < 0: |
|
@@ -774,7 +701,7 @@ class SeekableArchive(Archive): |
|
|
break |
|
|
break |
|
|
|
|
|
|
|
|
def read(self, member): |
|
|
def read(self, member): |
|
|
"""Return the requested archive entry contents as a string.""" |
|
|
|
|
|
|
|
|
'''Return the requested archive entry contents as a string.''' |
|
|
entry = self.getentry(member) |
|
|
entry = self.getentry(member) |
|
|
self.seek(entry) |
|
|
self.seek(entry) |
|
|
return super(SeekableArchive, self).read(entry.size) |
|
|
return super(SeekableArchive, self).read(entry.size) |
|
@@ -785,7 +712,7 @@ class SeekableArchive(Archive): |
|
|
return super(SeekableArchive, self).readpath(f) |
|
|
return super(SeekableArchive, self).readpath(f) |
|
|
|
|
|
|
|
|
def readstream(self, member): |
|
|
def readstream(self, member): |
|
|
"""Returns a file-like object for reading requested archive entry contents.""" |
|
|
|
|
|
|
|
|
'''Returns a file-like object for reading requested archive entry contents.''' |
|
|
entry = self.getentry(member) |
|
|
entry = self.getentry(member) |
|
|
self.seek(entry) |
|
|
self.seek(entry) |
|
|
self._stream = EntryReadStream(self, entry.size) |
|
|
self._stream = EntryReadStream(self, entry.size) |
|
|