|
- # coding: utf-8
-
- # from: https://github.com/file/file/raw/master/python/magic.py
-
- # LICENSE:
- # Copyright (c) Ian F. Darwin 1986-1995.
- # Software written by Ian F. Darwin and others;
- # maintained 1995-present by Christos Zoulas and others.
- #
- # Redistribution and use in source and binary forms, with or without
- # modification, are permitted provided that the following conditions
- # are met:
- # 1. Redistributions of source code must retain the above copyright
- # notice immediately at the beginning of the file, without modification,
- # this list of conditions, and the following disclaimer.
- # 2. Redistributions in binary form must reproduce the above copyright
- # notice, this list of conditions and the following disclaimer in the
- # documentation and/or other materials provided with the distribution.
- #
- # THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- # ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
- # ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- # OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- # SUCH DAMAGE.
-
- '''
- Python bindings for libmagic
- '''
-
- import ctypes
- import pathlib
- import threading
- import unittest
-
- from collections import namedtuple
-
- from ctypes import *
- from ctypes.util import find_library
-
- from .utils import _debprint
-
# Public API exported by a star-import: the three high-level detection
# helpers.
__all__ = [
    'detect_from_filename',
    'detect_from_fobj',
    'detect_from_content',
]
-
-
def _init():
    """
    Loads the shared library through ctypes and returns a library
    L{ctypes.CDLL} instance.

    Raises ImportError when no libmagic shared library can be located.
    """
    path = find_library('magic')
    if path is None:
        # find_library() reports "not found" by returning None; fail here
        # with a clear message instead of handing None to LoadLibrary().
        raise ImportError('failed to find the libmagic shared library')
    return ctypes.cdll.LoadLibrary(path)
-
# Loaded shared-library handles keyed by short name; the ctypes prototypes
# in this module resolve their symbols through _libraries['magic'].
_libraries = {}
_libraries['magic'] = _init()
-
# Flag constants for open and setflags.  Each flag is available both with
# and without the MAGIC_ prefix.
MAGIC_NONE = NONE = 0x000000
MAGIC_DEBUG = DEBUG = 0x000001
MAGIC_SYMLINK = SYMLINK = 0x000002
MAGIC_COMPRESS = COMPRESS = 0x000004
MAGIC_DEVICES = DEVICES = 0x000008
MAGIC_MIME_TYPE = MIME_TYPE = 0x000010
MAGIC_CONTINUE = CONTINUE = 0x000020
MAGIC_CHECK = CHECK = 0x000040
MAGIC_PRESERVE_ATIME = PRESERVE_ATIME = 0x000080
MAGIC_RAW = RAW = 0x000100
MAGIC_ERROR = ERROR = 0x000200
MAGIC_MIME_ENCODING = MIME_ENCODING = 0x000400
MAGIC_MIME = MIME = MAGIC_MIME_TYPE | MAGIC_MIME_ENCODING  # 1040
MAGIC_APPLE = APPLE = 0x000800

MAGIC_NO_CHECK_COMPRESS = NO_CHECK_COMPRESS = 0x001000
MAGIC_NO_CHECK_TAR = NO_CHECK_TAR = 0x002000
MAGIC_NO_CHECK_SOFT = NO_CHECK_SOFT = 0x004000
MAGIC_NO_CHECK_APPTYPE = NO_CHECK_APPTYPE = 0x008000
MAGIC_NO_CHECK_ELF = NO_CHECK_ELF = 0x010000
MAGIC_NO_CHECK_TEXT = NO_CHECK_TEXT = 0x020000
MAGIC_NO_CHECK_CDF = NO_CHECK_CDF = 0x040000
MAGIC_NO_CHECK_TOKENS = NO_CHECK_TOKENS = 0x100000
MAGIC_NO_CHECK_ENCODING = NO_CHECK_ENCODING = 0x200000

# Precomputed mask (4173824).
# NOTE(review): this is not the plain OR of the NO_CHECK_* flags defined
# above (that would be 0x37F000); presumably it mirrors the libmagic
# header -- confirm against magic.h.
MAGIC_NO_CHECK_BUILTIN = NO_CHECK_BUILTIN = 0x3FB000

# Parameter identifiers for getparam and setparam.
MAGIC_PARAM_INDIR_MAX = PARAM_INDIR_MAX = 0
MAGIC_PARAM_NAME_MAX = PARAM_NAME_MAX = 1
MAGIC_PARAM_ELF_PHNUM_MAX = PARAM_ELF_PHNUM_MAX = 2
MAGIC_PARAM_ELF_SHNUM_MAX = PARAM_ELF_SHNUM_MAX = 3
MAGIC_PARAM_ELF_NOTES_MAX = PARAM_ELF_NOTES_MAX = 4
MAGIC_PARAM_REGEX_MAX = PARAM_REGEX_MAX = 5
MAGIC_PARAM_BYTES_MAX = PARAM_BYTES_MAX = 6
-
# Result record built by _create_filemagic().  ``defaults`` applies to the
# right-most field only, so compressed_type is '' unless it is supplied.
FileMagic = namedtuple(
    'FileMagic',
    ('mime_type', 'encoding', 'name', 'compressed_type'),
    defaults=[''],
)
-
class magic_set(Structure):
    """ctypes structure declared without fields; only used through pointers."""


magic_set._fields_ = []

# Handle type taken and returned by the libmagic prototypes in this module.
magic_t = POINTER(magic_set)
-
def _bind(symbol, restype, argtypes):
    """
    Look up *symbol* in the loaded libmagic library, attach the given
    ctypes return type and argument types, and return the function object.
    """
    func = getattr(_libraries['magic'], symbol)
    func.restype = restype
    func.argtypes = argtypes
    return func


# One prototype per libmagic entry point used by this module.
_open = _bind('magic_open', magic_t, [c_int])
_close = _bind('magic_close', None, [magic_t])
_file = _bind('magic_file', c_char_p, [magic_t, c_char_p])
_descriptor = _bind('magic_descriptor', c_char_p, [magic_t, c_int])
_buffer = _bind('magic_buffer', c_char_p, [magic_t, c_void_p, c_size_t])
_error = _bind('magic_error', c_char_p, [magic_t])
_setflags = _bind('magic_setflags', c_int, [magic_t, c_int])
_load = _bind('magic_load', c_int, [magic_t, c_char_p])
_compile = _bind('magic_compile', c_int, [magic_t, c_char_p])
_check = _bind('magic_check', c_int, [magic_t, c_char_p])
_list = _bind('magic_list', c_int, [magic_t, c_char_p])
_errno = _bind('magic_errno', c_int, [magic_t])
_getparam = _bind('magic_getparam', c_int, [magic_t, c_int, c_void_p])
_setparam = _bind('magic_setparam', c_int, [magic_t, c_int, c_void_p])
_mgp = _bind('magic_getpath', c_char_p, [c_char_p, c_int])
_mlb = _bind(
    'magic_load_buffers',
    c_int,
    [magic_t, POINTER(c_void_p), POINTER(c_size_t), c_size_t],
)
-
class Magic(object):
    """
    Object wrapper around a libmagic handle.

    Every method forwards to the matching libmagic function, converting
    between Python strings and the byte strings used at the C boundary.
    """

    def __init__(self, ms):
        # ms: the handle passed as first argument to every libmagic call.
        self._magic_t = ms

    def close(self):
        """
        Closes the magic database and deallocates any resources used.

        Calling close() on an already closed object does nothing.
        """
        if self._magic_t is None:
            return
        _close(self._magic_t)
        # Drop the handle so a repeated close() cannot release it twice.
        self._magic_t = None

    @staticmethod
    def __tostr(s):
        """Convert a libmagic result to str; None is passed through."""
        if s is None:
            return None
        if isinstance(s, str):
            return s
        try:  # keep Python 2 compatibility
            return str(s, 'utf-8')
        except TypeError:
            return str(s)

    @staticmethod
    def __tobytes(b):
        """Convert an argument to bytes for libmagic; None is passed through."""
        if b is None:
            return None
        if isinstance(b, bytes):
            return b
        try:  # keep Python 2 compatibility
            return bytes(b, 'utf-8')
        except TypeError:
            return bytes(b)

    def file(self, filename):
        """
        Returns a textual description of the contents of the argument passed
        as a filename or None if an error occurred and the MAGIC_ERROR flag
        is set. A call to errno() will return the numeric error code.
        """
        return Magic.__tostr(_file(self._magic_t, Magic.__tobytes(filename)))

    def descriptor(self, fd):
        """
        Returns a textual description of the contents of the argument passed
        as a file descriptor or None if an error occurred and the MAGIC_ERROR
        flag is set. A call to errno() will return the numeric error code.
        """
        return Magic.__tostr(_descriptor(self._magic_t, fd))

    def buffer(self, buf):
        """
        Returns a textual description of the contents of the argument passed
        as a buffer or None if an error occurred and the MAGIC_ERROR flag
        is set. A call to errno() will return the numeric error code.
        """
        return Magic.__tostr(_buffer(self._magic_t, buf, len(buf)))

    def error(self):
        """
        Returns a textual explanation of the last error or None
        if there was no error.
        """
        return Magic.__tostr(_error(self._magic_t))

    def setflags(self, flags):
        """
        Set flags on the magic object which determine how magic checking
        behaves; a bitwise OR of the flags described in libmagic(3), but
        without the MAGIC_ prefix.

        Returns -1 on systems that don't support utime(2) or utimes(2)
        when PRESERVE_ATIME is set.
        """
        return _setflags(self._magic_t, flags)

    def load(self, filename=None):
        """
        Must be called to load entries in the colon separated list of database
        files passed as argument or the default database file if no argument
        before any magic queries can be performed.

        With no argument, the path reported by magic_getpath() with '.mgc'
        appended is loaded together with the 'magic' entry located in the
        directory of this module.

        Returns 0 on success and -1 on failure.
        """
        if filename is not None:
            # Honour an explicit database list instead of silently
            # discarding it in favour of the default.
            return _load(self._magic_t, Magic.__tobytes(filename))

        bundled = str(pathlib.Path(__file__).parent / 'magic')
        files = _mgp(None, 0).decode('utf-8') + '.mgc' + ':' + bundled
        return _load(self._magic_t, files.encode('utf-8'))

    def compile(self, dbs):
        """
        Compile entries in the colon separated list of database files
        passed as argument or the default database file if no argument.
        The compiled files created are named from the basename(1) of each file
        argument with ".mgc" appended to it.

        Returns 0 on success and -1 on failure.
        """
        return _compile(self._magic_t, Magic.__tobytes(dbs))

    def check(self, dbs):
        """
        Check the validity of entries in the colon separated list of
        database files passed as argument or the default database file
        if no argument.

        Returns 0 on success and -1 on failure.
        """
        return _check(self._magic_t, Magic.__tobytes(dbs))

    def list(self, dbs):
        """
        Dump, in a human readable format, the magic entries of the colon
        separated list of database files passed as argument or of the
        default database file if no argument.

        Returns 0 on success and -1 on failure.
        """
        return _list(self._magic_t, Magic.__tobytes(dbs))

    def errno(self):
        """
        Returns a numeric error code. If return value is 0, an internal
        magic error occurred. If return value is non-zero, the value is
        an OS error code. The errno module or os.strerror() can be used
        to provide detailed error information.
        """
        return _errno(self._magic_t)

    def getparam(self, param):
        """
        Returns the param value if successful and -1 if the parameter
        was unknown.
        """
        # libmagic(3) documents the parameter values as size_t, so the
        # output buffer must be that wide rather than a C int.
        v = c_size_t()
        if _getparam(self._magic_t, param, byref(v)) == -1:
            return -1
        return v.value

    def setparam(self, param, value):
        """
        Returns 0 if successful and -1 if the parameter was unknown.
        """
        # Same width as in getparam(): libmagic reads a size_t here.
        v = c_size_t(value)
        return _setparam(self._magic_t, param, byref(v))
-
-
def open(flags):
    """
    Returns a magic object on success and None on failure.
    Flags argument as for setflags.
    """
    handle = _open(flags)
    # _open is declared with a ctypes POINTER restype, so a NULL result
    # arrives as a falsy pointer object, never as None; an identity check
    # against None would not detect the failure.
    if not handle:
        return None
    return Magic(handle)
-
-
# Objects used by `detect_from_` functions
class error(Exception):
    """Raised by MagicDetect when its magic handles cannot be set up."""
-
class MagicDetect(object):
    """
    Bundle of four loaded magic objects, one per flag combination:
    mime_magic, none_magic, mimecomp_magic and nonecomp_magic.

    Construction either sets all four attributes or raises `error`.
    """

    def __init__(self):
        # Set first so __del__ is safe even if construction fails.
        self._loaded = []
        opened = []
        failure = None
        for attr, flags in [
            ('mime_magic', MAGIC_MIME),
            ('none_magic', MAGIC_NONE),
            ('mimecomp_magic', MAGIC_MIME | MAGIC_COMPRESS),
            ('nonecomp_magic', MAGIC_NONE | MAGIC_COMPRESS),
        ]:
            r = open(flags)
            if r is None:
                failure = 'unable to open a magic handle for %s' % attr
                break

            if r.load() == -1:
                failure = 'unable to load the magic database for %s: %s' % (
                    attr, r.error())
                r.close()
                break

            setattr(self, attr, r)
            opened.append(attr)

        if failure is not None:
            # Track failure explicitly: judging by the list of already
            # opened handles misses a failure on the very first one.
            for attr in opened:
                getattr(self, attr).close()
                setattr(self, attr, None)
            raise error(failure)

        self._loaded = opened

    def __del__(self):
        # Release every handle that was successfully set up.
        for attr in self._loaded:
            getattr(self, attr).close()
            setattr(self, attr, None)
-
# Per-thread storage for the lazily created MagicDetect instance.
threadlocal = threading.local()


def _detect_make():
    """
    Return the MagicDetect stored for the calling thread, creating and
    storing one on first use.
    """
    detector = getattr(threadlocal, 'magic_instance', None)
    if detector is None:
        detector = MagicDetect()
        threadlocal.magic_instance = detector
    return detector
-
def _create_filemagic(mime_detected, type_detected):
    """
    Build a FileMagic record from a MIME string and a description.

    mime_detected must have the form '<mime type>; <encoding>', where the
    encoding part may be followed by
    ' compressed-encoding=<mime type>[; <encoding>]'.  Any 'charset='
    text is removed from the encoding.  type_detected becomes the `name`
    field unchanged.

    Raises ValueError (carrying mime_detected) when the '; ' separator
    after the MIME type is missing.
    """
    mime_type, sep, mime_encoding = mime_detected.partition('; ')
    if not sep:
        raise ValueError(mime_detected)

    kwargs = {}
    mime_encoding, marker, compressed = mime_encoding.partition(
        ' compressed-encoding=')
    if marker:
        # Keep only the MIME type of the compressed part; partition()
        # copes with a missing trailing '; charset=...' instead of
        # failing on tuple unpacking.
        kwargs['compressed_type'] = compressed.partition('; ')[0]

    return FileMagic(name=type_detected, mime_type=mime_type,
                     encoding=mime_encoding.replace('charset=', ''),
                     **kwargs)
-
def detect_from_filename(filename):
    '''Detect mime type, encoding and file type from a filename

    The file is first examined with the compression-aware magic objects;
    if that reports a decompression error, the plain ones are used.

    Returns a `FileMagic` namedtuple.
    '''
    detector = _detect_make()

    mime = detector.mimecomp_magic.file(filename)

    # if there's a decomp error, don't look at decomp
    decomp_failed = mime.startswith((
        'application/x-decompression-error',
        'gzip ERROR: Unknown compression format',
    ))
    if decomp_failed:
        return _create_filemagic(detector.mime_magic.file(filename),
                                 detector.none_magic.file(filename))

    return _create_filemagic(mime, detector.nonecomp_magic.file(filename))
-
def detect_from_fobj(fobj):
    '''Detect mime type, encoding and file type from file-like object

    fobj must provide fileno(); detection is done on that descriptor.

    Returns a `FileMagic` namedtuple.
    '''
    fd = fobj.fileno()
    detector = _detect_make()
    mime = detector.mime_magic.descriptor(fd)
    description = detector.none_magic.descriptor(fd)
    return _create_filemagic(mime, description)
-
-
def detect_from_content(byte_content):
    '''Detect mime type, encoding and file type from bytes

    Returns a `FileMagic` namedtuple.
    '''
    detector = _detect_make()
    mime = detector.mime_magic.buffer(byte_content)
    description = detector.none_magic.buffer(byte_content)
    return _create_filemagic(mime, description)
-
class _TestMagic(unittest.TestCase):
    """Unit tests for the string-parsing helper of this module."""

    def test_create_filemagic(self):
        # MIME string with a compressed-encoding part and a description.
        parsed = _create_filemagic(
            'application/x-tar; charset=binary '
            'compressed-encoding=application/gzip; charset=binary',
            'foobar')

        self.assertEqual(parsed.mime_type, 'application/x-tar')
        self.assertEqual(parsed.encoding, 'binary')
        self.assertEqual(parsed.compressed_type, 'application/gzip')
        self.assertEqual(parsed.name, 'foobar')
|