MetaData Sharing
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 

445 lines
14 KiB

  1. # coding: utf-8
  2. # from: https://github.com/file/file/raw/master/python/magic.py
  3. # LICENSE:
  4. # Copyright (c) Ian F. Darwin 1986-1995.
  5. # Software written by Ian F. Darwin and others;
  6. # maintained 1995-present by Christos Zoulas and others.
  7. #
  8. # Redistribution and use in source and binary forms, with or without
  9. # modification, are permitted provided that the following conditions
  10. # are met:
  11. # 1. Redistributions of source code must retain the above copyright
  12. # notice immediately at the beginning of the file, without modification,
  13. # this list of conditions, and the following disclaimer.
  14. # 2. Redistributions in binary form must reproduce the above copyright
  15. # notice, this list of conditions and the following disclaimer in the
  16. # documentation and/or other materials provided with the distribution.
  17. #
  18. # THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  19. # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  20. # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  21. # ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
  22. # ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  23. # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  24. # OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  25. # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  26. # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  27. # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  28. # SUCH DAMAGE.
  29. '''
  30. Python bindings for libmagic
  31. '''
  32. import ctypes
  33. import pathlib
  34. import threading
  35. import unittest
  36. from collections import namedtuple
  37. from ctypes import *
  38. from ctypes.util import find_library
  39. from .utils import _debprint
  40. __all__ = [
  41. 'detect_from_filename',
  42. 'detect_from_fobj',
  43. 'detect_from_content',
  44. ]
  45. def _init():
  46. """
  47. Loads the shared library through ctypes and returns a library
  48. L{ctypes.CDLL} instance
  49. """
  50. return ctypes.cdll.LoadLibrary(find_library('magic'))
  51. _libraries = {}
  52. _libraries['magic'] = _init()
  53. # Flag constants for open and setflags
  54. MAGIC_NONE = NONE = 0
  55. MAGIC_DEBUG = DEBUG = 1
  56. MAGIC_SYMLINK = SYMLINK = 2
  57. MAGIC_COMPRESS = COMPRESS = 4
  58. MAGIC_DEVICES = DEVICES = 8
  59. MAGIC_MIME_TYPE = MIME_TYPE = 16
  60. MAGIC_CONTINUE = CONTINUE = 32
  61. MAGIC_CHECK = CHECK = 64
  62. MAGIC_PRESERVE_ATIME = PRESERVE_ATIME = 128
  63. MAGIC_RAW = RAW = 256
  64. MAGIC_ERROR = ERROR = 512
  65. MAGIC_MIME_ENCODING = MIME_ENCODING = 1024
  66. MAGIC_MIME = MIME = 1040 # MIME_TYPE + MIME_ENCODING
  67. MAGIC_APPLE = APPLE = 2048
  68. MAGIC_NO_CHECK_COMPRESS = NO_CHECK_COMPRESS = 4096
  69. MAGIC_NO_CHECK_TAR = NO_CHECK_TAR = 8192
  70. MAGIC_NO_CHECK_SOFT = NO_CHECK_SOFT = 16384
  71. MAGIC_NO_CHECK_APPTYPE = NO_CHECK_APPTYPE = 32768
  72. MAGIC_NO_CHECK_ELF = NO_CHECK_ELF = 65536
  73. MAGIC_NO_CHECK_TEXT = NO_CHECK_TEXT = 131072
  74. MAGIC_NO_CHECK_CDF = NO_CHECK_CDF = 262144
  75. MAGIC_NO_CHECK_TOKENS = NO_CHECK_TOKENS = 1048576
  76. MAGIC_NO_CHECK_ENCODING = NO_CHECK_ENCODING = 2097152
  77. MAGIC_NO_CHECK_BUILTIN = NO_CHECK_BUILTIN = 4173824
  78. MAGIC_PARAM_INDIR_MAX = PARAM_INDIR_MAX = 0
  79. MAGIC_PARAM_NAME_MAX = PARAM_NAME_MAX = 1
  80. MAGIC_PARAM_ELF_PHNUM_MAX = PARAM_ELF_PHNUM_MAX = 2
  81. MAGIC_PARAM_ELF_SHNUM_MAX = PARAM_ELF_SHNUM_MAX = 3
  82. MAGIC_PARAM_ELF_NOTES_MAX = PARAM_ELF_NOTES_MAX = 4
  83. MAGIC_PARAM_REGEX_MAX = PARAM_REGEX_MAX = 5
  84. MAGIC_PARAM_BYTES_MAX = PARAM_BYTES_MAX = 6
  85. FileMagic = namedtuple('FileMagic', ('mime_type', 'encoding',
  86. 'name', 'compressed_type'), defaults=[ '' ])
  87. class magic_set(Structure):
  88. pass
  89. magic_set._fields_ = []
  90. magic_t = POINTER(magic_set)
  91. _open = _libraries['magic'].magic_open
  92. _open.restype = magic_t
  93. _open.argtypes = [c_int]
  94. _close = _libraries['magic'].magic_close
  95. _close.restype = None
  96. _close.argtypes = [magic_t]
  97. _file = _libraries['magic'].magic_file
  98. _file.restype = c_char_p
  99. _file.argtypes = [magic_t, c_char_p]
  100. _descriptor = _libraries['magic'].magic_descriptor
  101. _descriptor.restype = c_char_p
  102. _descriptor.argtypes = [magic_t, c_int]
  103. _buffer = _libraries['magic'].magic_buffer
  104. _buffer.restype = c_char_p
  105. _buffer.argtypes = [magic_t, c_void_p, c_size_t]
  106. _error = _libraries['magic'].magic_error
  107. _error.restype = c_char_p
  108. _error.argtypes = [magic_t]
  109. _setflags = _libraries['magic'].magic_setflags
  110. _setflags.restype = c_int
  111. _setflags.argtypes = [magic_t, c_int]
  112. _load = _libraries['magic'].magic_load
  113. _load.restype = c_int
  114. _load.argtypes = [magic_t, c_char_p]
  115. _compile = _libraries['magic'].magic_compile
  116. _compile.restype = c_int
  117. _compile.argtypes = [magic_t, c_char_p]
  118. _check = _libraries['magic'].magic_check
  119. _check.restype = c_int
  120. _check.argtypes = [magic_t, c_char_p]
  121. _list = _libraries['magic'].magic_list
  122. _list.restype = c_int
  123. _list.argtypes = [magic_t, c_char_p]
  124. _errno = _libraries['magic'].magic_errno
  125. _errno.restype = c_int
  126. _errno.argtypes = [magic_t]
  127. _getparam = _libraries['magic'].magic_getparam
  128. _getparam.restype = c_int
  129. _getparam.argtypes = [magic_t, c_int, c_void_p]
  130. _setparam = _libraries['magic'].magic_setparam
  131. _setparam.restype = c_int
  132. _setparam.argtypes = [magic_t, c_int, c_void_p]
  133. _mgp = _libraries['magic'].magic_getpath
  134. _mgp.restype = c_char_p
  135. _mgp.argtypes = [ c_char_p, c_int ]
  136. _mlb = _libraries['magic'].magic_load_buffers
  137. _mlb.restype = c_int
  138. _mlb.argtypes = [ magic_t, POINTER(c_void_p), POINTER(c_size_t), c_size_t ]
  139. class Magic(object):
  140. def __init__(self, ms):
  141. self._magic_t = ms
  142. def close(self):
  143. """
  144. Closes the magic database and deallocates any resources used.
  145. """
  146. _close(self._magic_t)
  147. @staticmethod
  148. def __tostr(s):
  149. if s is None:
  150. return None
  151. if isinstance(s, str):
  152. return s
  153. try: # keep Python 2 compatibility
  154. return str(s, 'utf-8')
  155. except TypeError:
  156. return str(s)
  157. @staticmethod
  158. def __tobytes(b):
  159. if b is None:
  160. return None
  161. if isinstance(b, bytes):
  162. return b
  163. try: # keep Python 2 compatibility
  164. return bytes(b, 'utf-8')
  165. except TypeError:
  166. return bytes(b)
  167. def file(self, filename):
  168. """
  169. Returns a textual description of the contents of the argument passed
  170. as a filename or None if an error occurred and the MAGIC_ERROR flag
  171. is set. A call to errno() will return the numeric error code.
  172. """
  173. return Magic.__tostr(_file(self._magic_t, Magic.__tobytes(filename)))
  174. def descriptor(self, fd):
  175. """
  176. Returns a textual description of the contents of the argument passed
  177. as a file descriptor or None if an error occurred and the MAGIC_ERROR
  178. flag is set. A call to errno() will return the numeric error code.
  179. """
  180. return Magic.__tostr(_descriptor(self._magic_t, fd))
  181. def buffer(self, buf):
  182. """
  183. Returns a textual description of the contents of the argument passed
  184. as a buffer or None if an error occurred and the MAGIC_ERROR flag
  185. is set. A call to errno() will return the numeric error code.
  186. """
  187. return Magic.__tostr(_buffer(self._magic_t, buf, len(buf)))
  188. def error(self):
  189. """
  190. Returns a textual explanation of the last error or None
  191. if there was no error.
  192. """
  193. return Magic.__tostr(_error(self._magic_t))
  194. def setflags(self, flags):
  195. """
  196. Set flags on the magic object which determine how magic checking
  197. behaves; a bitwise OR of the flags described in libmagic(3), but
  198. without the MAGIC_ prefix.
  199. Returns -1 on systems that don't support utime(2) or utimes(2)
  200. when PRESERVE_ATIME is set.
  201. """
  202. return _setflags(self._magic_t, flags)
  203. def load(self, filename=None):
  204. """
  205. Must be called to load entries in the colon separated list of database
  206. files passed as argument or the default database file if no argument
  207. before any magic queries can be performed.
  208. Returns 0 on success and -1 on failure.
  209. """
  210. files = _mgp(None, 0).decode('utf-8') + '.mgc' + ':' + str(pathlib.Path(__file__).parent / 'magic')
  211. return _load(self._magic_t, files.encode('utf-8'))
  212. def compile(self, dbs):
  213. """
  214. Compile entries in the colon separated list of database files
  215. passed as argument or the default database file if no argument.
  216. The compiled files created are named from the basename(1) of each file
  217. argument with ".mgc" appended to it.
  218. Returns 0 on success and -1 on failure.
  219. """
  220. return _compile(self._magic_t, Magic.__tobytes(dbs))
  221. def check(self, dbs):
  222. """
  223. Check the validity of entries in the colon separated list of
  224. database files passed as argument or the default database file
  225. if no argument.
  226. Returns 0 on success and -1 on failure.
  227. """
  228. return _check(self._magic_t, Magic.__tobytes(dbs))
  229. def list(self, dbs):
  230. """
  231. Check the validity of entries in the colon separated list of
  232. database files passed as argument or the default database file
  233. if no argument.
  234. Returns 0 on success and -1 on failure.
  235. """
  236. return _list(self._magic_t, Magic.__tobytes(dbs))
  237. def errno(self):
  238. """
  239. Returns a numeric error code. If return value is 0, an internal
  240. magic error occurred. If return value is non-zero, the value is
  241. an OS error code. Use the errno module or os.strerror() can be used
  242. to provide detailed error information.
  243. """
  244. return _errno(self._magic_t)
  245. def getparam(self, param):
  246. """
  247. Returns the param value if successful and -1 if the parameter
  248. was unknown.
  249. """
  250. v = c_int()
  251. i = _getparam(self._magic_t, param, byref(v))
  252. if i == -1:
  253. return -1
  254. return v.value
  255. def setparam(self, param, value):
  256. """
  257. Returns 0 if successful and -1 if the parameter was unknown.
  258. """
  259. v = c_int(value)
  260. return _setparam(self._magic_t, param, byref(v))
  261. def open(flags):
  262. """
  263. Returns a magic object on success and None on failure.
  264. Flags argument as for setflags.
  265. """
  266. magic_t = _open(flags)
  267. if magic_t is None:
  268. return None
  269. return Magic(magic_t)
  270. # Objects used by `detect_from_` functions
  271. class error(Exception):
  272. pass
  273. class MagicDetect(object):
  274. def __init__(self):
  275. undo = []
  276. self._loaded = []
  277. err = None
  278. for attr, flags in [
  279. ('mime_magic', MAGIC_MIME),
  280. ('none_magic', MAGIC_NONE),
  281. ('mimecomp_magic', MAGIC_MIME|MAGIC_COMPRESS),
  282. ('nonecomp_magic', MAGIC_NONE|MAGIC_COMPRESS),
  283. ]:
  284. r = open(flags)
  285. if r is None:
  286. break
  287. if r.load() == -1:
  288. r.close()
  289. break
  290. setattr(self, attr, r)
  291. undo.append(attr)
  292. else:
  293. self._loaded = undo
  294. undo = []
  295. for attr in undo:
  296. getattr(self, attr).close()
  297. setattr(self, attr, None)
  298. if undo:
  299. raise error
  300. def __del__(self):
  301. for attr in self._loaded:
  302. getattr(self, attr).close()
  303. setattr(self, attr, None)
  304. threadlocal = threading.local()
  305. def _detect_make():
  306. v = getattr(threadlocal, "magic_instance", None)
  307. if v is None:
  308. v = MagicDetect()
  309. setattr(threadlocal, "magic_instance", v)
  310. return v
  311. def _create_filemagic(mime_detected, type_detected):
  312. try:
  313. mime_type, mime_encoding = mime_detected.split('; ', 1)
  314. except ValueError:
  315. raise ValueError(mime_detected)
  316. kwargs = {}
  317. try:
  318. mime_encoding, compressed_type = mime_encoding.split(' compressed-encoding=')
  319. except ValueError:
  320. pass
  321. else:
  322. compressed_type, _ = compressed_type.split('; ', 1)
  323. kwargs['compressed_type'] = compressed_type
  324. return FileMagic(name=type_detected, mime_type=mime_type,
  325. encoding=mime_encoding.replace('charset=', ''), **kwargs)
  326. def detect_from_filename(filename):
  327. '''Detect mime type, encoding and file type from a filename
  328. Returns a `FileMagic` namedtuple.
  329. '''
  330. x = _detect_make()
  331. t = x.mimecomp_magic.file(filename)
  332. # if there's a decomp error, don't look at decomp
  333. if t.startswith('application/x-decompression-error') or \
  334. t.startswith('gzip ERROR: Unknown compression format'):
  335. return _create_filemagic(x.mime_magic.file(filename),
  336. x.none_magic.file(filename))
  337. return _create_filemagic(t, x.nonecomp_magic.file(filename))
  338. def detect_from_fobj(fobj):
  339. '''Detect mime type, encoding and file type from file-like object
  340. Returns a `FileMagic` namedtuple.
  341. '''
  342. file_descriptor = fobj.fileno()
  343. x = _detect_make()
  344. return _create_filemagic(x.mime_magic.descriptor(file_descriptor),
  345. x.none_magic.descriptor(file_descriptor))
  346. def detect_from_content(byte_content):
  347. '''Detect mime type, encoding and file type from bytes
  348. Returns a `FileMagic` namedtuple.
  349. '''
  350. x = _detect_make()
  351. return _create_filemagic(x.mime_magic.buffer(byte_content),
  352. x.none_magic.buffer(byte_content))
  353. class _TestMagic(unittest.TestCase):
  354. def test_create_filemagic(self):
  355. a = _create_filemagic('application/x-tar; charset=binary compressed-encoding=application/gzip; charset=binary', 'foobar')
  356. self.assertEqual(a.mime_type, 'application/x-tar')
  357. self.assertEqual(a.encoding, 'binary')
  358. self.assertEqual(a.compressed_type, 'application/gzip')
  359. self.assertEqual(a.name, 'foobar')