magic.py 7.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283
  1. """
  2. magic is a wrapper around the libmagic file identification library.
  3. Copyright Adam Hupp, adam@hupp.org
  4. license MIT
  5. Usage:
  6. >>> import magic
  7. >>> magic.from_file("testdata/test.pdf")
  8. 'PDF document, version 1.2'
  9. >>> magic.from_file("testdata/test.pdf", mime=True)
  10. 'application/pdf'
  11. >>> magic.from_buffer(open("testdata/test.pdf").read(1024))
  12. 'PDF document, version 1.2'
  13. >>>
  14. """
  15. import sys
  16. import glob
  17. import os.path
  18. import ctypes
  19. import ctypes.util
  20. import threading
  21. from ctypes import c_char_p, c_int, c_size_t, c_void_p
  22. class MagicException(Exception): pass
  23. class Magic:
  24. """
  25. Magic is a wrapper around the libmagic C library.
  26. """
  27. def __init__(self, mime=False, magic_file=None, mime_encoding=False,
  28. keep_going=False):
  29. """
  30. Create a new libmagic wrapper.
  31. mime - if True, mimetypes are returned instead of textual descriptions
  32. mime_encoding - if True, codec is returned
  33. magic_file - use a mime database other than the system default
  34. keep_going - don't stop at the first match, keep going
  35. """
  36. flags = MAGIC_NONE
  37. if mime:
  38. flags |= MAGIC_MIME
  39. elif mime_encoding:
  40. flags |= MAGIC_MIME_ENCODING
  41. if keep_going:
  42. flags |= MAGIC_CONTINUE
  43. self.cookie = magic_open(flags)
  44. magic_load(self.cookie, magic_file)
  45. self.thread = threading.currentThread()
  46. def from_buffer(self, buf):
  47. """
  48. Identify the contents of `buf`
  49. """
  50. self._thread_check()
  51. return magic_buffer(self.cookie, buf)
  52. def from_file(self, filename):
  53. """
  54. Identify the contents of file `filename`
  55. raises IOError if the file does not exist
  56. """
  57. self._thread_check()
  58. if not os.path.exists(filename):
  59. raise IOError("File does not exist: " + filename)
  60. return magic_file(self.cookie, filename)
  61. def _thread_check(self):
  62. if self.thread != threading.currentThread():
  63. raise Exception('attempting to use libmagic on multiple threads will '
  64. 'end in SEGV. Prefer to use the module functions '
  65. 'from_file or from_buffer, or carefully manage direct '
  66. 'use of the Magic class')
  67. def __del__(self):
  68. # no _thread_check here because there can be no other
  69. # references to this object at this point.
  70. # during shutdown magic_close may have been cleared already so
  71. # make sure it exists before using it.
  72. # the self.cookie check should be unnessary and was an
  73. # incorrect fix for a threading problem, however I'm leaving
  74. # it in because it's harmless and I'm slightly afraid to
  75. # remove it.
  76. if self.cookie and magic_close:
  77. magic_close(self.cookie)
  78. self.cookie = None
  79. instances = threading.local()
  80. def _get_magic_type(mime):
  81. i = instances.__dict__.get(mime)
  82. if i is None:
  83. i = instances.__dict__[mime] = Magic(mime=mime)
  84. return i
  85. def from_file(filename, mime=False):
  86. """"
  87. Accepts a filename and returns the detected filetype. Return
  88. value is the mimetype if mime=True, otherwise a human readable
  89. name.
  90. >>> magic.from_file("testdata/test.pdf", mime=True)
  91. 'application/pdf'
  92. """
  93. m = _get_magic_type(mime)
  94. return m.from_file(filename)
  95. def from_buffer(buffer, mime=False):
  96. """
  97. Accepts a binary string and returns the detected filetype. Return
  98. value is the mimetype if mime=True, otherwise a human readable
  99. name.
  100. >>> magic.from_buffer(open("testdata/test.pdf").read(1024))
  101. 'PDF document, version 1.2'
  102. """
  103. m = _get_magic_type(mime)
  104. return m.from_buffer(buffer)
  105. libmagic = None
  106. # Let's try to find magic or magic1
  107. dll = ctypes.util.find_library('magic') or ctypes.util.find_library('magic1')
  108. # This is necessary because find_library returns None if it doesn't find the library
  109. if dll:
  110. libmagic = ctypes.CDLL(dll)
  111. if not libmagic or not libmagic._name:
  112. import sys
  113. platform_to_lib = {'darwin': ['/opt/local/lib/libmagic.dylib',
  114. '/usr/local/lib/libmagic.dylib'] +
  115. # Assumes there will only be one version installed
  116. glob.glob('/usr/local/Cellar/libmagic/*/lib/libmagic.dylib'),
  117. 'win32': ['magic1.dll']}
  118. for dll in platform_to_lib.get(sys.platform, []):
  119. try:
  120. libmagic = ctypes.CDLL(dll)
  121. break
  122. except OSError:
  123. pass
  124. if not libmagic or not libmagic._name:
  125. # It is better to raise an ImportError since we are importing magic module
  126. raise ImportError('failed to find libmagic. Check your installation')
  127. magic_t = ctypes.c_void_p
  128. def errorcheck_null(result, func, args):
  129. if result is None:
  130. err = magic_error(args[0])
  131. raise MagicException(err)
  132. else:
  133. return result
  134. def errorcheck_negative_one(result, func, args):
  135. if result is -1:
  136. err = magic_error(args[0])
  137. raise MagicException(err)
  138. else:
  139. return result
  140. def coerce_filename(filename):
  141. if filename is None:
  142. return None
  143. return filename.encode(sys.getfilesystemencoding())
  144. magic_open = libmagic.magic_open
  145. magic_open.restype = magic_t
  146. magic_open.argtypes = [c_int]
  147. magic_close = libmagic.magic_close
  148. magic_close.restype = None
  149. magic_close.argtypes = [magic_t]
  150. magic_error = libmagic.magic_error
  151. magic_error.restype = c_char_p
  152. magic_error.argtypes = [magic_t]
  153. magic_errno = libmagic.magic_errno
  154. magic_errno.restype = c_int
  155. magic_errno.argtypes = [magic_t]
  156. _magic_file = libmagic.magic_file
  157. _magic_file.restype = c_char_p
  158. _magic_file.argtypes = [magic_t, c_char_p]
  159. _magic_file.errcheck = errorcheck_null
  160. def magic_file(cookie, filename):
  161. return _magic_file(cookie, coerce_filename(filename))
  162. _magic_buffer = libmagic.magic_buffer
  163. _magic_buffer.restype = c_char_p
  164. _magic_buffer.argtypes = [magic_t, c_void_p, c_size_t]
  165. _magic_buffer.errcheck = errorcheck_null
  166. def magic_buffer(cookie, buf):
  167. return _magic_buffer(cookie, buf, len(buf))
  168. _magic_load = libmagic.magic_load
  169. _magic_load.restype = c_int
  170. _magic_load.argtypes = [magic_t, c_char_p]
  171. _magic_load.errcheck = errorcheck_negative_one
  172. def magic_load(cookie, filename):
  173. return _magic_load(cookie, coerce_filename(filename))
  174. magic_setflags = libmagic.magic_setflags
  175. magic_setflags.restype = c_int
  176. magic_setflags.argtypes = [magic_t, c_int]
  177. magic_check = libmagic.magic_check
  178. magic_check.restype = c_int
  179. magic_check.argtypes = [magic_t, c_char_p]
  180. magic_compile = libmagic.magic_compile
  181. magic_compile.restype = c_int
  182. magic_compile.argtypes = [magic_t, c_char_p]
  183. MAGIC_NONE = 0x000000 # No flags
  184. MAGIC_DEBUG = 0x000001 # Turn on debugging
  185. MAGIC_SYMLINK = 0x000002 # Follow symlinks
  186. MAGIC_COMPRESS = 0x000004 # Check inside compressed files
  187. MAGIC_DEVICES = 0x000008 # Look at the contents of devices
  188. MAGIC_MIME = 0x000010 # Return a mime string
  189. MAGIC_MIME_ENCODING = 0x000400 # Return the MIME encoding
  190. MAGIC_CONTINUE = 0x000020 # Return all matches
  191. MAGIC_CHECK = 0x000040 # Print warnings to stderr
  192. MAGIC_PRESERVE_ATIME = 0x000080 # Restore access time on exit
  193. MAGIC_RAW = 0x000100 # Don't translate unprintable chars
  194. MAGIC_ERROR = 0x000200 # Handle ENOENT etc as real errors
  195. MAGIC_NO_CHECK_COMPRESS = 0x001000 # Don't check for compressed files
  196. MAGIC_NO_CHECK_TAR = 0x002000 # Don't check for tar files
  197. MAGIC_NO_CHECK_SOFT = 0x004000 # Don't check magic entries
  198. MAGIC_NO_CHECK_APPTYPE = 0x008000 # Don't check application type
  199. MAGIC_NO_CHECK_ELF = 0x010000 # Don't check for elf details
  200. MAGIC_NO_CHECK_ASCII = 0x020000 # Don't check for ascii files
  201. MAGIC_NO_CHECK_TROFF = 0x040000 # Don't check ascii/troff
  202. MAGIC_NO_CHECK_FORTRAN = 0x080000 # Don't check ascii/fortran
  203. MAGIC_NO_CHECK_TOKENS = 0x100000 # Don't check ascii/tokens